1 /* ******************************************************************
2    Huffman encoder, part of New Generation Entropy library
3    Copyright (C) 2013-2016, Yann Collet.
4 
5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 
7    Redistribution and use in source and binary forms, with or without
8    modification, are permitted provided that the following conditions are
9    met:
10 
11        * Redistributions of source code must retain the above copyright
12    notice, this list of conditions and the following disclaimer.
13        * Redistributions in binary form must reproduce the above
14    copyright notice, this list of conditions and the following disclaimer
15    in the documentation and/or other materials provided with the
16    distribution.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30     You can contact the author at :
31     - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32     - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34 
35 /* **************************************************************
36 *  Compiler specifics
37 ****************************************************************/
38 #ifdef _MSC_VER    /* Visual Studio */
39 #  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
40 #endif
41 
42 
43 /* **************************************************************
44 *  Includes
45 ****************************************************************/
46 #include <string.h>     /* memcpy, memset */
47 #include <stdio.h>      /* printf (debug) */
48 #include "compiler.h"
49 #include "bitstream.h"
50 #include "hist.h"
51 #define FSE_STATIC_LINKING_ONLY   /* FSE_optimalTableLog_internal */
52 #include "fse.h"        /* header compression */
53 #define HUF_STATIC_LINKING_ONLY
54 #include "huf.h"
55 #include "error_private.h"
56 
57 
58 /* **************************************************************
59 *  Error Management
60 ****************************************************************/
#define HUF_isError ERR_isError
#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)   /* use only *after* variable declarations */
/* CHECK_V_F(e, f) : declares `size_t const e` holding the result of `f`,
 * and returns that error code from the enclosing function if `f` failed */
#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
/* CHECK_F(f) : evaluates `f` once, propagating its error code on failure */
#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
65 
66 
67 /* **************************************************************
68 *  Utils
69 ****************************************************************/
/*! HUF_optimalTableLog() :
 *  @return : tableLog offering a good size/speed compromise for compressing
 *            `srcSize` bytes over an alphabet of `maxSymbolValue+1` symbols,
 *            capped at `maxTableLog`.
 *  Delegates to FSE's heuristic ; the final argument (1) is a tuning parameter
 *  of FSE_optimalTableLog_internal (see fse.h). */
unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
{
    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
}
74 
75 
76 /* *******************************************************
77 *  HUF : Huffman block compression
78 *********************************************************/
79 /* HUF_compressWeights() :
80  * Same as FSE_compress(), but dedicated to huff0's weights compression.
81  * The use case needs much less stack memory.
82  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
83  */
#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
/* Compresses the table of huffman weights with FSE.
 * @return : compressed size (FSE header + payload) written into `dst`,
 *           0 if the weights are not compressible (caller falls back to raw),
 *           1 if a single repeated weight (rle), or an error code. */
static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
{
    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + dstSize;

    unsigned maxSymbolValue = HUF_TABLELOG_MAX;   /* weights are <= HUF_TABLELOG_MAX by contract */
    U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;

    FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
    BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];

    unsigned count[HUF_TABLELOG_MAX+1];
    S16 norm[HUF_TABLELOG_MAX+1];

    /* init conditions */
    if (wtSize <= 1) return 0;  /* Not compressible */

    /* Scan input and build symbol stats */
    {   unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize);   /* never fails */
        if (maxCount == wtSize) return 1;   /* only a single symbol in src : rle */
        if (maxCount == 1) return 0;        /* each symbol present maximum once => not compressible */
    }

    tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
    CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );

    /* Write table description header */
    {   CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
        op += hSize;
    }

    /* Compress */
    CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
    {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) );
        if (cSize == 0) return 0;   /* not enough space for compressed data */
        op += cSize;
    }

    return op-ostart;
}
126 
127 
/* One entry of the huffman compression table : per-symbol code description */
struct HUF_CElt_s {
  U16  val;      /* code value (bit pattern emitted via BIT_addBitsFast) */
  BYTE nbBits;   /* code length in bits ; 0 means symbol is absent from the tree */
};   /* typedef'd to HUF_CElt within "huf.h" */
132 
/*! HUF_writeCTable() :
    `CTable` : Huffman tree to save, using huf representation.
    Header byte op[0] : value < 128 means the next op[0] bytes are FSE-compressed weights ;
    value >= 128 encodes (nbSymbols-1) followed by raw 4-bit weights, 2 per byte.
    @return : size of saved CTable */
size_t HUF_writeCTable (void* dst, size_t maxDstSize,
                        const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
{
    BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
    BYTE* op = (BYTE*)dst;
    U32 n;

     /* check conditions */
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);

    /* convert to weight : weight = huffLog + 1 - nbBits ; nbBits==0 => weight 0 */
    bitsToWeight[0] = 0;
    for (n=1; n<huffLog+1; n++)
        bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
    for (n=0; n<maxSymbolValue; n++)
        huffWeight[n] = bitsToWeight[CTable[n].nbBits];

    /* attempt weights compression by FSE */
    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
            op[0] = (BYTE)hSize;
            return hSize+1;
    }   }

    /* write raw values as 4-bits (max : 15) */
    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
    huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
    for (n=0; n<maxSymbolValue; n+=2)
        op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
    return ((maxSymbolValue+1)/2) + 1;
}
170 
171 
/*! HUF_readCTable() :
 *  Rebuilds a compression table (`CTable`) from the serialized weights in `src`
 *  (the format produced by HUF_writeCTable()).
 *  On success, *maxSymbolValuePtr is updated to the last symbol actually present.
 *  @return : nb of bytes read from `src`, or an error code */
size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize)
{
    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
    U32 tableLog = 0;
    U32 nbSymbols = 0;

    /* get symbol weights */
    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));

    /* check result */
    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);

    /* Prepare base value per rank */
    {   U32 n, nextRankStart = 0;
        for (n=1; n<=tableLog; n++) {
            U32 current = nextRankStart;
            nextRankStart += (rankVal[n] << (n-1));
            rankVal[n] = current;
    }   }

    /* fill nbBits : weight w maps back to code length tableLog + 1 - w */
    {   U32 n; for (n=0; n<nbSymbols; n++) {
            const U32 w = huffWeight[n];
            CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
    }   }

    /* fill val */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
        /* determine starting value per rank */
        valPerRank[tableLog+1] = 0;   /* for w==0 */
        {   U16 min = 0;
            U32 n; for (n=tableLog; n>0; n--) {  /* start at n=tablelog <-> w=1 */
                valPerRank[n] = min;     /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        /* assign value within rank, symbol order */
        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
    }

    *maxSymbolValuePtr = nbSymbols - 1;
    return readSize;
}
219 
220 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
221 {
222     const HUF_CElt* table = (const HUF_CElt*)symbolTable;
223     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
224     return table[symbolValue].nbBits;
225 }
226 
227 
/* Node of the huffman tree under construction (leaf or internal node) */
typedef struct nodeElt_s {
    U32 count;    /* symbol frequency (leaf), or sum of children (internal) */
    U16 parent;   /* index of parent node within the huffNode table */
    BYTE byte;    /* symbol value (meaningful for leaves) */
    BYTE nbBits;  /* resulting code length, filled after tree construction */
} nodeElt;
234 
/* HUF_setMaxHeight() :
 * Enforces maxNbBits on the huffman tree described by huffNode[0..lastNonNull]
 * (sorted by increasing nbBits). Symbols longer than maxNbBits are truncated,
 * and the resulting "debt" in Kraft-inequality terms is repaid by lengthening
 * other codes, preferring the cheapest (least frequent) candidates.
 * @return : effective max nb of bits after adjustment (<= maxNbBits). */
static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
{
    const U32 largestBits = huffNode[lastNonNull].nbBits;
    if (largestBits <= maxNbBits) return largestBits;   /* early exit : no elt > maxNbBits */

    /* there are several too large elements (at least >= 2) */
    {   int totalCost = 0;
        const U32 baseCost = 1 << (largestBits - maxNbBits);
        U32 n = lastNonNull;

        /* truncate every over-long code to maxNbBits, accumulating the cost */
        while (huffNode[n].nbBits > maxNbBits) {
            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
            huffNode[n].nbBits = (BYTE)maxNbBits;
            n --;
        }  /* n stops at huffNode[n].nbBits <= maxNbBits */
        while (huffNode[n].nbBits == maxNbBits) n--;   /* n end at index of smallest symbol using < maxNbBits */

        /* renorm totalCost */
        totalCost >>= (largestBits - maxNbBits);  /* note : totalCost is necessarily a multiple of baseCost */

        /* repay normalized cost */
        {   U32 const noSymbol = 0xF0F0F0F0;   /* sentinel : memset pattern below fills rankLast with this value */
            U32 rankLast[HUF_TABLELOG_MAX+2];  /* rankLast[r] : pos of last symbol using (maxNbBits - r) bits */
            int pos;

            /* Get pos of last (smallest) symbol per rank */
            memset(rankLast, 0xF0, sizeof(rankLast));
            {   U32 currentNbBits = maxNbBits;
                for (pos=n ; pos >= 0; pos--) {
                    if (huffNode[pos].nbBits >= currentNbBits) continue;
                    currentNbBits = huffNode[pos].nbBits;   /* < maxNbBits */
                    rankLast[maxNbBits-currentNbBits] = pos;
            }   }

            while (totalCost > 0) {
                /* start from the rank that repays the largest power-of-2 chunk of the debt */
                U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
                    U32 highPos = rankLast[nBitsToDecrease];
                    U32 lowPos = rankLast[nBitsToDecrease-1];
                    if (highPos == noSymbol) continue;
                    if (lowPos == noSymbol) break;
                    {   U32 const highTotal = huffNode[highPos].count;
                        U32 const lowTotal = 2 * huffNode[lowPos].count;
                        if (highTotal <= lowTotal) break;
                }   }
                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
                    nBitsToDecrease ++;
                totalCost -= 1 << (nBitsToDecrease-1);
                if (rankLast[nBitsToDecrease-1] == noSymbol)
                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];   /* this rank is no longer empty */
                huffNode[rankLast[nBitsToDecrease]].nbBits ++;
                if (rankLast[nBitsToDecrease] == 0)    /* special case, reached largest symbol */
                    rankLast[nBitsToDecrease] = noSymbol;
                else {
                    rankLast[nBitsToDecrease]--;
                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
            }   }   /* while (totalCost > 0) */

            while (totalCost < 0) {  /* Sometimes, cost correction overshoot */
                if (rankLast[1] == noSymbol) {  /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
                    while (huffNode[n].nbBits == maxNbBits) n--;
                    huffNode[n+1].nbBits--;
                    rankLast[1] = n+1;
                    totalCost++;
                    continue;
                }
                huffNode[ rankLast[1] + 1 ].nbBits--;
                rankLast[1]++;
                totalCost ++;
    }   }   }   /* there are several too large elements (at least >= 2) */

    return maxNbBits;
}
311 
312 
/* bucket descriptor used by HUF_sort() ; one bucket per log2(count) class */
typedef struct {
    U32 base;     /* first index of this bucket within huffNode */
    U32 current;  /* insertion cursor : next free slot within the bucket */
} rankPos;
317 
/* HUF_sort() :
 * Fills huffNode[0..maxSymbolValue] sorted by decreasing count.
 * Symbols are first bucketed by log2(count+1), then insertion-sorted within
 * their bucket, which keeps the sort cheap for typical skewed distributions. */
static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue)
{
    rankPos rank[32];
    U32 n;

    /* histogram : how many symbols fall into each log2 bucket */
    memset(rank, 0, sizeof(rank));
    for (n=0; n<=maxSymbolValue; n++) {
        U32 r = BIT_highbit32(count[n] + 1);
        rank[r].base ++;
    }
    /* suffix sums : rank[r].base becomes the start index of bucket r */
    for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
    for (n=0; n<32; n++) rank[n].current = rank[n].base;
    for (n=0; n<=maxSymbolValue; n++) {
        U32 const c = count[n];
        U32 const r = BIT_highbit32(c+1) + 1;
        U32 pos = rank[r].current++;
        /* insertion sort within the bucket, keeping larger counts first */
        while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) {
            huffNode[pos] = huffNode[pos-1];
            pos--;
        }
        huffNode[pos].count = c;
        huffNode[pos].byte  = (BYTE)n;
    }
}
342 
343 
/** HUF_buildCTable_wksp() :
 *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
 *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of HUF_CTABLE_WORKSPACE_SIZE_U32 unsigned.
 *  @return : effective max nb of bits (<= maxNbBits), or an error code.
 */
#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)   /* internal nodes are stored after the leaves */
typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
{
    nodeElt* const huffNode0 = (nodeElt*)workSpace;
    nodeElt* const huffNode = huffNode0+1;   /* huffNode0[0] is reserved as a sentinel (see below) */
    U32 n, nonNullRank;
    int lowS, lowN;
    U16 nodeNb = STARTNODE;
    U32 nodeRoot;

    /* safety checks */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < sizeof(huffNodeTable)) return ERROR(workSpace_tooSmall);
    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    memset(huffNode0, 0, sizeof(huffNodeTable));

    /* sort, decreasing order */
    HUF_sort(huffNode, count, maxSymbolValue);

    /* init for parents : merge the two rarest symbols into the first internal node */
    nonNullRank = maxSymbolValue;
    while(huffNode[nonNullRank].count == 0) nonNullRank--;
    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
    huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
    nodeNb++; lowS-=2;
    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
    huffNode0[0].count = (U32)(1U<<31);  /* fake entry, strong barrier */

    /* create parents : repeatedly merge the two smallest remaining nodes */
    while (nodeNb <= nodeRoot) {
        U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
        huffNode[n1].parent = huffNode[n2].parent = nodeNb;
        nodeNb++;
    }

    /* distribute weights (unlimited tree height) */
    huffNode[nodeRoot].nbBits = 0;
    for (n=nodeRoot-1; n>=STARTNODE; n--)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
    for (n=0; n<=nonNullRank; n++)
        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;

    /* enforce maxTableLog */
    maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);

    /* fill result into tree (val, nbBits) */
    {   U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
        U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
        if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
        for (n=0; n<=nonNullRank; n++)
            nbPerRank[huffNode[n].nbBits]++;
        /* determine starting value per rank */
        {   U16 min = 0;
            for (n=maxNbBits; n>0; n--) {
                valPerRank[n] = min;      /* get starting value within each rank */
                min += nbPerRank[n];
                min >>= 1;
        }   }
        for (n=0; n<=maxSymbolValue; n++)
            tree[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
        for (n=0; n<=maxSymbolValue; n++)
            tree[n].val = valPerRank[tree[n].nbBits]++;   /* assign value within rank, symbol order */
    }

    return maxNbBits;
}
419 
/** HUF_buildCTable() :
 *  Convenience wrapper over HUF_buildCTable_wksp(), using a stack-allocated workspace.
 * @return : maxNbBits
 *  Note : count is used before tree is written, so they can safely overlap
 */
size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
{
    huffNodeTable nodeTable;
    return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable));
}
429 
430 static size_t HUF_estimateCompressedSize(HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
431 {
432     size_t nbBits = 0;
433     int s;
434     for (s = 0; s <= (int)maxSymbolValue; ++s) {
435         nbBits += CTable[s].nbBits * count[s];
436     }
437     return nbBits >> 3;
438 }
439 
440 static int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
441   int bad = 0;
442   int s;
443   for (s = 0; s <= (int)maxSymbolValue; ++s) {
444     bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
445   }
446   return !bad;
447 }
448 
449 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
450 
/* Pushes the code of `symbol` into the bitstream.
 * Uses BIT_addBitsFast, which requires the caller to guarantee enough
 * accumulator space (see the HUF_FLUSHBITS_* macros below). */
FORCE_INLINE_TEMPLATE void
HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
{
    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
}
456 
#define HUF_FLUSHBITS(s)  BIT_flushBits(s)

/* Conditional flush : compiles to a no-op when the bit container is provably
 * wide enough to accumulate 2 more symbols without overflowing */
#define HUF_FLUSHBITS_1(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)

/* Same, for 4 more symbols */
#define HUF_FLUSHBITS_2(stream) \
    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
464 
/* HUF_compress1X_usingCTable_internal_body() :
 * Encodes `src` as a single huffman bitstream using the prepared `CTable`.
 * Symbols are emitted in reverse order, 4 at a time (the decoder reads backwards).
 * @return : compressed size, 0 if dst is too small, or an error code. */
FORCE_INLINE_TEMPLATE size_t
HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    const BYTE* ip = (const BYTE*) src;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;
    size_t n;
    BIT_CStream_t bitC;

    /* init */
    if (dstSize < 8) return 0;   /* not enough space to compress */
    { size_t const initErr = BIT_initCStream(&bitC, op, oend-op);
      if (HUF_isError(initErr)) return 0; }

    /* handle the trailing 1-3 symbols first, so the main loop runs on multiples of 4 */
    n = srcSize & ~3;  /* join to mod 4 */
    switch (srcSize & 3)
    {
        case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
                 HUF_FLUSHBITS_2(&bitC);
                 /* fall-through */
        case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
                 HUF_FLUSHBITS_1(&bitC);
                 /* fall-through */
        case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
                 HUF_FLUSHBITS(&bitC);
                 /* fall-through */
        case 0 : /* fall-through */
        default: break;
    }

    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
        HUF_FLUSHBITS_2(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
        HUF_FLUSHBITS_1(&bitC);
        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
        HUF_FLUSHBITS(&bitC);
    }

    return BIT_closeCStream(&bitC);
}
511 
#if DYNAMIC_BMI2

/* BMI2-compiled instantiation of the shared body, selected at runtime */
static TARGET_ATTRIBUTE("bmi2") size_t
HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                   const void* src, size_t srcSize,
                                   const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

/* Portable instantiation of the shared body */
static size_t
HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
                                      const void* src, size_t srcSize,
                                      const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

/* Runtime dispatch : picks the BMI2 variant when `bmi2` is non-zero */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    if (bmi2) {
        return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
    }
    return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
}

#else

/* Single instantiation when dynamic BMI2 dispatch is disabled at build time */
static size_t
HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
                              const void* src, size_t srcSize,
                              const HUF_CElt* CTable, const int bmi2)
{
    (void)bmi2;
    return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
}

#endif
553 
/* Public single-stream entry point ; always uses the portable code path */
size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
{
    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
}
558 
559 
560 static size_t
561 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
562                               const void* src, size_t srcSize,
563                               const HUF_CElt* CTable, int bmi2)
564 {
565     size_t const segmentSize = (srcSize+3)/4;   /* first 3 segments */
566     const BYTE* ip = (const BYTE*) src;
567     const BYTE* const iend = ip + srcSize;
568     BYTE* const ostart = (BYTE*) dst;
569     BYTE* const oend = ostart + dstSize;
570     BYTE* op = ostart;
571 
572     if (dstSize < 6 + 1 + 1 + 1 + 8) return 0;   /* minimum space to compress successfully */
573     if (srcSize < 12) return 0;   /* no saving possible : too small input */
574     op += 6;   /* jumpTable */
575 
576     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
577         if (cSize==0) return 0;
578         assert(cSize <= 65535);
579         MEM_writeLE16(ostart, (U16)cSize);
580         op += cSize;
581     }
582 
583     ip += segmentSize;
584     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
585         if (cSize==0) return 0;
586         assert(cSize <= 65535);
587         MEM_writeLE16(ostart+2, (U16)cSize);
588         op += cSize;
589     }
590 
591     ip += segmentSize;
592     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, segmentSize, CTable, bmi2) );
593         if (cSize==0) return 0;
594         assert(cSize <= 65535);
595         MEM_writeLE16(ostart+4, (U16)cSize);
596         op += cSize;
597     }
598 
599     ip += segmentSize;
600     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, oend-op, ip, iend-ip, CTable, bmi2) );
601         if (cSize==0) return 0;
602         op += cSize;
603     }
604 
605     return op-ostart;
606 }
607 
608 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
609 {
610     return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
611 }
612 
613 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
614 
615 static size_t HUF_compressCTable_internal(
616                 BYTE* const ostart, BYTE* op, BYTE* const oend,
617                 const void* src, size_t srcSize,
618                 HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
619 {
620     size_t const cSize = (nbStreams==HUF_singleStream) ?
621                          HUF_compress1X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2) :
622                          HUF_compress4X_usingCTable_internal(op, oend - op, src, srcSize, CTable, bmi2);
623     if (HUF_isError(cSize)) { return cSize; }
624     if (cSize==0) { return 0; }   /* uncompressible */
625     op += cSize;
626     /* check compressibility */
627     if ((size_t)(op-ostart) >= srcSize-1) { return 0; }
628     return op-ostart;
629 }
630 
/* Scratch tables carved out of the caller-provided workspace by HUF_compress_internal() */
typedef struct {
    unsigned count[HUF_SYMBOLVALUE_MAX + 1];   /* symbol histogram */
    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];  /* freshly-built compression table */
    huffNodeTable nodeTable;                   /* tree-building workspace */
} HUF_compress_tables_t;
636 
/* HUF_compress_internal() :
 * Full huffman compression pipeline : histogram, tree construction, header,
 * then payload encoding, with optional re-use of a previous table (`repeat`).
 * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned.
 * @return : compressed size, 0 if not compressible, or an error code. */
static size_t
HUF_compress_internal (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       HUF_nbStreams_e nbStreams,
                       void* workSpace, size_t wkspSize,
                       HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
                 const int bmi2)
{
    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
    BYTE* const ostart = (BYTE*)dst;
    BYTE* const oend = ostart + dstSize;
    BYTE* op = ostart;

    /* checks & inits */
    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
    if (!srcSize) return 0;  /* Uncompressed */
    if (!dstSize) return 0;  /* cannot fit anything within dst budget */
    if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
    if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
    if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
    if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;

    /* Heuristic : If old table is valid, use it for small inputs */
    if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Scan input and build symbol stats */
    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
        if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
        if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
    }

    /* Check validity of previous table */
    if ( repeat
      && *repeat == HUF_repeat_check
      && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) {
        *repeat = HUF_repeat_none;
    }
    /* Heuristic : use existing table for small inputs */
    if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
        return HUF_compressCTable_internal(ostart, op, oend,
                                           src, srcSize,
                                           nbStreams, oldHufTable, bmi2);
    }

    /* Build Huffman Tree */
    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
    {   size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
                                            maxSymbolValue, huffLog,
                                            table->nodeTable, sizeof(table->nodeTable));
        CHECK_F(maxBits);
        huffLog = (U32)maxBits;
        /* Zero unused symbols in CTable, so we can check it for validity */
        memset(table->CTable + (maxSymbolValue + 1), 0,
               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
    }

    /* Write table description header */
    {   CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
        /* Check if using previous huffman table is beneficial */
        if (repeat && *repeat != HUF_repeat_none) {
            size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
            size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue);
            if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
                return HUF_compressCTable_internal(ostart, op, oend,
                                                   src, srcSize,
                                                   nbStreams, oldHufTable, bmi2);
        }   }

        /* Use the new huffman table */
        if (hSize + 12ul >= srcSize) { return 0; }   /* header alone eats any possible gain */
        op += hSize;
        if (repeat) { *repeat = HUF_repeat_none; }
        if (oldHufTable)
            memcpy(oldHufTable, table->CTable, sizeof(table->CTable));  /* Save new table */
    }
    return HUF_compressCTable_internal(ostart, op, oend,
                                       src, srcSize,
                                       nbStreams, table->CTable, bmi2);
}
725 
726 
/* HUF_compress1X_wksp() :
 * Single-stream compression using an externally allocated workspace
 * (at least HUF_WORKSPACE_SIZE bytes, 4-byte aligned) ; no table re-use. */
size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}
737 
/* HUF_compress1X_repeat() :
 * Single-stream compression that may re-use (and updates) `hufTable`
 * according to the `repeat` / `preferRepeat` policy. */
size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_singleStream,
                                 workSpace, wkspSize, hufTable,
                                 repeat, preferRepeat, bmi2);
}
749 
/* HUF_compress1X() :
 * Convenience single-stream entry point with a stack-allocated workspace. */
size_t HUF_compress1X (void* dst, size_t dstSize,
                 const void* src, size_t srcSize,
                 unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
757 
/* HUF_compress4X_wksp():
 * compress input using 4 streams.
 * provide workspace to generate compression tables */
size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 NULL, NULL, 0, 0 /*bmi2*/);
}
771 
/* HUF_compress4X_repeat():
 * compress input using 4 streams.
 * re-use an existing huffman compression table */
size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                      const void* src, size_t srcSize,
                      unsigned maxSymbolValue, unsigned huffLog,
                      void* workSpace, size_t wkspSize,
                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
{
    return HUF_compress_internal(dst, dstSize, src, srcSize,
                                 maxSymbolValue, huffLog, HUF_fourStreams,
                                 workSpace, wkspSize,
                                 hufTable, repeat, preferRepeat, bmi2);
}
786 
/* HUF_compress2() :
 * Four-stream compression with a stack-allocated workspace. */
size_t HUF_compress2 (void* dst, size_t dstSize,
                const void* src, size_t srcSize,
                unsigned maxSymbolValue, unsigned huffLog)
{
    unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
    return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
}
794 
/* HUF_compress() :
 * Simplest entry point : full 256-symbol alphabet, default table log. */
size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
}
799