1 /* ******************************************************************
2    FSE : Finite State Entropy encoder
3    Copyright (C) 2013-2015, Yann Collet.
4 
5    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 
7    Redistribution and use in source and binary forms, with or without
8    modification, are permitted provided that the following conditions are
9    met:
10 
11        * Redistributions of source code must retain the above copyright
12    notice, this list of conditions and the following disclaimer.
13        * Redistributions in binary form must reproduce the above
14    copyright notice, this list of conditions and the following disclaimer
15    in the documentation and/or other materials provided with the
16    distribution.
17 
18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30     You can contact the author at :
31     - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32     - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34 
35 /* **************************************************************
36 *  Includes
37 ****************************************************************/
38 #include <stdlib.h>     /* malloc, free, qsort */
39 #include <string.h>     /* memcpy, memset */
40 #include <stdio.h>      /* printf (debug) */
41 #include "bitstream.h"
42 #include "compiler.h"
43 #define FSE_STATIC_LINKING_ONLY
44 #include "fse.h"
45 #include "error_private.h"
46 
47 
48 /* **************************************************************
49 *  Error Management
50 ****************************************************************/
51 #define FSE_isError ERR_isError
52 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
53 
54 
55 /* **************************************************************
56 *  Templates
57 ****************************************************************/
58 /*
59   designed to be included
60   for type-specific functions (template emulation in C)
61   Objective is to write these functions only once, for improved maintenance
62 */
63 
64 /* safety checks */
65 #ifndef FSE_FUNCTION_EXTENSION
66 #  error "FSE_FUNCTION_EXTENSION must be defined"
67 #endif
68 #ifndef FSE_FUNCTION_TYPE
69 #  error "FSE_FUNCTION_TYPE must be defined"
70 #endif
71 
72 /* Function names */
73 #define FSE_CAT(X,Y) X##Y
74 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
75 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
76 
77 
78 /* Function templates */
79 
80 /* FSE_buildCTable_wksp() :
81  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
82  * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
83  * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
84  */
85 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
86 {
87     U32 const tableSize = 1 << tableLog;
88     U32 const tableMask = tableSize - 1;
89     void* const ptr = ct;
90     U16* const tableU16 = ( (U16*) ptr) + 2;
91     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
92     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
93     U32 const step = FSE_TABLESTEP(tableSize);
94     U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
95 
96     FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
97     U32 highThreshold = tableSize-1;
98 
99     /* CTable header */
100     if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
101     tableU16[-2] = (U16) tableLog;
102     tableU16[-1] = (U16) maxSymbolValue;
103 
104     /* For explanations on how to distribute symbol values over the table :
105     *  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
106 
107     /* symbol start positions */
108     {   U32 u;
109         cumul[0] = 0;
110         for (u=1; u<=maxSymbolValue+1; u++) {
111             if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
112                 cumul[u] = cumul[u-1] + 1;
113                 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
114             } else {
115                 cumul[u] = cumul[u-1] + normalizedCounter[u-1];
116         }   }
117         cumul[maxSymbolValue+1] = tableSize+1;
118     }
119 
120     /* Spread symbols */
121     {   U32 position = 0;
122         U32 symbol;
123         for (symbol=0; symbol<=maxSymbolValue; symbol++) {
124             int nbOccurences;
125             for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
126                 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
127                 position = (position + step) & tableMask;
128                 while (position > highThreshold) position = (position + step) & tableMask;   /* Low proba area */
129         }   }
130 
131         if (position!=0) return ERROR(GENERIC);   /* Must have gone through all positions */
132     }
133 
134     /* Build table */
135     {   U32 u; for (u=0; u<tableSize; u++) {
136         FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
137         tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
138     }   }
139 
140     /* Build Symbol Transformation Table */
141     {   unsigned total = 0;
142         unsigned s;
143         for (s=0; s<=maxSymbolValue; s++) {
144             switch (normalizedCounter[s])
145             {
146             case  0: break;
147 
148             case -1:
149             case  1:
150                 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
151                 symbolTT[s].deltaFindState = total - 1;
152                 total ++;
153                 break;
154             default :
155                 {
156                     U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
157                     U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
158                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
159                     symbolTT[s].deltaFindState = total - normalizedCounter[s];
160                     total +=  normalizedCounter[s];
161     }   }   }   }
162 
163     return 0;
164 }
165 
166 
167 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
168 {
169     FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE];   /* memset() is not necessary, even if static analyzer complain about it */
170     return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
171 }
172 
173 
174 
175 #ifndef FSE_COMMONDEFS_ONLY
176 
177 /*-**************************************************************
178 *  FSE NCount encoding-decoding
179 ****************************************************************/
180 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
181 {
182     size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
183     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
184 }
185 
186 static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
187                                        const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
188                                        unsigned writeIsSafe)
189 {
190     BYTE* const ostart = (BYTE*) header;
191     BYTE* out = ostart;
192     BYTE* const oend = ostart + headerBufferSize;
193     int nbBits;
194     const int tableSize = 1 << tableLog;
195     int remaining;
196     int threshold;
197     U32 bitStream;
198     int bitCount;
199     unsigned charnum = 0;
200     int previous0 = 0;
201 
202     bitStream = 0;
203     bitCount  = 0;
204     /* Table Size */
205     bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
206     bitCount  += 4;
207 
208     /* Init */
209     remaining = tableSize+1;   /* +1 for extra accuracy */
210     threshold = tableSize;
211     nbBits = tableLog+1;
212 
213     while (remaining>1) {  /* stops at 1 */
214         if (previous0) {
215             unsigned start = charnum;
216             while (!normalizedCounter[charnum]) charnum++;
217             while (charnum >= start+24) {
218                 start+=24;
219                 bitStream += 0xFFFFU << bitCount;
220                 if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
221                 out[0] = (BYTE) bitStream;
222                 out[1] = (BYTE)(bitStream>>8);
223                 out+=2;
224                 bitStream>>=16;
225             }
226             while (charnum >= start+3) {
227                 start+=3;
228                 bitStream += 3 << bitCount;
229                 bitCount += 2;
230             }
231             bitStream += (charnum-start) << bitCount;
232             bitCount += 2;
233             if (bitCount>16) {
234                 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
235                 out[0] = (BYTE)bitStream;
236                 out[1] = (BYTE)(bitStream>>8);
237                 out += 2;
238                 bitStream >>= 16;
239                 bitCount -= 16;
240         }   }
241         {   int count = normalizedCounter[charnum++];
242             int const max = (2*threshold-1)-remaining;
243             remaining -= count < 0 ? -count : count;
244             count++;   /* +1 for extra accuracy */
245             if (count>=threshold) count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
246             bitStream += count << bitCount;
247             bitCount  += nbBits;
248             bitCount  -= (count<max);
249             previous0  = (count==1);
250             if (remaining<1) return ERROR(GENERIC);
251             while (remaining<threshold) nbBits--, threshold>>=1;
252         }
253         if (bitCount>16) {
254             if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
255             out[0] = (BYTE)bitStream;
256             out[1] = (BYTE)(bitStream>>8);
257             out += 2;
258             bitStream >>= 16;
259             bitCount -= 16;
260     }   }
261 
262     /* flush remaining bitStream */
263     if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall);   /* Buffer overflow */
264     out[0] = (BYTE)bitStream;
265     out[1] = (BYTE)(bitStream>>8);
266     out+= (bitCount+7) /8;
267 
268     if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
269 
270     return (out-ostart);
271 }
272 
273 
274 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
275 {
276     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
277     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
278 
279     if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
280         return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
281 
282     return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
283 }
284 
285 
286 
287 /*-**************************************************************
288 *  Counting histogram
289 ****************************************************************/
290 /*! FSE_count_simple
291     This function counts byte values within `src`, and store the histogram into table `count`.
292     It doesn't use any additional memory.
293     But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
294     For this reason, prefer using a table `count` with 256 elements.
295     @return : count of most numerous element
296 */
297 size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
298                         const void* src, size_t srcSize)
299 {
300     const BYTE* ip = (const BYTE*)src;
301     const BYTE* const end = ip + srcSize;
302     unsigned maxSymbolValue = *maxSymbolValuePtr;
303     unsigned max=0;
304 
305     memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
306     if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
307 
308     while (ip<end) count[*ip++]++;
309 
310     while (!count[maxSymbolValue]) maxSymbolValue--;
311     *maxSymbolValuePtr = maxSymbolValue;
312 
313     { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
314 
315     return (size_t)max;
316 }
317 
318 
319 /* FSE_count_parallel_wksp() :
320  * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
321  * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
322 static size_t FSE_count_parallel_wksp(
323                                 unsigned* count, unsigned* maxSymbolValuePtr,
324                                 const void* source, size_t sourceSize,
325                                 unsigned checkMax, unsigned* const workSpace)
326 {
327     const BYTE* ip = (const BYTE*)source;
328     const BYTE* const iend = ip+sourceSize;
329     unsigned maxSymbolValue = *maxSymbolValuePtr;
330     unsigned max=0;
331     U32* const Counting1 = workSpace;
332     U32* const Counting2 = Counting1 + 256;
333     U32* const Counting3 = Counting2 + 256;
334     U32* const Counting4 = Counting3 + 256;
335 
336     memset(Counting1, 0, 4*256*sizeof(unsigned));
337 
338     /* safety checks */
339     if (!sourceSize) {
340         memset(count, 0, maxSymbolValue + 1);
341         *maxSymbolValuePtr = 0;
342         return 0;
343     }
344     if (!maxSymbolValue) maxSymbolValue = 255;            /* 0 == default */
345 
346     /* by stripes of 16 bytes */
347     {   U32 cached = MEM_read32(ip); ip += 4;
348         while (ip < iend-15) {
349             U32 c = cached; cached = MEM_read32(ip); ip += 4;
350             Counting1[(BYTE) c     ]++;
351             Counting2[(BYTE)(c>>8) ]++;
352             Counting3[(BYTE)(c>>16)]++;
353             Counting4[       c>>24 ]++;
354             c = cached; cached = MEM_read32(ip); ip += 4;
355             Counting1[(BYTE) c     ]++;
356             Counting2[(BYTE)(c>>8) ]++;
357             Counting3[(BYTE)(c>>16)]++;
358             Counting4[       c>>24 ]++;
359             c = cached; cached = MEM_read32(ip); ip += 4;
360             Counting1[(BYTE) c     ]++;
361             Counting2[(BYTE)(c>>8) ]++;
362             Counting3[(BYTE)(c>>16)]++;
363             Counting4[       c>>24 ]++;
364             c = cached; cached = MEM_read32(ip); ip += 4;
365             Counting1[(BYTE) c     ]++;
366             Counting2[(BYTE)(c>>8) ]++;
367             Counting3[(BYTE)(c>>16)]++;
368             Counting4[       c>>24 ]++;
369         }
370         ip-=4;
371     }
372 
373     /* finish last symbols */
374     while (ip<iend) Counting1[*ip++]++;
375 
376     if (checkMax) {   /* verify stats will fit into destination table */
377         U32 s; for (s=255; s>maxSymbolValue; s--) {
378             Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
379             if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
380     }   }
381 
382     {   U32 s; for (s=0; s<=maxSymbolValue; s++) {
383             count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
384             if (count[s] > max) max = count[s];
385     }   }
386 
387     while (!count[maxSymbolValue]) maxSymbolValue--;
388     *maxSymbolValuePtr = maxSymbolValue;
389     return (size_t)max;
390 }
391 
392 /* FSE_countFast_wksp() :
393  * Same as FSE_countFast(), but using an externally provided scratch buffer.
394  * `workSpace` size must be table of >= `1024` unsigned */
395 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
396                      const void* source, size_t sourceSize, unsigned* workSpace)
397 {
398     if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
399     return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
400 }
401 
402 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
403 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
404                      const void* source, size_t sourceSize)
405 {
406     unsigned tmpCounters[1024];
407     return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
408 }
409 
410 /* FSE_count_wksp() :
411  * Same as FSE_count(), but using an externally provided scratch buffer.
412  * `workSpace` size must be table of >= `1024` unsigned */
413 size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
414                  const void* source, size_t sourceSize, unsigned* workSpace)
415 {
416     if (*maxSymbolValuePtr < 255)
417         return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
418     *maxSymbolValuePtr = 255;
419     return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
420 }
421 
422 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
423                  const void* src, size_t srcSize)
424 {
425     unsigned tmpCounters[1024];
426     return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
427 }
428 
429 
430 
431 /*-**************************************************************
432 *  FSE Compression Code
433 ****************************************************************/
434 /*! FSE_sizeof_CTable() :
435     FSE_CTable is a variable size structure which contains :
436     `U16 tableLog;`
437     `U16 maxSymbolValue;`
438     `U16 nextStateNumber[1 << tableLog];`                         // This size is variable
439     `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
440 Allocation is manual (C standard does not support variable-size structures).
441 */
442 size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
443 {
444     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
445     return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
446 }
447 
448 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
449 {
450     size_t size;
451     if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
452     size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
453     return (FSE_CTable*)malloc(size);
454 }
455 
456 void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
457 
458 /* provides the minimum logSize to safely represent a distribution */
459 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
460 {
461     U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
462     U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
463     U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
464     assert(srcSize > 1); /* Not supported, RLE should be used instead */
465     return minBits;
466 }
467 
468 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
469 {
470     U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
471     U32 tableLog = maxTableLog;
472     U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
473     assert(srcSize > 1); /* Not supported, RLE should be used instead */
474     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
475     if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
476     if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
477     if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
478     if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
479     return tableLog;
480 }
481 
482 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
483 {
484     return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
485 }
486 
487 
488 /* Secondary normalization method.
489    To be used when primary method fails. */
490 
491 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
492 {
493     short const NOT_YET_ASSIGNED = -2;
494     U32 s;
495     U32 distributed = 0;
496     U32 ToDistribute;
497 
498     /* Init */
499     U32 const lowThreshold = (U32)(total >> tableLog);
500     U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
501 
502     for (s=0; s<=maxSymbolValue; s++) {
503         if (count[s] == 0) {
504             norm[s]=0;
505             continue;
506         }
507         if (count[s] <= lowThreshold) {
508             norm[s] = -1;
509             distributed++;
510             total -= count[s];
511             continue;
512         }
513         if (count[s] <= lowOne) {
514             norm[s] = 1;
515             distributed++;
516             total -= count[s];
517             continue;
518         }
519 
520         norm[s]=NOT_YET_ASSIGNED;
521     }
522     ToDistribute = (1 << tableLog) - distributed;
523 
524     if ((total / ToDistribute) > lowOne) {
525         /* risk of rounding to zero */
526         lowOne = (U32)((total * 3) / (ToDistribute * 2));
527         for (s=0; s<=maxSymbolValue; s++) {
528             if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
529                 norm[s] = 1;
530                 distributed++;
531                 total -= count[s];
532                 continue;
533         }   }
534         ToDistribute = (1 << tableLog) - distributed;
535     }
536 
537     if (distributed == maxSymbolValue+1) {
538         /* all values are pretty poor;
539            probably incompressible data (should have already been detected);
540            find max, then give all remaining points to max */
541         U32 maxV = 0, maxC = 0;
542         for (s=0; s<=maxSymbolValue; s++)
543             if (count[s] > maxC) maxV=s, maxC=count[s];
544         norm[maxV] += (short)ToDistribute;
545         return 0;
546     }
547 
548     if (total == 0) {
549         /* all of the symbols were low enough for the lowOne or lowThreshold */
550         for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
551             if (norm[s] > 0) ToDistribute--, norm[s]++;
552         return 0;
553     }
554 
555     {   U64 const vStepLog = 62 - tableLog;
556         U64 const mid = (1ULL << (vStepLog-1)) - 1;
557         U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
558         U64 tmpTotal = mid;
559         for (s=0; s<=maxSymbolValue; s++) {
560             if (norm[s]==NOT_YET_ASSIGNED) {
561                 U64 const end = tmpTotal + (count[s] * rStep);
562                 U32 const sStart = (U32)(tmpTotal >> vStepLog);
563                 U32 const sEnd = (U32)(end >> vStepLog);
564                 U32 const weight = sEnd - sStart;
565                 if (weight < 1)
566                     return ERROR(GENERIC);
567                 norm[s] = (short)weight;
568                 tmpTotal = end;
569     }   }   }
570 
571     return 0;
572 }
573 
574 
575 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
576                            const unsigned* count, size_t total,
577                            unsigned maxSymbolValue)
578 {
579     /* Sanity checks */
580     if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
581     if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
582     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
583     if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
584 
585     {   static U32 const rtbTable[] = {     0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
586         U64 const scale = 62 - tableLog;
587         U64 const step = ((U64)1<<62) / total;   /* <== here, one division ! */
588         U64 const vStep = 1ULL<<(scale-20);
589         int stillToDistribute = 1<<tableLog;
590         unsigned s;
591         unsigned largest=0;
592         short largestP=0;
593         U32 lowThreshold = (U32)(total >> tableLog);
594 
595         for (s=0; s<=maxSymbolValue; s++) {
596             if (count[s] == total) return 0;   /* rle special case */
597             if (count[s] == 0) { normalizedCounter[s]=0; continue; }
598             if (count[s] <= lowThreshold) {
599                 normalizedCounter[s] = -1;
600                 stillToDistribute--;
601             } else {
602                 short proba = (short)((count[s]*step) >> scale);
603                 if (proba<8) {
604                     U64 restToBeat = vStep * rtbTable[proba];
605                     proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
606                 }
607                 if (proba > largestP) largestP=proba, largest=s;
608                 normalizedCounter[s] = proba;
609                 stillToDistribute -= proba;
610         }   }
611         if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
612             /* corner case, need another normalization method */
613             size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
614             if (FSE_isError(errorCode)) return errorCode;
615         }
616         else normalizedCounter[largest] += (short)stillToDistribute;
617     }
618 
619 #if 0
620     {   /* Print Table (debug) */
621         U32 s;
622         U32 nTotal = 0;
623         for (s=0; s<=maxSymbolValue; s++)
624             printf("%3i: %4i \n", s, normalizedCounter[s]);
625         for (s=0; s<=maxSymbolValue; s++)
626             nTotal += abs(normalizedCounter[s]);
627         if (nTotal != (1U<<tableLog))
628             printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
629         getchar();
630     }
631 #endif
632 
633     return tableLog;
634 }
635 
636 
637 /* fake FSE_CTable, for raw (uncompressed) input */
638 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
639 {
640     const unsigned tableSize = 1 << nbBits;
641     const unsigned tableMask = tableSize - 1;
642     const unsigned maxSymbolValue = tableMask;
643     void* const ptr = ct;
644     U16* const tableU16 = ( (U16*) ptr) + 2;
645     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1);   /* assumption : tableLog >= 1 */
646     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
647     unsigned s;
648 
649     /* Sanity checks */
650     if (nbBits < 1) return ERROR(GENERIC);             /* min size */
651 
652     /* header */
653     tableU16[-2] = (U16) nbBits;
654     tableU16[-1] = (U16) maxSymbolValue;
655 
656     /* Build table */
657     for (s=0; s<tableSize; s++)
658         tableU16[s] = (U16)(tableSize + s);
659 
660     /* Build Symbol Transformation Table */
661     {   const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
662         for (s=0; s<=maxSymbolValue; s++) {
663             symbolTT[s].deltaNbBits = deltaNbBits;
664             symbolTT[s].deltaFindState = s-1;
665     }   }
666 
667     return 0;
668 }
669 
670 /* fake FSE_CTable, for rle input (always same symbol) */
671 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
672 {
673     void* ptr = ct;
674     U16* tableU16 = ( (U16*) ptr) + 2;
675     void* FSCTptr = (U32*)ptr + 2;
676     FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
677 
678     /* header */
679     tableU16[-2] = (U16) 0;
680     tableU16[-1] = (U16) symbolValue;
681 
682     /* Build table */
683     tableU16[0] = 0;
684     tableU16[1] = 0;   /* just in case */
685 
686     /* Build Symbol Transformation Table */
687     symbolTT[symbolValue].deltaNbBits = 0;
688     symbolTT[symbolValue].deltaFindState = 0;
689 
690     return 0;
691 }
692 
693 
694 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
695                            const void* src, size_t srcSize,
696                            const FSE_CTable* ct, const unsigned fast)
697 {
698     const BYTE* const istart = (const BYTE*) src;
699     const BYTE* const iend = istart + srcSize;
700     const BYTE* ip=iend;
701 
702     BIT_CStream_t bitC;
703     FSE_CState_t CState1, CState2;
704 
705     /* init */
706     if (srcSize <= 2) return 0;
707     { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
708       if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
709 
710 #define FSE_FLUSHBITS(s)  (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
711 
712     if (srcSize & 1) {
713         FSE_initCState2(&CState1, ct, *--ip);
714         FSE_initCState2(&CState2, ct, *--ip);
715         FSE_encodeSymbol(&bitC, &CState1, *--ip);
716         FSE_FLUSHBITS(&bitC);
717     } else {
718         FSE_initCState2(&CState2, ct, *--ip);
719         FSE_initCState2(&CState1, ct, *--ip);
720     }
721 
722     /* join to mod 4 */
723     srcSize -= 2;
724     if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
725         FSE_encodeSymbol(&bitC, &CState2, *--ip);
726         FSE_encodeSymbol(&bitC, &CState1, *--ip);
727         FSE_FLUSHBITS(&bitC);
728     }
729 
730     /* 2 or 4 encoding per loop */
731     while ( ip>istart ) {
732 
733         FSE_encodeSymbol(&bitC, &CState2, *--ip);
734 
735         if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
736             FSE_FLUSHBITS(&bitC);
737 
738         FSE_encodeSymbol(&bitC, &CState1, *--ip);
739 
740         if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
741             FSE_encodeSymbol(&bitC, &CState2, *--ip);
742             FSE_encodeSymbol(&bitC, &CState1, *--ip);
743         }
744 
745         FSE_FLUSHBITS(&bitC);
746     }
747 
748     FSE_flushCState(&bitC, &CState2);
749     FSE_flushCState(&bitC, &CState1);
750     return BIT_closeCStream(&bitC);
751 }
752 
753 size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
754                            const void* src, size_t srcSize,
755                            const FSE_CTable* ct)
756 {
757     unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
758 
759     if (fast)
760         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
761     else
762         return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
763 }
764 
765 
766 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
767 
768 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
769 #define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
770 
771 /* FSE_compress_wksp() :
772  * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
773  * `wkspSize` size must be `(1<<tableLog)`.
774  */
775 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
776 {
777     BYTE* const ostart = (BYTE*) dst;
778     BYTE* op = ostart;
779     BYTE* const oend = ostart + dstSize;
780 
781     U32   count[FSE_MAX_SYMBOL_VALUE+1];
782     S16   norm[FSE_MAX_SYMBOL_VALUE+1];
783     FSE_CTable* CTable = (FSE_CTable*)workSpace;
784     size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
785     void* scratchBuffer = (void*)(CTable + CTableSize);
786     size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
787 
788     /* init conditions */
789     if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
790     if (srcSize <= 1) return 0;  /* Not compressible */
791     if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
792     if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
793 
794     /* Scan input and build symbol stats */
795     {   CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
796         if (maxCount == srcSize) return 1;   /* only a single symbol in src : rle */
797         if (maxCount == 1) return 0;         /* each symbol present maximum once => not compressible */
798         if (maxCount < (srcSize >> 7)) return 0;   /* Heuristic : not compressible enough */
799     }
800 
801     tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
802     CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
803 
804     /* Write table description header */
805     {   CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
806         op += nc_err;
807     }
808 
809     /* Compress */
810     CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
811     {   CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
812         if (cSize == 0) return 0;   /* not enough space for compressed data */
813         op += cSize;
814     }
815 
816     /* check compressibility */
817     if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
818 
819     return op-ostart;
820 }
821 
822 typedef struct {
823     FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
824     BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
825 } fseWkspMax_t;
826 
827 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
828 {
829     fseWkspMax_t scratchBuffer;
830     FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE));   /* compilation failures here means scratchBuffer is not large enough */
831     if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
832     return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
833 }
834 
835 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
836 {
837     return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
838 }
839 
840 
841 #endif   /* FSE_COMMONDEFS_ONLY */
842