1*a28cd43dSSascha Wildner /*
2*a28cd43dSSascha Wildner  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3*a28cd43dSSascha Wildner  * All rights reserved.
4*a28cd43dSSascha Wildner  *
5*a28cd43dSSascha Wildner  * This source code is licensed under both the BSD-style license (found in the
6*a28cd43dSSascha Wildner  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*a28cd43dSSascha Wildner  * in the COPYING file in the root directory of this source tree).
8*a28cd43dSSascha Wildner  * You may select, at your option, one of the above-listed licenses.
9*a28cd43dSSascha Wildner  */
10*a28cd43dSSascha Wildner 
11*a28cd43dSSascha Wildner  /*-*************************************
12*a28cd43dSSascha Wildner  *  Dependencies
13*a28cd43dSSascha Wildner  ***************************************/
14*a28cd43dSSascha Wildner #include "zstd_compress_sequences.h"
15*a28cd43dSSascha Wildner 
/**
 * Fixed-point lookup table for -log2(x / 256), indexed by x in [0, 256).
 * Entry 0 is defined to be 0.
 * Every other entry x holds floor(-log2(x / 256) * 256).
 */
static const unsigned kInverseProbabilityLog256[256] = {
    /*   0 */ 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
    /*  12 */ 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
    /*  24 */ 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
    /*  36 */ 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
    /*  48 */ 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
    /*  60 */ 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
    /*  72 */ 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
    /*  84 */ 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
    /*  96 */ 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
    /* 108 */ 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
    /* 120 */ 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
    /* 132 */ 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
    /* 144 */ 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
    /* 156 */ 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
    /* 168 */ 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
    /* 180 */ 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
    /* 192 */ 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
    /* 204 */ 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
    /* 216 */ 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
    /* 228 */ 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
    /* 240 */ 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
    /* 252 */ 5, 4, 2, 1,
};
45*a28cd43dSSascha Wildner 
/**
 * Returns the max symbol value recorded in an FSE compression table.
 * The table is viewed as an array of 16-bit words and the value is read
 * from the word at index 1.
 */
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable)
{
    U16 const* const words = (U16 const*)(void const*)ctable;
    return (unsigned)MEM_read16(words + 1);
}
52*a28cd43dSSascha Wildner 
/**
 * Tells whether low probability symbols should use ncount=-1
 * (non-zero result) or ncount=1 instead (zero result).
 */
static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
{
    /* Heuristic: this threshold should cover most blocks <= 16K, and
     * start to fade out after 16K up to about 32K, depending on
     * compressibility.
     */
    size_t const lowProbThreshold = 2048;
    return (nbSeq < lowProbThreshold) ? 0u : 1u;
}
65*a28cd43dSSascha Wildner 
66*a28cd43dSSascha Wildner /**
67*a28cd43dSSascha Wildner  * Returns the cost in bytes of encoding the normalized count header.
68*a28cd43dSSascha Wildner  * Returns an error if any of the helper functions return an error.
69*a28cd43dSSascha Wildner  */
ZSTD_NCountCost(unsigned const * count,unsigned const max,size_t const nbSeq,unsigned const FSELog)70*a28cd43dSSascha Wildner static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
71*a28cd43dSSascha Wildner                               size_t const nbSeq, unsigned const FSELog)
72*a28cd43dSSascha Wildner {
73*a28cd43dSSascha Wildner     BYTE wksp[FSE_NCOUNTBOUND];
74*a28cd43dSSascha Wildner     S16 norm[MaxSeq + 1];
75*a28cd43dSSascha Wildner     const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
76*a28cd43dSSascha Wildner     FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
77*a28cd43dSSascha Wildner     return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
78*a28cd43dSSascha Wildner }
79*a28cd43dSSascha Wildner 
80*a28cd43dSSascha Wildner /**
81*a28cd43dSSascha Wildner  * Returns the cost in bits of encoding the distribution described by count
82*a28cd43dSSascha Wildner  * using the entropy bound.
83*a28cd43dSSascha Wildner  */
ZSTD_entropyCost(unsigned const * count,unsigned const max,size_t const total)84*a28cd43dSSascha Wildner static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
85*a28cd43dSSascha Wildner {
86*a28cd43dSSascha Wildner     unsigned cost = 0;
87*a28cd43dSSascha Wildner     unsigned s;
88*a28cd43dSSascha Wildner     for (s = 0; s <= max; ++s) {
89*a28cd43dSSascha Wildner         unsigned norm = (unsigned)((256 * count[s]) / total);
90*a28cd43dSSascha Wildner         if (count[s] != 0 && norm == 0)
91*a28cd43dSSascha Wildner             norm = 1;
92*a28cd43dSSascha Wildner         assert(count[s] < total);
93*a28cd43dSSascha Wildner         cost += count[s] * kInverseProbabilityLog256[norm];
94*a28cd43dSSascha Wildner     }
95*a28cd43dSSascha Wildner     return cost >> 8;
96*a28cd43dSSascha Wildner }
97*a28cd43dSSascha Wildner 
98*a28cd43dSSascha Wildner /**
99*a28cd43dSSascha Wildner  * Returns the cost in bits of encoding the distribution in count using ctable.
100*a28cd43dSSascha Wildner  * Returns an error if ctable cannot represent all the symbols in count.
101*a28cd43dSSascha Wildner  */
ZSTD_fseBitCost(FSE_CTable const * ctable,unsigned const * count,unsigned const max)102*a28cd43dSSascha Wildner size_t ZSTD_fseBitCost(
103*a28cd43dSSascha Wildner     FSE_CTable const* ctable,
104*a28cd43dSSascha Wildner     unsigned const* count,
105*a28cd43dSSascha Wildner     unsigned const max)
106*a28cd43dSSascha Wildner {
107*a28cd43dSSascha Wildner     unsigned const kAccuracyLog = 8;
108*a28cd43dSSascha Wildner     size_t cost = 0;
109*a28cd43dSSascha Wildner     unsigned s;
110*a28cd43dSSascha Wildner     FSE_CState_t cstate;
111*a28cd43dSSascha Wildner     FSE_initCState(&cstate, ctable);
112*a28cd43dSSascha Wildner     if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
113*a28cd43dSSascha Wildner         DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
114*a28cd43dSSascha Wildner                     ZSTD_getFSEMaxSymbolValue(ctable), max);
115*a28cd43dSSascha Wildner         return ERROR(GENERIC);
116*a28cd43dSSascha Wildner     }
117*a28cd43dSSascha Wildner     for (s = 0; s <= max; ++s) {
118*a28cd43dSSascha Wildner         unsigned const tableLog = cstate.stateLog;
119*a28cd43dSSascha Wildner         unsigned const badCost = (tableLog + 1) << kAccuracyLog;
120*a28cd43dSSascha Wildner         unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
121*a28cd43dSSascha Wildner         if (count[s] == 0)
122*a28cd43dSSascha Wildner             continue;
123*a28cd43dSSascha Wildner         if (bitCost >= badCost) {
124*a28cd43dSSascha Wildner             DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
125*a28cd43dSSascha Wildner             return ERROR(GENERIC);
126*a28cd43dSSascha Wildner         }
127*a28cd43dSSascha Wildner         cost += (size_t)count[s] * bitCost;
128*a28cd43dSSascha Wildner     }
129*a28cd43dSSascha Wildner     return cost >> kAccuracyLog;
130*a28cd43dSSascha Wildner }
131*a28cd43dSSascha Wildner 
132*a28cd43dSSascha Wildner /**
133*a28cd43dSSascha Wildner  * Returns the cost in bits of encoding the distribution in count using the
134*a28cd43dSSascha Wildner  * table described by norm. The max symbol support by norm is assumed >= max.
135*a28cd43dSSascha Wildner  * norm must be valid for every symbol with non-zero probability in count.
136*a28cd43dSSascha Wildner  */
ZSTD_crossEntropyCost(short const * norm,unsigned accuracyLog,unsigned const * count,unsigned const max)137*a28cd43dSSascha Wildner size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
138*a28cd43dSSascha Wildner                              unsigned const* count, unsigned const max)
139*a28cd43dSSascha Wildner {
140*a28cd43dSSascha Wildner     unsigned const shift = 8 - accuracyLog;
141*a28cd43dSSascha Wildner     size_t cost = 0;
142*a28cd43dSSascha Wildner     unsigned s;
143*a28cd43dSSascha Wildner     assert(accuracyLog <= 8);
144*a28cd43dSSascha Wildner     for (s = 0; s <= max; ++s) {
145*a28cd43dSSascha Wildner         unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
146*a28cd43dSSascha Wildner         unsigned const norm256 = normAcc << shift;
147*a28cd43dSSascha Wildner         assert(norm256 > 0);
148*a28cd43dSSascha Wildner         assert(norm256 < 256);
149*a28cd43dSSascha Wildner         cost += count[s] * kInverseProbabilityLog256[norm256];
150*a28cd43dSSascha Wildner     }
151*a28cd43dSSascha Wildner     return cost >> 8;
152*a28cd43dSSascha Wildner }
153*a28cd43dSSascha Wildner 
154*a28cd43dSSascha Wildner symbolEncodingType_e
ZSTD_selectEncodingType(FSE_repeat * repeatMode,unsigned const * count,unsigned const max,size_t const mostFrequent,size_t nbSeq,unsigned const FSELog,FSE_CTable const * prevCTable,short const * defaultNorm,U32 defaultNormLog,ZSTD_defaultPolicy_e const isDefaultAllowed,ZSTD_strategy const strategy)155*a28cd43dSSascha Wildner ZSTD_selectEncodingType(
156*a28cd43dSSascha Wildner         FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
157*a28cd43dSSascha Wildner         size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
158*a28cd43dSSascha Wildner         FSE_CTable const* prevCTable,
159*a28cd43dSSascha Wildner         short const* defaultNorm, U32 defaultNormLog,
160*a28cd43dSSascha Wildner         ZSTD_defaultPolicy_e const isDefaultAllowed,
161*a28cd43dSSascha Wildner         ZSTD_strategy const strategy)
162*a28cd43dSSascha Wildner {
163*a28cd43dSSascha Wildner     ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
164*a28cd43dSSascha Wildner     if (mostFrequent == nbSeq) {
165*a28cd43dSSascha Wildner         *repeatMode = FSE_repeat_none;
166*a28cd43dSSascha Wildner         if (isDefaultAllowed && nbSeq <= 2) {
167*a28cd43dSSascha Wildner             /* Prefer set_basic over set_rle when there are 2 or less symbols,
168*a28cd43dSSascha Wildner              * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
169*a28cd43dSSascha Wildner              * If basic encoding isn't possible, always choose RLE.
170*a28cd43dSSascha Wildner              */
171*a28cd43dSSascha Wildner             DEBUGLOG(5, "Selected set_basic");
172*a28cd43dSSascha Wildner             return set_basic;
173*a28cd43dSSascha Wildner         }
174*a28cd43dSSascha Wildner         DEBUGLOG(5, "Selected set_rle");
175*a28cd43dSSascha Wildner         return set_rle;
176*a28cd43dSSascha Wildner     }
177*a28cd43dSSascha Wildner     if (strategy < ZSTD_lazy) {
178*a28cd43dSSascha Wildner         if (isDefaultAllowed) {
179*a28cd43dSSascha Wildner             size_t const staticFse_nbSeq_max = 1000;
180*a28cd43dSSascha Wildner             size_t const mult = 10 - strategy;
181*a28cd43dSSascha Wildner             size_t const baseLog = 3;
182*a28cd43dSSascha Wildner             size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog;  /* 28-36 for offset, 56-72 for lengths */
183*a28cd43dSSascha Wildner             assert(defaultNormLog >= 5 && defaultNormLog <= 6);  /* xx_DEFAULTNORMLOG */
184*a28cd43dSSascha Wildner             assert(mult <= 9 && mult >= 7);
185*a28cd43dSSascha Wildner             if ( (*repeatMode == FSE_repeat_valid)
186*a28cd43dSSascha Wildner               && (nbSeq < staticFse_nbSeq_max) ) {
187*a28cd43dSSascha Wildner                 DEBUGLOG(5, "Selected set_repeat");
188*a28cd43dSSascha Wildner                 return set_repeat;
189*a28cd43dSSascha Wildner             }
190*a28cd43dSSascha Wildner             if ( (nbSeq < dynamicFse_nbSeq_min)
191*a28cd43dSSascha Wildner               || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
192*a28cd43dSSascha Wildner                 DEBUGLOG(5, "Selected set_basic");
193*a28cd43dSSascha Wildner                 /* The format allows default tables to be repeated, but it isn't useful.
194*a28cd43dSSascha Wildner                  * When using simple heuristics to select encoding type, we don't want
195*a28cd43dSSascha Wildner                  * to confuse these tables with dictionaries. When running more careful
196*a28cd43dSSascha Wildner                  * analysis, we don't need to waste time checking both repeating tables
197*a28cd43dSSascha Wildner                  * and default tables.
198*a28cd43dSSascha Wildner                  */
199*a28cd43dSSascha Wildner                 *repeatMode = FSE_repeat_none;
200*a28cd43dSSascha Wildner                 return set_basic;
201*a28cd43dSSascha Wildner             }
202*a28cd43dSSascha Wildner         }
203*a28cd43dSSascha Wildner     } else {
204*a28cd43dSSascha Wildner         size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
205*a28cd43dSSascha Wildner         size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
206*a28cd43dSSascha Wildner         size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
207*a28cd43dSSascha Wildner         size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
208*a28cd43dSSascha Wildner 
209*a28cd43dSSascha Wildner         if (isDefaultAllowed) {
210*a28cd43dSSascha Wildner             assert(!ZSTD_isError(basicCost));
211*a28cd43dSSascha Wildner             assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
212*a28cd43dSSascha Wildner         }
213*a28cd43dSSascha Wildner         assert(!ZSTD_isError(NCountCost));
214*a28cd43dSSascha Wildner         assert(compressedCost < ERROR(maxCode));
215*a28cd43dSSascha Wildner         DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
216*a28cd43dSSascha Wildner                     (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
217*a28cd43dSSascha Wildner         if (basicCost <= repeatCost && basicCost <= compressedCost) {
218*a28cd43dSSascha Wildner             DEBUGLOG(5, "Selected set_basic");
219*a28cd43dSSascha Wildner             assert(isDefaultAllowed);
220*a28cd43dSSascha Wildner             *repeatMode = FSE_repeat_none;
221*a28cd43dSSascha Wildner             return set_basic;
222*a28cd43dSSascha Wildner         }
223*a28cd43dSSascha Wildner         if (repeatCost <= compressedCost) {
224*a28cd43dSSascha Wildner             DEBUGLOG(5, "Selected set_repeat");
225*a28cd43dSSascha Wildner             assert(!ZSTD_isError(repeatCost));
226*a28cd43dSSascha Wildner             return set_repeat;
227*a28cd43dSSascha Wildner         }
228*a28cd43dSSascha Wildner         assert(compressedCost < basicCost && compressedCost < repeatCost);
229*a28cd43dSSascha Wildner     }
230*a28cd43dSSascha Wildner     DEBUGLOG(5, "Selected set_compressed");
231*a28cd43dSSascha Wildner     *repeatMode = FSE_repeat_check;
232*a28cd43dSSascha Wildner     return set_compressed;
233*a28cd43dSSascha Wildner }
234*a28cd43dSSascha Wildner 
235*a28cd43dSSascha Wildner size_t
ZSTD_buildCTable(void * dst,size_t dstCapacity,FSE_CTable * nextCTable,U32 FSELog,symbolEncodingType_e type,unsigned * count,U32 max,const BYTE * codeTable,size_t nbSeq,const S16 * defaultNorm,U32 defaultNormLog,U32 defaultMax,const FSE_CTable * prevCTable,size_t prevCTableSize,void * entropyWorkspace,size_t entropyWorkspaceSize)236*a28cd43dSSascha Wildner ZSTD_buildCTable(void* dst, size_t dstCapacity,
237*a28cd43dSSascha Wildner                 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
238*a28cd43dSSascha Wildner                 unsigned* count, U32 max,
239*a28cd43dSSascha Wildner                 const BYTE* codeTable, size_t nbSeq,
240*a28cd43dSSascha Wildner                 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
241*a28cd43dSSascha Wildner                 const FSE_CTable* prevCTable, size_t prevCTableSize,
242*a28cd43dSSascha Wildner                 void* entropyWorkspace, size_t entropyWorkspaceSize)
243*a28cd43dSSascha Wildner {
244*a28cd43dSSascha Wildner     BYTE* op = (BYTE*)dst;
245*a28cd43dSSascha Wildner     const BYTE* const oend = op + dstCapacity;
246*a28cd43dSSascha Wildner     DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
247*a28cd43dSSascha Wildner 
248*a28cd43dSSascha Wildner     switch (type) {
249*a28cd43dSSascha Wildner     case set_rle:
250*a28cd43dSSascha Wildner         FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
251*a28cd43dSSascha Wildner         RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
252*a28cd43dSSascha Wildner         *op = codeTable[0];
253*a28cd43dSSascha Wildner         return 1;
254*a28cd43dSSascha Wildner     case set_repeat:
255*a28cd43dSSascha Wildner         ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
256*a28cd43dSSascha Wildner         return 0;
257*a28cd43dSSascha Wildner     case set_basic:
258*a28cd43dSSascha Wildner         FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), "");  /* note : could be pre-calculated */
259*a28cd43dSSascha Wildner         return 0;
260*a28cd43dSSascha Wildner     case set_compressed: {
261*a28cd43dSSascha Wildner         S16 norm[MaxSeq + 1];
262*a28cd43dSSascha Wildner         size_t nbSeq_1 = nbSeq;
263*a28cd43dSSascha Wildner         const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
264*a28cd43dSSascha Wildner         if (count[codeTable[nbSeq-1]] > 1) {
265*a28cd43dSSascha Wildner             count[codeTable[nbSeq-1]]--;
266*a28cd43dSSascha Wildner             nbSeq_1--;
267*a28cd43dSSascha Wildner         }
268*a28cd43dSSascha Wildner         assert(nbSeq_1 > 1);
269*a28cd43dSSascha Wildner         assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog));
270*a28cd43dSSascha Wildner         FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
271*a28cd43dSSascha Wildner         {   size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog);   /* overflow protected */
272*a28cd43dSSascha Wildner             FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
273*a28cd43dSSascha Wildner             FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
274*a28cd43dSSascha Wildner             return NCountSize;
275*a28cd43dSSascha Wildner         }
276*a28cd43dSSascha Wildner     }
277*a28cd43dSSascha Wildner     default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
278*a28cd43dSSascha Wildner     }
279*a28cd43dSSascha Wildner }
280*a28cd43dSSascha Wildner 
281*a28cd43dSSascha Wildner FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)282*a28cd43dSSascha Wildner ZSTD_encodeSequences_body(
283*a28cd43dSSascha Wildner             void* dst, size_t dstCapacity,
284*a28cd43dSSascha Wildner             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
285*a28cd43dSSascha Wildner             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
286*a28cd43dSSascha Wildner             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
287*a28cd43dSSascha Wildner             seqDef const* sequences, size_t nbSeq, int longOffsets)
288*a28cd43dSSascha Wildner {
289*a28cd43dSSascha Wildner     BIT_CStream_t blockStream;
290*a28cd43dSSascha Wildner     FSE_CState_t  stateMatchLength;
291*a28cd43dSSascha Wildner     FSE_CState_t  stateOffsetBits;
292*a28cd43dSSascha Wildner     FSE_CState_t  stateLitLength;
293*a28cd43dSSascha Wildner 
294*a28cd43dSSascha Wildner     RETURN_ERROR_IF(
295*a28cd43dSSascha Wildner         ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
296*a28cd43dSSascha Wildner         dstSize_tooSmall, "not enough space remaining");
297*a28cd43dSSascha Wildner     DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
298*a28cd43dSSascha Wildner                 (int)(blockStream.endPtr - blockStream.startPtr),
299*a28cd43dSSascha Wildner                 (unsigned)dstCapacity);
300*a28cd43dSSascha Wildner 
301*a28cd43dSSascha Wildner     /* first symbols */
302*a28cd43dSSascha Wildner     FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
303*a28cd43dSSascha Wildner     FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
304*a28cd43dSSascha Wildner     FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
305*a28cd43dSSascha Wildner     BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
306*a28cd43dSSascha Wildner     if (MEM_32bits()) BIT_flushBits(&blockStream);
307*a28cd43dSSascha Wildner     BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
308*a28cd43dSSascha Wildner     if (MEM_32bits()) BIT_flushBits(&blockStream);
309*a28cd43dSSascha Wildner     if (longOffsets) {
310*a28cd43dSSascha Wildner         U32 const ofBits = ofCodeTable[nbSeq-1];
311*a28cd43dSSascha Wildner         unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
312*a28cd43dSSascha Wildner         if (extraBits) {
313*a28cd43dSSascha Wildner             BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
314*a28cd43dSSascha Wildner             BIT_flushBits(&blockStream);
315*a28cd43dSSascha Wildner         }
316*a28cd43dSSascha Wildner         BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
317*a28cd43dSSascha Wildner                     ofBits - extraBits);
318*a28cd43dSSascha Wildner     } else {
319*a28cd43dSSascha Wildner         BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
320*a28cd43dSSascha Wildner     }
321*a28cd43dSSascha Wildner     BIT_flushBits(&blockStream);
322*a28cd43dSSascha Wildner 
323*a28cd43dSSascha Wildner     {   size_t n;
324*a28cd43dSSascha Wildner         for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
325*a28cd43dSSascha Wildner             BYTE const llCode = llCodeTable[n];
326*a28cd43dSSascha Wildner             BYTE const ofCode = ofCodeTable[n];
327*a28cd43dSSascha Wildner             BYTE const mlCode = mlCodeTable[n];
328*a28cd43dSSascha Wildner             U32  const llBits = LL_bits[llCode];
329*a28cd43dSSascha Wildner             U32  const ofBits = ofCode;
330*a28cd43dSSascha Wildner             U32  const mlBits = ML_bits[mlCode];
331*a28cd43dSSascha Wildner             DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
332*a28cd43dSSascha Wildner                         (unsigned)sequences[n].litLength,
333*a28cd43dSSascha Wildner                         (unsigned)sequences[n].matchLength + MINMATCH,
334*a28cd43dSSascha Wildner                         (unsigned)sequences[n].offset);
335*a28cd43dSSascha Wildner                                                                             /* 32b*/  /* 64b*/
336*a28cd43dSSascha Wildner                                                                             /* (7)*/  /* (7)*/
337*a28cd43dSSascha Wildner             FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
338*a28cd43dSSascha Wildner             FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
339*a28cd43dSSascha Wildner             if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
340*a28cd43dSSascha Wildner             FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
341*a28cd43dSSascha Wildner             if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
342*a28cd43dSSascha Wildner                 BIT_flushBits(&blockStream);                                /* (7)*/
343*a28cd43dSSascha Wildner             BIT_addBits(&blockStream, sequences[n].litLength, llBits);
344*a28cd43dSSascha Wildner             if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
345*a28cd43dSSascha Wildner             BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
346*a28cd43dSSascha Wildner             if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
347*a28cd43dSSascha Wildner             if (longOffsets) {
348*a28cd43dSSascha Wildner                 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
349*a28cd43dSSascha Wildner                 if (extraBits) {
350*a28cd43dSSascha Wildner                     BIT_addBits(&blockStream, sequences[n].offset, extraBits);
351*a28cd43dSSascha Wildner                     BIT_flushBits(&blockStream);                            /* (7)*/
352*a28cd43dSSascha Wildner                 }
353*a28cd43dSSascha Wildner                 BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
354*a28cd43dSSascha Wildner                             ofBits - extraBits);                            /* 31 */
355*a28cd43dSSascha Wildner             } else {
356*a28cd43dSSascha Wildner                 BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
357*a28cd43dSSascha Wildner             }
358*a28cd43dSSascha Wildner             BIT_flushBits(&blockStream);                                    /* (7)*/
359*a28cd43dSSascha Wildner             DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
360*a28cd43dSSascha Wildner     }   }
361*a28cd43dSSascha Wildner 
362*a28cd43dSSascha Wildner     DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
363*a28cd43dSSascha Wildner     FSE_flushCState(&blockStream, &stateMatchLength);
364*a28cd43dSSascha Wildner     DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
365*a28cd43dSSascha Wildner     FSE_flushCState(&blockStream, &stateOffsetBits);
366*a28cd43dSSascha Wildner     DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
367*a28cd43dSSascha Wildner     FSE_flushCState(&blockStream, &stateLitLength);
368*a28cd43dSSascha Wildner 
369*a28cd43dSSascha Wildner     {   size_t const streamSize = BIT_closeCStream(&blockStream);
370*a28cd43dSSascha Wildner         RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
371*a28cd43dSSascha Wildner         return streamSize;
372*a28cd43dSSascha Wildner     }
373*a28cd43dSSascha Wildner }
374*a28cd43dSSascha Wildner 
375*a28cd43dSSascha Wildner static size_t
ZSTD_encodeSequences_default(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)376*a28cd43dSSascha Wildner ZSTD_encodeSequences_default(
377*a28cd43dSSascha Wildner             void* dst, size_t dstCapacity,
378*a28cd43dSSascha Wildner             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
379*a28cd43dSSascha Wildner             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
380*a28cd43dSSascha Wildner             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
381*a28cd43dSSascha Wildner             seqDef const* sequences, size_t nbSeq, int longOffsets)
382*a28cd43dSSascha Wildner {
383*a28cd43dSSascha Wildner     return ZSTD_encodeSequences_body(dst, dstCapacity,
384*a28cd43dSSascha Wildner                                     CTable_MatchLength, mlCodeTable,
385*a28cd43dSSascha Wildner                                     CTable_OffsetBits, ofCodeTable,
386*a28cd43dSSascha Wildner                                     CTable_LitLength, llCodeTable,
387*a28cd43dSSascha Wildner                                     sequences, nbSeq, longOffsets);
388*a28cd43dSSascha Wildner }
389*a28cd43dSSascha Wildner 
390*a28cd43dSSascha Wildner 
391*a28cd43dSSascha Wildner #if DYNAMIC_BMI2
392*a28cd43dSSascha Wildner 
393*a28cd43dSSascha Wildner static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_encodeSequences_bmi2(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)394*a28cd43dSSascha Wildner ZSTD_encodeSequences_bmi2(
395*a28cd43dSSascha Wildner             void* dst, size_t dstCapacity,
396*a28cd43dSSascha Wildner             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
397*a28cd43dSSascha Wildner             FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
398*a28cd43dSSascha Wildner             FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
399*a28cd43dSSascha Wildner             seqDef const* sequences, size_t nbSeq, int longOffsets)
400*a28cd43dSSascha Wildner {
401*a28cd43dSSascha Wildner     return ZSTD_encodeSequences_body(dst, dstCapacity,
402*a28cd43dSSascha Wildner                                     CTable_MatchLength, mlCodeTable,
403*a28cd43dSSascha Wildner                                     CTable_OffsetBits, ofCodeTable,
404*a28cd43dSSascha Wildner                                     CTable_LitLength, llCodeTable,
405*a28cd43dSSascha Wildner                                     sequences, nbSeq, longOffsets);
406*a28cd43dSSascha Wildner }
407*a28cd43dSSascha Wildner 
408*a28cd43dSSascha Wildner #endif
409*a28cd43dSSascha Wildner 
/**
 * Entry point of the sequence encoder.
 * When DYNAMIC_BMI2 is enabled and `bmi2` is non-zero, the bmi2 variant is
 * used; in every other case the default variant is used.
 *
 * @return the value returned by the selected variant.
 */
size_t ZSTD_encodeSequences(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
{
    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
#if DYNAMIC_BMI2
    if (bmi2) {
        return ZSTD_encodeSequences_bmi2(
                dst, dstCapacity,
                CTable_MatchLength, mlCodeTable,
                CTable_OffsetBits, ofCodeTable,
                CTable_LitLength, llCodeTable,
                sequences, nbSeq, longOffsets);
    }
#endif
    (void)bmi2;   /* silences the unused-parameter warning without DYNAMIC_BMI2 */
    return ZSTD_encodeSequences_default(
            dst, dstCapacity,
            CTable_MatchLength, mlCodeTable,
            CTable_OffsetBits, ofCodeTable,
            CTable_LitLength, llCodeTable,
            sequences, nbSeq, longOffsets);
}
434