/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10*a28cd43dSSascha Wildner
/*-*************************************
 *  Dependencies
 ***************************************/
14*a28cd43dSSascha Wildner #include "zstd_compress_sequences.h"
15*a28cd43dSSascha Wildner
/**
 * -log2(x / 256) lookup table for x in [0, 256).
 * If x == 0: Return 0
 * Else: Return floor(-log2(x / 256) * 256)
 *
 * Each entry is therefore a bit cost scaled by 256 (8 fractional bits);
 * users of the table accumulate entries and shift the sum right by 8.
 * The trailing comment on each row gives the index range of that row.
 */
static unsigned const kInverseProbabilityLog256[256] = {
    0,    2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,  /*   0 ..  11 */
    1130, 1100, 1073, 1047, 1024, 1001, 980,  960,  941,  923,  906,  889,   /*  12 ..  23 */
    874,  859,  844,  830,  817,  804,  791,  779,  768,  756,  745,  734,   /*  24 ..  35 */
    724,  714,  704,  694,  685,  676,  667,  658,  650,  642,  633,  626,   /*  36 ..  47 */
    618,  610,  603,  595,  588,  581,  574,  567,  561,  554,  548,  542,   /*  48 ..  59 */
    535,  529,  523,  517,  512,  506,  500,  495,  489,  484,  478,  473,   /*  60 ..  71 */
    468,  463,  458,  453,  448,  443,  438,  434,  429,  424,  420,  415,   /*  72 ..  83 */
    411,  407,  402,  398,  394,  390,  386,  382,  377,  373,  370,  366,   /*  84 ..  95 */
    362,  358,  354,  350,  347,  343,  339,  336,  332,  329,  325,  322,   /*  96 .. 107 */
    318,  315,  311,  308,  305,  302,  298,  295,  292,  289,  286,  282,   /* 108 .. 119 */
    279,  276,  273,  270,  267,  264,  261,  258,  256,  253,  250,  247,   /* 120 .. 131 */
    244,  241,  239,  236,  233,  230,  228,  225,  222,  220,  217,  215,   /* 132 .. 143 */
    212,  209,  207,  204,  202,  199,  197,  194,  192,  190,  187,  185,   /* 144 .. 155 */
    182,  180,  178,  175,  173,  171,  168,  166,  164,  162,  159,  157,   /* 156 .. 167 */
    155,  153,  151,  149,  146,  144,  142,  140,  138,  136,  134,  132,   /* 168 .. 179 */
    130,  128,  126,  123,  121,  119,  117,  115,  114,  112,  110,  108,   /* 180 .. 191 */
    106,  104,  102,  100,  98,   96,   94,   93,   91,   89,   87,   85,    /* 192 .. 203 */
    83,   82,   80,   78,   76,   74,   73,   71,   69,   67,   66,   64,    /* 204 .. 215 */
    62,   61,   59,   57,   55,   54,   52,   50,   49,   47,   46,   44,    /* 216 .. 227 */
    42,   41,   39,   37,   36,   34,   33,   31,   30,   28,   26,   25,    /* 228 .. 239 */
    23,   22,   20,   19,   17,   16,   14,   13,   11,   10,   8,    7,     /* 240 .. 251 */
    5,    4,    2,    1,                                                     /* 252 .. 255 */
};
45*a28cd43dSSascha Wildner
/**
 * Returns the max symbol value stored in an FSE compression table.
 * The value is read with MEM_read16() from the second 16-bit word of
 * the table, i.e. one U16 past its start.
 */
static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable)
{
    /* Go through void* so the table can be viewed as an array of U16. */
    void const* const ptr = ctable;
    U16 const* const u16ptr = (U16 const*)ptr;
    U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
    return maxSymbolValue;
}
52*a28cd43dSSascha Wildner
/**
 * Returns true if we should use ncount=-1 else we should
 * use ncount=1 for low probability symbols instead.
 *
 * @param nbSeq  number of sequences in the block being encoded
 * @return 1 when nbSeq >= 2048, 0 otherwise
 */
static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
{
    /* Heuristic: This should cover most blocks <= 16K and
     * start to fade out after 16K to about 32K depending on
     * compressibility.
     */
    return nbSeq >= 2048;
}
65*a28cd43dSSascha Wildner
66*a28cd43dSSascha Wildner /**
67*a28cd43dSSascha Wildner * Returns the cost in bytes of encoding the normalized count header.
68*a28cd43dSSascha Wildner * Returns an error if any of the helper functions return an error.
69*a28cd43dSSascha Wildner */
ZSTD_NCountCost(unsigned const * count,unsigned const max,size_t const nbSeq,unsigned const FSELog)70*a28cd43dSSascha Wildner static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
71*a28cd43dSSascha Wildner size_t const nbSeq, unsigned const FSELog)
72*a28cd43dSSascha Wildner {
73*a28cd43dSSascha Wildner BYTE wksp[FSE_NCOUNTBOUND];
74*a28cd43dSSascha Wildner S16 norm[MaxSeq + 1];
75*a28cd43dSSascha Wildner const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
76*a28cd43dSSascha Wildner FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), "");
77*a28cd43dSSascha Wildner return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
78*a28cd43dSSascha Wildner }
79*a28cd43dSSascha Wildner
80*a28cd43dSSascha Wildner /**
81*a28cd43dSSascha Wildner * Returns the cost in bits of encoding the distribution described by count
82*a28cd43dSSascha Wildner * using the entropy bound.
83*a28cd43dSSascha Wildner */
ZSTD_entropyCost(unsigned const * count,unsigned const max,size_t const total)84*a28cd43dSSascha Wildner static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
85*a28cd43dSSascha Wildner {
86*a28cd43dSSascha Wildner unsigned cost = 0;
87*a28cd43dSSascha Wildner unsigned s;
88*a28cd43dSSascha Wildner for (s = 0; s <= max; ++s) {
89*a28cd43dSSascha Wildner unsigned norm = (unsigned)((256 * count[s]) / total);
90*a28cd43dSSascha Wildner if (count[s] != 0 && norm == 0)
91*a28cd43dSSascha Wildner norm = 1;
92*a28cd43dSSascha Wildner assert(count[s] < total);
93*a28cd43dSSascha Wildner cost += count[s] * kInverseProbabilityLog256[norm];
94*a28cd43dSSascha Wildner }
95*a28cd43dSSascha Wildner return cost >> 8;
96*a28cd43dSSascha Wildner }
97*a28cd43dSSascha Wildner
98*a28cd43dSSascha Wildner /**
99*a28cd43dSSascha Wildner * Returns the cost in bits of encoding the distribution in count using ctable.
100*a28cd43dSSascha Wildner * Returns an error if ctable cannot represent all the symbols in count.
101*a28cd43dSSascha Wildner */
ZSTD_fseBitCost(FSE_CTable const * ctable,unsigned const * count,unsigned const max)102*a28cd43dSSascha Wildner size_t ZSTD_fseBitCost(
103*a28cd43dSSascha Wildner FSE_CTable const* ctable,
104*a28cd43dSSascha Wildner unsigned const* count,
105*a28cd43dSSascha Wildner unsigned const max)
106*a28cd43dSSascha Wildner {
107*a28cd43dSSascha Wildner unsigned const kAccuracyLog = 8;
108*a28cd43dSSascha Wildner size_t cost = 0;
109*a28cd43dSSascha Wildner unsigned s;
110*a28cd43dSSascha Wildner FSE_CState_t cstate;
111*a28cd43dSSascha Wildner FSE_initCState(&cstate, ctable);
112*a28cd43dSSascha Wildner if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
113*a28cd43dSSascha Wildner DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
114*a28cd43dSSascha Wildner ZSTD_getFSEMaxSymbolValue(ctable), max);
115*a28cd43dSSascha Wildner return ERROR(GENERIC);
116*a28cd43dSSascha Wildner }
117*a28cd43dSSascha Wildner for (s = 0; s <= max; ++s) {
118*a28cd43dSSascha Wildner unsigned const tableLog = cstate.stateLog;
119*a28cd43dSSascha Wildner unsigned const badCost = (tableLog + 1) << kAccuracyLog;
120*a28cd43dSSascha Wildner unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
121*a28cd43dSSascha Wildner if (count[s] == 0)
122*a28cd43dSSascha Wildner continue;
123*a28cd43dSSascha Wildner if (bitCost >= badCost) {
124*a28cd43dSSascha Wildner DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
125*a28cd43dSSascha Wildner return ERROR(GENERIC);
126*a28cd43dSSascha Wildner }
127*a28cd43dSSascha Wildner cost += (size_t)count[s] * bitCost;
128*a28cd43dSSascha Wildner }
129*a28cd43dSSascha Wildner return cost >> kAccuracyLog;
130*a28cd43dSSascha Wildner }
131*a28cd43dSSascha Wildner
132*a28cd43dSSascha Wildner /**
133*a28cd43dSSascha Wildner * Returns the cost in bits of encoding the distribution in count using the
134*a28cd43dSSascha Wildner * table described by norm. The max symbol support by norm is assumed >= max.
135*a28cd43dSSascha Wildner * norm must be valid for every symbol with non-zero probability in count.
136*a28cd43dSSascha Wildner */
ZSTD_crossEntropyCost(short const * norm,unsigned accuracyLog,unsigned const * count,unsigned const max)137*a28cd43dSSascha Wildner size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
138*a28cd43dSSascha Wildner unsigned const* count, unsigned const max)
139*a28cd43dSSascha Wildner {
140*a28cd43dSSascha Wildner unsigned const shift = 8 - accuracyLog;
141*a28cd43dSSascha Wildner size_t cost = 0;
142*a28cd43dSSascha Wildner unsigned s;
143*a28cd43dSSascha Wildner assert(accuracyLog <= 8);
144*a28cd43dSSascha Wildner for (s = 0; s <= max; ++s) {
145*a28cd43dSSascha Wildner unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1;
146*a28cd43dSSascha Wildner unsigned const norm256 = normAcc << shift;
147*a28cd43dSSascha Wildner assert(norm256 > 0);
148*a28cd43dSSascha Wildner assert(norm256 < 256);
149*a28cd43dSSascha Wildner cost += count[s] * kInverseProbabilityLog256[norm256];
150*a28cd43dSSascha Wildner }
151*a28cd43dSSascha Wildner return cost >> 8;
152*a28cd43dSSascha Wildner }
153*a28cd43dSSascha Wildner
154*a28cd43dSSascha Wildner symbolEncodingType_e
ZSTD_selectEncodingType(FSE_repeat * repeatMode,unsigned const * count,unsigned const max,size_t const mostFrequent,size_t nbSeq,unsigned const FSELog,FSE_CTable const * prevCTable,short const * defaultNorm,U32 defaultNormLog,ZSTD_defaultPolicy_e const isDefaultAllowed,ZSTD_strategy const strategy)155*a28cd43dSSascha Wildner ZSTD_selectEncodingType(
156*a28cd43dSSascha Wildner FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
157*a28cd43dSSascha Wildner size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
158*a28cd43dSSascha Wildner FSE_CTable const* prevCTable,
159*a28cd43dSSascha Wildner short const* defaultNorm, U32 defaultNormLog,
160*a28cd43dSSascha Wildner ZSTD_defaultPolicy_e const isDefaultAllowed,
161*a28cd43dSSascha Wildner ZSTD_strategy const strategy)
162*a28cd43dSSascha Wildner {
163*a28cd43dSSascha Wildner ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
164*a28cd43dSSascha Wildner if (mostFrequent == nbSeq) {
165*a28cd43dSSascha Wildner *repeatMode = FSE_repeat_none;
166*a28cd43dSSascha Wildner if (isDefaultAllowed && nbSeq <= 2) {
167*a28cd43dSSascha Wildner /* Prefer set_basic over set_rle when there are 2 or less symbols,
168*a28cd43dSSascha Wildner * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
169*a28cd43dSSascha Wildner * If basic encoding isn't possible, always choose RLE.
170*a28cd43dSSascha Wildner */
171*a28cd43dSSascha Wildner DEBUGLOG(5, "Selected set_basic");
172*a28cd43dSSascha Wildner return set_basic;
173*a28cd43dSSascha Wildner }
174*a28cd43dSSascha Wildner DEBUGLOG(5, "Selected set_rle");
175*a28cd43dSSascha Wildner return set_rle;
176*a28cd43dSSascha Wildner }
177*a28cd43dSSascha Wildner if (strategy < ZSTD_lazy) {
178*a28cd43dSSascha Wildner if (isDefaultAllowed) {
179*a28cd43dSSascha Wildner size_t const staticFse_nbSeq_max = 1000;
180*a28cd43dSSascha Wildner size_t const mult = 10 - strategy;
181*a28cd43dSSascha Wildner size_t const baseLog = 3;
182*a28cd43dSSascha Wildner size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
183*a28cd43dSSascha Wildner assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
184*a28cd43dSSascha Wildner assert(mult <= 9 && mult >= 7);
185*a28cd43dSSascha Wildner if ( (*repeatMode == FSE_repeat_valid)
186*a28cd43dSSascha Wildner && (nbSeq < staticFse_nbSeq_max) ) {
187*a28cd43dSSascha Wildner DEBUGLOG(5, "Selected set_repeat");
188*a28cd43dSSascha Wildner return set_repeat;
189*a28cd43dSSascha Wildner }
190*a28cd43dSSascha Wildner if ( (nbSeq < dynamicFse_nbSeq_min)
191*a28cd43dSSascha Wildner || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
192*a28cd43dSSascha Wildner DEBUGLOG(5, "Selected set_basic");
193*a28cd43dSSascha Wildner /* The format allows default tables to be repeated, but it isn't useful.
194*a28cd43dSSascha Wildner * When using simple heuristics to select encoding type, we don't want
195*a28cd43dSSascha Wildner * to confuse these tables with dictionaries. When running more careful
196*a28cd43dSSascha Wildner * analysis, we don't need to waste time checking both repeating tables
197*a28cd43dSSascha Wildner * and default tables.
198*a28cd43dSSascha Wildner */
199*a28cd43dSSascha Wildner *repeatMode = FSE_repeat_none;
200*a28cd43dSSascha Wildner return set_basic;
201*a28cd43dSSascha Wildner }
202*a28cd43dSSascha Wildner }
203*a28cd43dSSascha Wildner } else {
204*a28cd43dSSascha Wildner size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
205*a28cd43dSSascha Wildner size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
206*a28cd43dSSascha Wildner size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
207*a28cd43dSSascha Wildner size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
208*a28cd43dSSascha Wildner
209*a28cd43dSSascha Wildner if (isDefaultAllowed) {
210*a28cd43dSSascha Wildner assert(!ZSTD_isError(basicCost));
211*a28cd43dSSascha Wildner assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
212*a28cd43dSSascha Wildner }
213*a28cd43dSSascha Wildner assert(!ZSTD_isError(NCountCost));
214*a28cd43dSSascha Wildner assert(compressedCost < ERROR(maxCode));
215*a28cd43dSSascha Wildner DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
216*a28cd43dSSascha Wildner (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost);
217*a28cd43dSSascha Wildner if (basicCost <= repeatCost && basicCost <= compressedCost) {
218*a28cd43dSSascha Wildner DEBUGLOG(5, "Selected set_basic");
219*a28cd43dSSascha Wildner assert(isDefaultAllowed);
220*a28cd43dSSascha Wildner *repeatMode = FSE_repeat_none;
221*a28cd43dSSascha Wildner return set_basic;
222*a28cd43dSSascha Wildner }
223*a28cd43dSSascha Wildner if (repeatCost <= compressedCost) {
224*a28cd43dSSascha Wildner DEBUGLOG(5, "Selected set_repeat");
225*a28cd43dSSascha Wildner assert(!ZSTD_isError(repeatCost));
226*a28cd43dSSascha Wildner return set_repeat;
227*a28cd43dSSascha Wildner }
228*a28cd43dSSascha Wildner assert(compressedCost < basicCost && compressedCost < repeatCost);
229*a28cd43dSSascha Wildner }
230*a28cd43dSSascha Wildner DEBUGLOG(5, "Selected set_compressed");
231*a28cd43dSSascha Wildner *repeatMode = FSE_repeat_check;
232*a28cd43dSSascha Wildner return set_compressed;
233*a28cd43dSSascha Wildner }
234*a28cd43dSSascha Wildner
235*a28cd43dSSascha Wildner size_t
ZSTD_buildCTable(void * dst,size_t dstCapacity,FSE_CTable * nextCTable,U32 FSELog,symbolEncodingType_e type,unsigned * count,U32 max,const BYTE * codeTable,size_t nbSeq,const S16 * defaultNorm,U32 defaultNormLog,U32 defaultMax,const FSE_CTable * prevCTable,size_t prevCTableSize,void * entropyWorkspace,size_t entropyWorkspaceSize)236*a28cd43dSSascha Wildner ZSTD_buildCTable(void* dst, size_t dstCapacity,
237*a28cd43dSSascha Wildner FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
238*a28cd43dSSascha Wildner unsigned* count, U32 max,
239*a28cd43dSSascha Wildner const BYTE* codeTable, size_t nbSeq,
240*a28cd43dSSascha Wildner const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
241*a28cd43dSSascha Wildner const FSE_CTable* prevCTable, size_t prevCTableSize,
242*a28cd43dSSascha Wildner void* entropyWorkspace, size_t entropyWorkspaceSize)
243*a28cd43dSSascha Wildner {
244*a28cd43dSSascha Wildner BYTE* op = (BYTE*)dst;
245*a28cd43dSSascha Wildner const BYTE* const oend = op + dstCapacity;
246*a28cd43dSSascha Wildner DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity);
247*a28cd43dSSascha Wildner
248*a28cd43dSSascha Wildner switch (type) {
249*a28cd43dSSascha Wildner case set_rle:
250*a28cd43dSSascha Wildner FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), "");
251*a28cd43dSSascha Wildner RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space");
252*a28cd43dSSascha Wildner *op = codeTable[0];
253*a28cd43dSSascha Wildner return 1;
254*a28cd43dSSascha Wildner case set_repeat:
255*a28cd43dSSascha Wildner ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize);
256*a28cd43dSSascha Wildner return 0;
257*a28cd43dSSascha Wildner case set_basic:
258*a28cd43dSSascha Wildner FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */
259*a28cd43dSSascha Wildner return 0;
260*a28cd43dSSascha Wildner case set_compressed: {
261*a28cd43dSSascha Wildner S16 norm[MaxSeq + 1];
262*a28cd43dSSascha Wildner size_t nbSeq_1 = nbSeq;
263*a28cd43dSSascha Wildner const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
264*a28cd43dSSascha Wildner if (count[codeTable[nbSeq-1]] > 1) {
265*a28cd43dSSascha Wildner count[codeTable[nbSeq-1]]--;
266*a28cd43dSSascha Wildner nbSeq_1--;
267*a28cd43dSSascha Wildner }
268*a28cd43dSSascha Wildner assert(nbSeq_1 > 1);
269*a28cd43dSSascha Wildner assert(entropyWorkspaceSize >= FSE_BUILD_CTABLE_WORKSPACE_SIZE(MaxSeq, MaxFSELog));
270*a28cd43dSSascha Wildner FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
271*a28cd43dSSascha Wildner { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */
272*a28cd43dSSascha Wildner FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
273*a28cd43dSSascha Wildner FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, entropyWorkspace, entropyWorkspaceSize), "");
274*a28cd43dSSascha Wildner return NCountSize;
275*a28cd43dSSascha Wildner }
276*a28cd43dSSascha Wildner }
277*a28cd43dSSascha Wildner default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
278*a28cd43dSSascha Wildner }
279*a28cd43dSSascha Wildner }
280*a28cd43dSSascha Wildner
281*a28cd43dSSascha Wildner FORCE_INLINE_TEMPLATE size_t
ZSTD_encodeSequences_body(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)282*a28cd43dSSascha Wildner ZSTD_encodeSequences_body(
283*a28cd43dSSascha Wildner void* dst, size_t dstCapacity,
284*a28cd43dSSascha Wildner FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
285*a28cd43dSSascha Wildner FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
286*a28cd43dSSascha Wildner FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
287*a28cd43dSSascha Wildner seqDef const* sequences, size_t nbSeq, int longOffsets)
288*a28cd43dSSascha Wildner {
289*a28cd43dSSascha Wildner BIT_CStream_t blockStream;
290*a28cd43dSSascha Wildner FSE_CState_t stateMatchLength;
291*a28cd43dSSascha Wildner FSE_CState_t stateOffsetBits;
292*a28cd43dSSascha Wildner FSE_CState_t stateLitLength;
293*a28cd43dSSascha Wildner
294*a28cd43dSSascha Wildner RETURN_ERROR_IF(
295*a28cd43dSSascha Wildner ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
296*a28cd43dSSascha Wildner dstSize_tooSmall, "not enough space remaining");
297*a28cd43dSSascha Wildner DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
298*a28cd43dSSascha Wildner (int)(blockStream.endPtr - blockStream.startPtr),
299*a28cd43dSSascha Wildner (unsigned)dstCapacity);
300*a28cd43dSSascha Wildner
301*a28cd43dSSascha Wildner /* first symbols */
302*a28cd43dSSascha Wildner FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
303*a28cd43dSSascha Wildner FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
304*a28cd43dSSascha Wildner FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
305*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
306*a28cd43dSSascha Wildner if (MEM_32bits()) BIT_flushBits(&blockStream);
307*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
308*a28cd43dSSascha Wildner if (MEM_32bits()) BIT_flushBits(&blockStream);
309*a28cd43dSSascha Wildner if (longOffsets) {
310*a28cd43dSSascha Wildner U32 const ofBits = ofCodeTable[nbSeq-1];
311*a28cd43dSSascha Wildner unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
312*a28cd43dSSascha Wildner if (extraBits) {
313*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
314*a28cd43dSSascha Wildner BIT_flushBits(&blockStream);
315*a28cd43dSSascha Wildner }
316*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
317*a28cd43dSSascha Wildner ofBits - extraBits);
318*a28cd43dSSascha Wildner } else {
319*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
320*a28cd43dSSascha Wildner }
321*a28cd43dSSascha Wildner BIT_flushBits(&blockStream);
322*a28cd43dSSascha Wildner
323*a28cd43dSSascha Wildner { size_t n;
324*a28cd43dSSascha Wildner for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
325*a28cd43dSSascha Wildner BYTE const llCode = llCodeTable[n];
326*a28cd43dSSascha Wildner BYTE const ofCode = ofCodeTable[n];
327*a28cd43dSSascha Wildner BYTE const mlCode = mlCodeTable[n];
328*a28cd43dSSascha Wildner U32 const llBits = LL_bits[llCode];
329*a28cd43dSSascha Wildner U32 const ofBits = ofCode;
330*a28cd43dSSascha Wildner U32 const mlBits = ML_bits[mlCode];
331*a28cd43dSSascha Wildner DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
332*a28cd43dSSascha Wildner (unsigned)sequences[n].litLength,
333*a28cd43dSSascha Wildner (unsigned)sequences[n].matchLength + MINMATCH,
334*a28cd43dSSascha Wildner (unsigned)sequences[n].offset);
335*a28cd43dSSascha Wildner /* 32b*/ /* 64b*/
336*a28cd43dSSascha Wildner /* (7)*/ /* (7)*/
337*a28cd43dSSascha Wildner FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
338*a28cd43dSSascha Wildner FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
339*a28cd43dSSascha Wildner if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
340*a28cd43dSSascha Wildner FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
341*a28cd43dSSascha Wildner if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
342*a28cd43dSSascha Wildner BIT_flushBits(&blockStream); /* (7)*/
343*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[n].litLength, llBits);
344*a28cd43dSSascha Wildner if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
345*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
346*a28cd43dSSascha Wildner if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
347*a28cd43dSSascha Wildner if (longOffsets) {
348*a28cd43dSSascha Wildner unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
349*a28cd43dSSascha Wildner if (extraBits) {
350*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[n].offset, extraBits);
351*a28cd43dSSascha Wildner BIT_flushBits(&blockStream); /* (7)*/
352*a28cd43dSSascha Wildner }
353*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
354*a28cd43dSSascha Wildner ofBits - extraBits); /* 31 */
355*a28cd43dSSascha Wildner } else {
356*a28cd43dSSascha Wildner BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
357*a28cd43dSSascha Wildner }
358*a28cd43dSSascha Wildner BIT_flushBits(&blockStream); /* (7)*/
359*a28cd43dSSascha Wildner DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
360*a28cd43dSSascha Wildner } }
361*a28cd43dSSascha Wildner
362*a28cd43dSSascha Wildner DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
363*a28cd43dSSascha Wildner FSE_flushCState(&blockStream, &stateMatchLength);
364*a28cd43dSSascha Wildner DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
365*a28cd43dSSascha Wildner FSE_flushCState(&blockStream, &stateOffsetBits);
366*a28cd43dSSascha Wildner DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
367*a28cd43dSSascha Wildner FSE_flushCState(&blockStream, &stateLitLength);
368*a28cd43dSSascha Wildner
369*a28cd43dSSascha Wildner { size_t const streamSize = BIT_closeCStream(&blockStream);
370*a28cd43dSSascha Wildner RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
371*a28cd43dSSascha Wildner return streamSize;
372*a28cd43dSSascha Wildner }
373*a28cd43dSSascha Wildner }
374*a28cd43dSSascha Wildner
375*a28cd43dSSascha Wildner static size_t
ZSTD_encodeSequences_default(void * dst,size_t dstCapacity,FSE_CTable const * CTable_MatchLength,BYTE const * mlCodeTable,FSE_CTable const * CTable_OffsetBits,BYTE const * ofCodeTable,FSE_CTable const * CTable_LitLength,BYTE const * llCodeTable,seqDef const * sequences,size_t nbSeq,int longOffsets)376*a28cd43dSSascha Wildner ZSTD_encodeSequences_default(
377*a28cd43dSSascha Wildner void* dst, size_t dstCapacity,
378*a28cd43dSSascha Wildner FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
379*a28cd43dSSascha Wildner FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
380*a28cd43dSSascha Wildner FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
381*a28cd43dSSascha Wildner seqDef const* sequences, size_t nbSeq, int longOffsets)
382*a28cd43dSSascha Wildner {
383*a28cd43dSSascha Wildner return ZSTD_encodeSequences_body(dst, dstCapacity,
384*a28cd43dSSascha Wildner CTable_MatchLength, mlCodeTable,
385*a28cd43dSSascha Wildner CTable_OffsetBits, ofCodeTable,
386*a28cd43dSSascha Wildner CTable_LitLength, llCodeTable,
387*a28cd43dSSascha Wildner sequences, nbSeq, longOffsets);
388*a28cd43dSSascha Wildner }
389*a28cd43dSSascha Wildner
390*a28cd43dSSascha Wildner
#if DYNAMIC_BMI2

/**
 * Instantiation of ZSTD_encodeSequences_body() compiled with
 * TARGET_ATTRIBUTE("bmi2"). Only built when DYNAMIC_BMI2 is enabled.
 * Forwards every argument unchanged and returns the body's result.
 */
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_encodeSequences_bmi2(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets)
{
    return ZSTD_encodeSequences_body(dst, dstCapacity,
                                    CTable_MatchLength, mlCodeTable,
                                    CTable_OffsetBits, ofCodeTable,
                                    CTable_LitLength, llCodeTable,
                                    sequences, nbSeq, longOffsets);
}

#endif
409*a28cd43dSSascha Wildner
/**
 * Public entry point for sequence encoding.
 *
 * When DYNAMIC_BMI2 is enabled and `bmi2` is non-zero, dispatches to
 * ZSTD_encodeSequences_bmi2(); otherwise dispatches to
 * ZSTD_encodeSequences_default(). All other arguments are forwarded
 * unchanged and the callee's result is returned as-is.
 *
 * @param bmi2  non-zero to request the BMI2 variant (ignored when
 *              DYNAMIC_BMI2 is disabled)
 */
size_t ZSTD_encodeSequences(
            void* dst, size_t dstCapacity,
            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
            seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
{
    DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
#if DYNAMIC_BMI2
    if (bmi2) {
        return ZSTD_encodeSequences_bmi2(dst, dstCapacity,
                                         CTable_MatchLength, mlCodeTable,
                                         CTable_OffsetBits, ofCodeTable,
                                         CTable_LitLength, llCodeTable,
                                         sequences, nbSeq, longOffsets);
    }
#endif
    (void)bmi2;   /* unused when DYNAMIC_BMI2 is disabled */
    return ZSTD_encodeSequences_default(dst, dstCapacity,
                                        CTable_MatchLength, mlCodeTable,
                                        CTable_OffsetBits, ofCodeTable,
                                        CTable_LitLength, llCodeTable,
                                        sequences, nbSeq, longOffsets);
}
434