/*
 * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10*c03c5b1cSMartin Matuska
/*-*************************************
*  Dependencies
***************************************/
14*c03c5b1cSMartin Matuska #include "zstd_compress_literals.h"
15*c03c5b1cSMartin Matuska
ZSTD_noCompressLiterals(void * dst,size_t dstCapacity,const void * src,size_t srcSize)16*c03c5b1cSMartin Matuska size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
17*c03c5b1cSMartin Matuska {
18*c03c5b1cSMartin Matuska BYTE* const ostart = (BYTE* const)dst;
19*c03c5b1cSMartin Matuska U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
20*c03c5b1cSMartin Matuska
21*c03c5b1cSMartin Matuska RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
22*c03c5b1cSMartin Matuska
23*c03c5b1cSMartin Matuska switch(flSize)
24*c03c5b1cSMartin Matuska {
25*c03c5b1cSMartin Matuska case 1: /* 2 - 1 - 5 */
26*c03c5b1cSMartin Matuska ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
27*c03c5b1cSMartin Matuska break;
28*c03c5b1cSMartin Matuska case 2: /* 2 - 2 - 12 */
29*c03c5b1cSMartin Matuska MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
30*c03c5b1cSMartin Matuska break;
31*c03c5b1cSMartin Matuska case 3: /* 2 - 2 - 20 */
32*c03c5b1cSMartin Matuska MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
33*c03c5b1cSMartin Matuska break;
34*c03c5b1cSMartin Matuska default: /* not necessary : flSize is {1,2,3} */
35*c03c5b1cSMartin Matuska assert(0);
36*c03c5b1cSMartin Matuska }
37*c03c5b1cSMartin Matuska
38*c03c5b1cSMartin Matuska memcpy(ostart + flSize, src, srcSize);
39*c03c5b1cSMartin Matuska DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
40*c03c5b1cSMartin Matuska return srcSize + flSize;
41*c03c5b1cSMartin Matuska }
42*c03c5b1cSMartin Matuska
ZSTD_compressRleLiteralsBlock(void * dst,size_t dstCapacity,const void * src,size_t srcSize)43*c03c5b1cSMartin Matuska size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
44*c03c5b1cSMartin Matuska {
45*c03c5b1cSMartin Matuska BYTE* const ostart = (BYTE* const)dst;
46*c03c5b1cSMartin Matuska U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
47*c03c5b1cSMartin Matuska
48*c03c5b1cSMartin Matuska (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
49*c03c5b1cSMartin Matuska
50*c03c5b1cSMartin Matuska switch(flSize)
51*c03c5b1cSMartin Matuska {
52*c03c5b1cSMartin Matuska case 1: /* 2 - 1 - 5 */
53*c03c5b1cSMartin Matuska ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
54*c03c5b1cSMartin Matuska break;
55*c03c5b1cSMartin Matuska case 2: /* 2 - 2 - 12 */
56*c03c5b1cSMartin Matuska MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
57*c03c5b1cSMartin Matuska break;
58*c03c5b1cSMartin Matuska case 3: /* 2 - 2 - 20 */
59*c03c5b1cSMartin Matuska MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
60*c03c5b1cSMartin Matuska break;
61*c03c5b1cSMartin Matuska default: /* not necessary : flSize is {1,2,3} */
62*c03c5b1cSMartin Matuska assert(0);
63*c03c5b1cSMartin Matuska }
64*c03c5b1cSMartin Matuska
65*c03c5b1cSMartin Matuska ostart[flSize] = *(const BYTE*)src;
66*c03c5b1cSMartin Matuska DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
67*c03c5b1cSMartin Matuska return flSize+1;
68*c03c5b1cSMartin Matuska }
69*c03c5b1cSMartin Matuska
ZSTD_compressLiterals(ZSTD_hufCTables_t const * prevHuf,ZSTD_hufCTables_t * nextHuf,ZSTD_strategy strategy,int disableLiteralCompression,void * dst,size_t dstCapacity,const void * src,size_t srcSize,void * entropyWorkspace,size_t entropyWorkspaceSize,const int bmi2)70*c03c5b1cSMartin Matuska size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
71*c03c5b1cSMartin Matuska ZSTD_hufCTables_t* nextHuf,
72*c03c5b1cSMartin Matuska ZSTD_strategy strategy, int disableLiteralCompression,
73*c03c5b1cSMartin Matuska void* dst, size_t dstCapacity,
74*c03c5b1cSMartin Matuska const void* src, size_t srcSize,
75*c03c5b1cSMartin Matuska void* entropyWorkspace, size_t entropyWorkspaceSize,
76*c03c5b1cSMartin Matuska const int bmi2)
77*c03c5b1cSMartin Matuska {
78*c03c5b1cSMartin Matuska size_t const minGain = ZSTD_minGain(srcSize, strategy);
79*c03c5b1cSMartin Matuska size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
80*c03c5b1cSMartin Matuska BYTE* const ostart = (BYTE*)dst;
81*c03c5b1cSMartin Matuska U32 singleStream = srcSize < 256;
82*c03c5b1cSMartin Matuska symbolEncodingType_e hType = set_compressed;
83*c03c5b1cSMartin Matuska size_t cLitSize;
84*c03c5b1cSMartin Matuska
85*c03c5b1cSMartin Matuska DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
86*c03c5b1cSMartin Matuska disableLiteralCompression, (U32)srcSize);
87*c03c5b1cSMartin Matuska
88*c03c5b1cSMartin Matuska /* Prepare nextEntropy assuming reusing the existing table */
89*c03c5b1cSMartin Matuska memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
90*c03c5b1cSMartin Matuska
91*c03c5b1cSMartin Matuska if (disableLiteralCompression)
92*c03c5b1cSMartin Matuska return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
93*c03c5b1cSMartin Matuska
94*c03c5b1cSMartin Matuska /* small ? don't even attempt compression (speed opt) */
95*c03c5b1cSMartin Matuska # define COMPRESS_LITERALS_SIZE_MIN 63
96*c03c5b1cSMartin Matuska { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
97*c03c5b1cSMartin Matuska if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
98*c03c5b1cSMartin Matuska }
99*c03c5b1cSMartin Matuska
100*c03c5b1cSMartin Matuska RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
101*c03c5b1cSMartin Matuska { HUF_repeat repeat = prevHuf->repeatMode;
102*c03c5b1cSMartin Matuska int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
103*c03c5b1cSMartin Matuska if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
104*c03c5b1cSMartin Matuska cLitSize = singleStream ?
105*c03c5b1cSMartin Matuska HUF_compress1X_repeat(
106*c03c5b1cSMartin Matuska ostart+lhSize, dstCapacity-lhSize, src, srcSize,
107*c03c5b1cSMartin Matuska HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
108*c03c5b1cSMartin Matuska (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
109*c03c5b1cSMartin Matuska HUF_compress4X_repeat(
110*c03c5b1cSMartin Matuska ostart+lhSize, dstCapacity-lhSize, src, srcSize,
111*c03c5b1cSMartin Matuska HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
112*c03c5b1cSMartin Matuska (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
113*c03c5b1cSMartin Matuska if (repeat != HUF_repeat_none) {
114*c03c5b1cSMartin Matuska /* reused the existing table */
115*c03c5b1cSMartin Matuska DEBUGLOG(5, "Reusing previous huffman table");
116*c03c5b1cSMartin Matuska hType = set_repeat;
117*c03c5b1cSMartin Matuska }
118*c03c5b1cSMartin Matuska }
119*c03c5b1cSMartin Matuska
120*c03c5b1cSMartin Matuska if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
121*c03c5b1cSMartin Matuska memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
122*c03c5b1cSMartin Matuska return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
123*c03c5b1cSMartin Matuska }
124*c03c5b1cSMartin Matuska if (cLitSize==1) {
125*c03c5b1cSMartin Matuska memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
126*c03c5b1cSMartin Matuska return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
127*c03c5b1cSMartin Matuska }
128*c03c5b1cSMartin Matuska
129*c03c5b1cSMartin Matuska if (hType == set_compressed) {
130*c03c5b1cSMartin Matuska /* using a newly constructed table */
131*c03c5b1cSMartin Matuska nextHuf->repeatMode = HUF_repeat_check;
132*c03c5b1cSMartin Matuska }
133*c03c5b1cSMartin Matuska
134*c03c5b1cSMartin Matuska /* Build header */
135*c03c5b1cSMartin Matuska switch(lhSize)
136*c03c5b1cSMartin Matuska {
137*c03c5b1cSMartin Matuska case 3: /* 2 - 2 - 10 - 10 */
138*c03c5b1cSMartin Matuska { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
139*c03c5b1cSMartin Matuska MEM_writeLE24(ostart, lhc);
140*c03c5b1cSMartin Matuska break;
141*c03c5b1cSMartin Matuska }
142*c03c5b1cSMartin Matuska case 4: /* 2 - 2 - 14 - 14 */
143*c03c5b1cSMartin Matuska { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
144*c03c5b1cSMartin Matuska MEM_writeLE32(ostart, lhc);
145*c03c5b1cSMartin Matuska break;
146*c03c5b1cSMartin Matuska }
147*c03c5b1cSMartin Matuska case 5: /* 2 - 2 - 18 - 18 */
148*c03c5b1cSMartin Matuska { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
149*c03c5b1cSMartin Matuska MEM_writeLE32(ostart, lhc);
150*c03c5b1cSMartin Matuska ostart[4] = (BYTE)(cLitSize >> 10);
151*c03c5b1cSMartin Matuska break;
152*c03c5b1cSMartin Matuska }
153*c03c5b1cSMartin Matuska default: /* not possible : lhSize is {3,4,5} */
154*c03c5b1cSMartin Matuska assert(0);
155*c03c5b1cSMartin Matuska }
156*c03c5b1cSMartin Matuska DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
157*c03c5b1cSMartin Matuska return lhSize+cLitSize;
158*c03c5b1cSMartin Matuska }
159