1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* zstd_ddict.c :
12 * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13
14 /*-*******************************************************
15 * Dependencies
16 *********************************************************/
17 #include <string.h> /* memcpy, memmove, memset */
18 #include "cpu.h" /* bmi2 */
19 #include "mem.h" /* low level memory routines */
20 #define FSE_STATIC_LINKING_ONLY
21 #include "fse.h"
22 #define HUF_STATIC_LINKING_ONLY
23 #include "huf.h"
24 #include "zstd_decompress_internal.h"
25 #include "zstd_ddict.h"
26
27 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28 # include "zstd_legacy.h"
29 #endif
30
31
32
33 /*-*******************************************************
34 * Types
35 *********************************************************/
36 struct ZSTD_DDict_s {
37 void* dictBuffer;
38 const void* dictContent;
39 size_t dictSize;
40 ZSTD_entropyDTables_t entropy;
41 U32 dictID;
42 U32 entropyPresent;
43 ZSTD_customMem cMem;
44 }; /* typedef'd to ZSTD_DDict within "zstd.h" */
45
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)46 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47 {
48 assert(ddict != NULL);
49 return ddict->dictContent;
50 }
51
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)52 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53 {
54 assert(ddict != NULL);
55 return ddict->dictSize;
56 }
57
/* ZSTD_copyDDictParameters() :
 * Point `dctx` at the content of `ddict` and, when the dictionary carries
 * entropy tables, make `dctx` use them.
 * Nothing is copied except the dictID and the 3 repeat offsets :
 * `dctx` references the dictionary content and tables stored in `ddict`.
 * Both pointers must be non-NULL (checked by assert() only). */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* dictionary content acts as the history window */
    dctx->dictID = ddict->dictID;
    dctx->prefixStart = ddict->dictContent;
    dctx->virtualStart = ddict->dictContent;
    dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
    dctx->previousDstEnd = dctx->dictEnd;

    if (!ddict->entropyPresent) {
        /* content-only dictionary : no pre-loaded entropy */
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* reference the tables stored inside the DDict */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    {   unsigned r;
        for (r = 0; r < 3; r++) {
            dctx->entropy.rep[r] = ddict->entropy.rep[r];
        }
    }
}
83
84
85 static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)86 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
87 ZSTD_dictContentType_e dictContentType)
88 {
89 ddict->dictID = 0;
90 ddict->entropyPresent = 0;
91 if (dictContentType == ZSTD_dct_rawContent) return 0;
92
93 if (ddict->dictSize < 8) {
94 if (dictContentType == ZSTD_dct_fullDict)
95 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
96 return 0; /* pure content mode */
97 }
98 { U32 const magic = MEM_readLE32(ddict->dictContent);
99 if (magic != ZSTD_MAGIC_DICTIONARY) {
100 if (dictContentType == ZSTD_dct_fullDict)
101 return ERROR(dictionary_corrupted); /* only accept specified dictionaries */
102 return 0; /* pure content mode */
103 }
104 }
105 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
106
107 /* load entropy tables */
108 RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
109 &ddict->entropy, ddict->dictContent, ddict->dictSize)),
110 dictionary_corrupted);
111 ddict->entropyPresent = 1;
112 return 0;
113 }
114
115
/* ZSTD_initDDict_internal() :
 * Attach dictionary content to `ddict`, then parse it.
 * The content is duplicated with ddict->cMem only when all of these hold :
 * dictLoadMethod is not ZSTD_dlm_byRef, `dict` is non-NULL, and `dictSize` is non-zero.
 * Otherwise `dict` is merely referenced (a NULL `dict` forces dictSize to 0).
 * ddict->cMem must be set by the caller before any call that may duplicate.
 * @return : 0 on success, or an error code
 *           (memory_allocation, or whatever the parsing stage reports). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const duplicate = (dictLoadMethod != ZSTD_dlm_byRef)
                       && (dict != NULL)
                       && (dictSize != 0);

    if (duplicate) {
        void* const ownedCopy = ZSTD_malloc(dictSize, ddict->cMem);
        /* fields are set before the NULL check, so they hold NULL on failure */
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );

    return 0;
}
140
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)141 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
142 ZSTD_dictLoadMethod_e dictLoadMethod,
143 ZSTD_dictContentType_e dictContentType,
144 ZSTD_customMem customMem)
145 {
146 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
147
148 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
149 if (ddict == NULL) return NULL;
150 ddict->cMem = customMem;
151 { size_t const initResult = ZSTD_initDDict_internal(ddict,
152 dict, dictSize,
153 dictLoadMethod, dictContentType);
154 if (ZSTD_isError(initResult)) {
155 ZSTD_freeDDict(ddict);
156 return NULL;
157 } }
158 return ddict;
159 }
160 }
161
162 /*! ZSTD_createDDict() :
163 * Create a digested dictionary, to start decompression without startup delay.
164 * `dict` content is copied inside DDict.
165 * Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)166 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
167 {
168 ZSTD_customMem const allocator = { NULL, NULL, NULL };
169 return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
170 }
171
172 /*! ZSTD_createDDict_byReference() :
173 * Create a digested dictionary, to start decompression without startup delay.
174 * Dictionary content is simply referenced, it will be accessed during decompression.
175 * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)176 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
177 {
178 ZSTD_customMem const allocator = { NULL, NULL, NULL };
179 return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
180 }
181
182
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)183 const ZSTD_DDict* ZSTD_initStaticDDict(
184 void* sBuffer, size_t sBufferSize,
185 const void* dict, size_t dictSize,
186 ZSTD_dictLoadMethod_e dictLoadMethod,
187 ZSTD_dictContentType_e dictContentType)
188 {
189 size_t const neededSpace = sizeof(ZSTD_DDict)
190 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
191 ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
192 assert(sBuffer != NULL);
193 assert(dict != NULL);
194 if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
195 if (sBufferSize < neededSpace) return NULL;
196 if (dictLoadMethod == ZSTD_dlm_byCopy) {
197 memcpy(ddict+1, dict, dictSize); /* local copy */
198 dict = ddict+1;
199 }
200 if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
201 dict, dictSize,
202 ZSTD_dlm_byRef, dictContentType) ))
203 return NULL;
204 return ddict;
205 }
206
207
/*! ZSTD_freeDDict() :
 *  Release the internal dictionary copy (if any), then the DDict itself,
 *  both through the allocator stored in the DDict.
 *  A NULL `ddict` is accepted and ignored.
 * @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* take a copy of the allocator : ddict itself is released below */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_free(ddict->dictBuffer, allocator);
        ZSTD_free(ddict, allocator);
    }
    return 0;
}
217
218 /*! ZSTD_estimateDDictSize() :
219 * Estimate amount of memory that will be needed to create a dictionary for decompression.
220 * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)221 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
222 {
223 return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
224 }
225
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)226 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
227 {
228 if (ddict==NULL) return 0; /* support sizeof on NULL */
229 return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
230 }
231
232 /*! ZSTD_getDictID_fromDDict() :
233 * Provides the dictID of the dictionary loaded into `ddict`.
234 * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
235 * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)236 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
237 {
238 if (ddict==NULL) return 0;
239 return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
240 }
241