1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* zstd_ddict.c :
12  * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13 
14 /*-*******************************************************
15 *  Dependencies
16 *********************************************************/
17 #include <string.h>      /* memcpy, memmove, memset */
18 #include "cpu.h"         /* bmi2 */
19 #include "mem.h"         /* low level memory routines */
20 #define FSE_STATIC_LINKING_ONLY
21 #include "fse.h"
22 #define HUF_STATIC_LINKING_ONLY
23 #include "huf.h"
24 #include "zstd_decompress_internal.h"
25 #include "zstd_ddict.h"
26 
27 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
28 #  include "zstd_legacy.h"
29 #endif
30 
31 
32 
33 /*-*******************************************************
34 *  Types
35 *********************************************************/
36 struct ZSTD_DDict_s {
37     void* dictBuffer;
38     const void* dictContent;
39     size_t dictSize;
40     ZSTD_entropyDTables_t entropy;
41     U32 dictID;
42     U32 entropyPresent;
43     ZSTD_customMem cMem;
44 };  /* typedef'd to ZSTD_DDict within "zstd.h" */
45 
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)46 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
47 {
48     assert(ddict != NULL);
49     return ddict->dictContent;
50 }
51 
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)52 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
53 {
54     assert(ddict != NULL);
55     return ddict->dictSize;
56 }
57 
/* ZSTD_copyDDictParameters() :
 * Makes `dctx` use `ddict` as its dictionary :
 * - the dictionary content becomes the current prefix
 *   (prefixStart / virtualStart at its first byte, dictEnd / previousDstEnd just past its last byte);
 * - when the DDict carries entropy tables, `dctx` is pointed at them and the repcodes are copied;
 *   otherwise both entropy flags are cleared.
 * Note : `dctx` stores pointers into `ddict` (content and tables); nothing is duplicated except the repcodes. */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* window : the whole dictionary content acts as already-decoded data */
    {   const BYTE* const dictStart = (const BYTE*)ddict->dictContent;
        const BYTE* const dictLimit = dictStart + ddict->dictSize;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = dictStart;
        dctx->virtualStart = dictStart;
        dctx->dictEnd = dictLimit;
        dctx->previousDstEnd = dictLimit;
    }

    /* entropy : nothing to borrow from a content-only dictionary */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->HUFptr = ddict->entropy.hufTable;
    {   int r;
        for (r = 0; r < 3; r++) {
            dctx->entropy.rep[r] = ddict->entropy.rep[r];
    }   }
}
83 
84 
85 static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)86 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
87                            ZSTD_dictContentType_e dictContentType)
88 {
89     ddict->dictID = 0;
90     ddict->entropyPresent = 0;
91     if (dictContentType == ZSTD_dct_rawContent) return 0;
92 
93     if (ddict->dictSize < 8) {
94         if (dictContentType == ZSTD_dct_fullDict)
95             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
96         return 0;   /* pure content mode */
97     }
98     {   U32 const magic = MEM_readLE32(ddict->dictContent);
99         if (magic != ZSTD_MAGIC_DICTIONARY) {
100             if (dictContentType == ZSTD_dct_fullDict)
101                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
102             return 0;   /* pure content mode */
103         }
104     }
105     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
106 
107     /* load entropy tables */
108     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
109             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
110         dictionary_corrupted);
111     ddict->entropyPresent = 1;
112     return 0;
113 }
114 
115 
/* ZSTD_initDDict_internal() :
 * Attaches `dict` to `ddict`, then parses it.
 * The content is duplicated into a buffer obtained from ddict->cMem only when
 * loading by copy AND `dict` is non-NULL AND `dictSize` is non-zero;
 * in every other case `dict` is merely referenced (and dictSize is forced to 0 for a NULL `dict`).
 * `ddict->cMem` must be set by the caller before a by-copy load.
 * @return : 0 on success, or an error code (memory_allocation, or whatever parsing reports). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const needsCopy = (dictLoadMethod != ZSTD_dlm_byRef)
                       && (dict != NULL)
                       && (dictSize != 0);

    if (needsCopy) {
        void* const ownedCopy = ZSTD_malloc(dictSize, ddict->cMem);
        /* both fields are written before the NULL check, so a failed DDict can still be freed safely */
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) );

    return 0;
}
140 
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)141 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
142                                       ZSTD_dictLoadMethod_e dictLoadMethod,
143                                       ZSTD_dictContentType_e dictContentType,
144                                       ZSTD_customMem customMem)
145 {
146     if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
147 
148     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
149         if (ddict == NULL) return NULL;
150         ddict->cMem = customMem;
151         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
152                                             dict, dictSize,
153                                             dictLoadMethod, dictContentType);
154             if (ZSTD_isError(initResult)) {
155                 ZSTD_freeDDict(ddict);
156                 return NULL;
157         }   }
158         return ddict;
159     }
160 }
161 
162 /*! ZSTD_createDDict() :
163 *   Create a digested dictionary, to start decompression without startup delay.
164 *   `dict` content is copied inside DDict.
165 *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)166 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
167 {
168     ZSTD_customMem const allocator = { NULL, NULL, NULL };
169     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
170 }
171 
172 /*! ZSTD_createDDict_byReference() :
173  *  Create a digested dictionary, to start decompression without startup delay.
174  *  Dictionary content is simply referenced, it will be accessed during decompression.
175  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)176 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
177 {
178     ZSTD_customMem const allocator = { NULL, NULL, NULL };
179     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
180 }
181 
182 
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)183 const ZSTD_DDict* ZSTD_initStaticDDict(
184                                 void* sBuffer, size_t sBufferSize,
185                                 const void* dict, size_t dictSize,
186                                 ZSTD_dictLoadMethod_e dictLoadMethod,
187                                 ZSTD_dictContentType_e dictContentType)
188 {
189     size_t const neededSpace = sizeof(ZSTD_DDict)
190                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
191     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
192     assert(sBuffer != NULL);
193     assert(dict != NULL);
194     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
195     if (sBufferSize < neededSpace) return NULL;
196     if (dictLoadMethod == ZSTD_dlm_byCopy) {
197         memcpy(ddict+1, dict, dictSize);  /* local copy */
198         dict = ddict+1;
199     }
200     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
201                                               dict, dictSize,
202                                               ZSTD_dlm_byRef, dictContentType) ))
203         return NULL;
204     return ddict;
205 }
206 
207 
/* ZSTD_freeDDict() :
 * Releases the DDict's internal dictionary buffer, then the DDict itself,
 * both through the allocator stored in the DDict.
 * Accepts NULL, in which case nothing is done.
 * @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* take a copy of the allocator : `ddict` is released by the second call */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_free(ddict->dictBuffer, allocator);
        ZSTD_free(ddict, allocator);
    }
    return 0;
}
217 
218 /*! ZSTD_estimateDDictSize() :
219  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
220  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)221 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
222 {
223     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
224 }
225 
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)226 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
227 {
228     if (ddict==NULL) return 0;   /* support sizeof on NULL */
229     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
230 }
231 
232 /*! ZSTD_getDictID_fromDDict() :
233  *  Provides the dictID of the dictionary loaded into `ddict`.
234  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
235  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)236 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
237 {
238     if (ddict==NULL) return 0;
239     return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
240 }
241