1 /*
2  * Copyright (c) 2016-2020, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /**
12  * This fuzz target performs a zstd round-trip test by generating an arbitrary
13  * array of sequences, generating the associated source buffer, calling
14  * ZSTD_compressSequences(), and then decompresses and compares the result with
15  * the original generated source buffer.
16  */
17 
18 #define ZSTD_STATIC_LINKING_ONLY
19 
20 #include <stddef.h>
21 #include <stdlib.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <time.h>
25 #include "fuzz_helpers.h"
26 #include "zstd_helpers.h"
27 #include "fuzz_data_producer.h"
28 
29 static ZSTD_CCtx *cctx = NULL;
30 static ZSTD_DCtx *dctx = NULL;
31 static void* literalsBuffer = NULL;
32 static void* generatedSrc = NULL;
33 static ZSTD_Sequence* generatedSequences = NULL;
34 
35 #define ZSTD_FUZZ_GENERATED_SRC_MAXSIZE (1 << 20) /* Allow up to 1MB generated data */
36 #define ZSTD_FUZZ_MATCHLENGTH_MAXSIZE (1 << 18) /* Allow up to 256KB matches */
37 #define ZSTD_FUZZ_GENERATED_DICT_MAXSIZE (1 << 18) /* Allow up to a 256KB dict */
38 #define ZSTD_FUZZ_GENERATED_LITERALS_SIZE (1 << 18) /* Fixed size 256KB literals buffer */
39 #define ZSTD_FUZZ_MAX_NBSEQ (1 << 17) /* Maximum of 128K sequences */
40 
41 /* Deterministic random number generator */
42 #define FUZZ_RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
FUZZ_RDG_rand(uint32_t * src)43 static uint32_t FUZZ_RDG_rand(uint32_t* src)
44 {
45     static const uint32_t prime1 = 2654435761U;
46     static const uint32_t prime2 = 2246822519U;
47     uint32_t rand32 = *src;
48     rand32 *= prime1;
49     rand32 ^= prime2;
50     rand32  = FUZZ_RDG_rotl32(rand32, 13);
51     *src = rand32;
52     return rand32 >> 5;
53 }
54 
55 /* Make a pseudorandom string - this simple function exists to avoid
56  * taking a dependency on datagen.h to have RDG_genBuffer().
57  */
generatePseudoRandomString(char * str,size_t size)58 static char *generatePseudoRandomString(char *str, size_t size) {
59     const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJK1234567890!@#$^&*()_";
60     uint32_t seed = 0;
61     if (size) {
62         for (size_t n = 0; n < size; n++) {
63             int key = FUZZ_RDG_rand(&seed) % (int) (sizeof charset - 1);
64             str[n] = charset[key];
65         }
66     }
67     return str;
68 }
69 
70 /* Returns size of source buffer */
decodeSequences(void * dst,size_t nbSequences,size_t literalsSize,const void * dict,size_t dictSize)71 static size_t decodeSequences(void* dst, size_t nbSequences,
72                               size_t literalsSize, const void* dict, size_t dictSize) {
73     const uint8_t* litPtr = literalsBuffer;
74     const uint8_t* const litBegin = literalsBuffer;
75     const uint8_t* const litEnd = literalsBuffer + literalsSize;
76     const uint8_t* dictPtr = dict;
77     uint8_t* op = dst;
78     const uint8_t* const oend = dst + ZSTD_FUZZ_GENERATED_SRC_MAXSIZE;
79     size_t generatedSrcBufferSize = 0;
80     size_t bytesWritten = 0;
81     uint32_t lastLLSize;
82 
83     for (size_t i = 0; i < nbSequences; ++i) {
84         FUZZ_ASSERT(generatedSequences[i].matchLength != 0);
85         FUZZ_ASSERT(generatedSequences[i].offset != 0);
86 
87         if (litPtr + generatedSequences[i].litLength > litEnd) {
88             litPtr = litBegin;
89         }
90         ZSTD_memcpy(op, litPtr, generatedSequences[i].litLength);
91         bytesWritten += generatedSequences[i].litLength;
92         op += generatedSequences[i].litLength;
93         litPtr += generatedSequences[i].litLength;
94 
95         FUZZ_ASSERT(generatedSequences[i].offset != 0);
96         /* Copy over the match */
97         {   size_t matchLength = generatedSequences[i].matchLength;
98             size_t j = 0;
99             size_t k = 0;
100             if (dictSize != 0) {
101                 if (generatedSequences[i].offset > bytesWritten) {
102                     /* Offset goes into the dictionary */
103                     size_t offsetFromEndOfDict = generatedSequences[i].offset - bytesWritten;
104                     for (; k < offsetFromEndOfDict && k < matchLength; ++k) {
105                         op[k] = dictPtr[dictSize - offsetFromEndOfDict + k];
106                     }
107                     matchLength -= k;
108                     op += k;
109                 }
110             }
111             for (; j < matchLength; ++j) {
112                 op[j] = op[j-(int)generatedSequences[i].offset];
113             }
114             op += j;
115             FUZZ_ASSERT(generatedSequences[i].matchLength == j + k);
116             bytesWritten += generatedSequences[i].matchLength;
117         }
118     }
119     generatedSrcBufferSize = bytesWritten;
120     FUZZ_ASSERT(litPtr <= litEnd);
121     lastLLSize = (uint32_t)(litEnd - litPtr);
122     if (lastLLSize <= oend - op) {
123         ZSTD_memcpy(op, litPtr, lastLLSize);
124         generatedSrcBufferSize += lastLLSize;
125     }
126     return generatedSrcBufferSize;
127 }
128 
129 /* Returns nb sequences generated
130  * TODO: Add repcode fuzzing once we support repcode match splits
131  */
generateRandomSequences(FUZZ_dataProducer_t * producer,size_t literalsSizeLimit,size_t dictSize,size_t windowLog)132 static size_t generateRandomSequences(FUZZ_dataProducer_t* producer,
133                                       size_t literalsSizeLimit, size_t dictSize,
134                                       size_t windowLog) {
135     uint32_t bytesGenerated = 0;
136     uint32_t nbSeqGenerated = 0;
137     uint32_t litLength;
138     uint32_t matchLength;
139     uint32_t matchBound;
140     uint32_t offset;
141     uint32_t offsetBound;
142     uint32_t repCode = 0;
143     uint32_t isFirstSequence = 1;
144     uint32_t windowSize = 1 << windowLog;
145 
146     while (nbSeqGenerated < ZSTD_FUZZ_MAX_NBSEQ
147          && bytesGenerated < ZSTD_FUZZ_GENERATED_SRC_MAXSIZE
148          && !FUZZ_dataProducer_empty(producer)) {
149         matchBound = ZSTD_FUZZ_MATCHLENGTH_MAXSIZE;
150         litLength = isFirstSequence && dictSize == 0 ? FUZZ_dataProducer_uint32Range(producer, 1, literalsSizeLimit)
151                                                      : FUZZ_dataProducer_uint32Range(producer, 0, literalsSizeLimit);
152         bytesGenerated += litLength;
153         if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
154             break;
155         }
156         offsetBound = bytesGenerated > windowSize ? windowSize : bytesGenerated + dictSize;
157         offset = FUZZ_dataProducer_uint32Range(producer, 1, offsetBound);
158         if (dictSize > 0 && bytesGenerated <= windowSize) {
159             /* Prevent match length from being such that it would be associated with an offset too large
160              * from the decoder's perspective. If not possible (match would be too small),
161              * then reduce the offset if necessary.
162              */
163             size_t bytesToReachWindowSize = windowSize - bytesGenerated;
164             if (bytesToReachWindowSize < ZSTD_MINMATCH_MIN) {
165                 uint32_t newOffsetBound = offsetBound > windowSize ? windowSize : offsetBound;
166                 offset = FUZZ_dataProducer_uint32Range(producer, 1, newOffsetBound);
167             } else {
168                 matchBound = bytesToReachWindowSize > ZSTD_FUZZ_MATCHLENGTH_MAXSIZE ?
169                              ZSTD_FUZZ_MATCHLENGTH_MAXSIZE : bytesToReachWindowSize;
170             }
171         }
172         matchLength = FUZZ_dataProducer_uint32Range(producer, ZSTD_MINMATCH_MIN, matchBound);
173         bytesGenerated += matchLength;
174         if (bytesGenerated > ZSTD_FUZZ_GENERATED_SRC_MAXSIZE) {
175             break;
176         }
177         ZSTD_Sequence seq = {offset, litLength, matchLength, repCode};
178         generatedSequences[nbSeqGenerated++] = seq;
179         isFirstSequence = 0;
180     }
181 
182     return nbSeqGenerated;
183 }
184 
roundTripTest(void * result,size_t resultCapacity,void * compressed,size_t compressedCapacity,size_t srcSize,const void * dict,size_t dictSize,size_t generatedSequencesSize,size_t wLog,unsigned cLevel,unsigned hasDict)185 static size_t roundTripTest(void *result, size_t resultCapacity,
186                             void *compressed, size_t compressedCapacity,
187                             size_t srcSize,
188                             const void *dict, size_t dictSize,
189                             size_t generatedSequencesSize,
190                             size_t wLog, unsigned cLevel, unsigned hasDict)
191 {
192     size_t cSize;
193     size_t dSize;
194     ZSTD_CDict* cdict = NULL;
195     ZSTD_DDict* ddict = NULL;
196 
197     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
198     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 0);
199     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, cLevel);
200     ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, wLog);
201     ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, ZSTD_MINMATCH_MIN);
202     ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
203     /* TODO: Add block delim mode fuzzing */
204     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
205     if (hasDict) {
206         FUZZ_ZASSERT(ZSTD_CCtx_loadDictionary(cctx, dict, dictSize));
207         FUZZ_ZASSERT(ZSTD_DCtx_loadDictionary(dctx, dict, dictSize));
208     }
209 
210     cSize = ZSTD_compressSequences(cctx, compressed, compressedCapacity,
211                                    generatedSequences, generatedSequencesSize,
212                                    generatedSrc, srcSize);
213     FUZZ_ZASSERT(cSize);
214     dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
215     FUZZ_ZASSERT(dSize);
216 
217     if (cdict) {
218         ZSTD_freeCDict(cdict);
219     }
220     if (ddict) {
221         ZSTD_freeDDict(ddict);
222     }
223     return dSize;
224 }
225 
LLVMFuzzerTestOneInput(const uint8_t * src,size_t size)226 int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
227 {
228     void* rBuf;
229     size_t rBufSize;
230     void* cBuf;
231     size_t cBufSize;
232     size_t generatedSrcSize;
233     size_t nbSequences;
234     void* dictBuffer;
235     size_t dictSize = 0;
236     unsigned hasDict;
237     unsigned wLog;
238     int cLevel;
239 
240     FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
241     if (literalsBuffer == NULL) {
242         literalsBuffer = FUZZ_malloc(ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
243         literalsBuffer = generatePseudoRandomString(literalsBuffer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE);
244     }
245 
246     hasDict = FUZZ_dataProducer_uint32Range(producer, 0, 1);
247     if (hasDict) {
248         dictSize = FUZZ_dataProducer_uint32Range(producer, 1, ZSTD_FUZZ_GENERATED_DICT_MAXSIZE);
249         dictBuffer = FUZZ_malloc(dictSize);
250         dictBuffer = generatePseudoRandomString(dictBuffer, dictSize);
251     }
252     /* Generate window log first so we dont generate offsets too large */
253     wLog = FUZZ_dataProducer_uint32Range(producer, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX_32);
254     cLevel = FUZZ_dataProducer_int32Range(producer, -3, 22);
255 
256     if (!generatedSequences) {
257         generatedSequences = FUZZ_malloc(sizeof(ZSTD_Sequence)*ZSTD_FUZZ_MAX_NBSEQ);
258     }
259     if (!generatedSrc) {
260         generatedSrc = FUZZ_malloc(ZSTD_FUZZ_GENERATED_SRC_MAXSIZE);
261     }
262     nbSequences = generateRandomSequences(producer, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictSize, wLog);
263     generatedSrcSize = decodeSequences(generatedSrc, nbSequences, ZSTD_FUZZ_GENERATED_LITERALS_SIZE, dictBuffer, dictSize);
264     cBufSize = ZSTD_compressBound(generatedSrcSize);
265     cBuf = FUZZ_malloc(cBufSize);
266 
267     rBufSize = generatedSrcSize;
268     rBuf = FUZZ_malloc(rBufSize);
269 
270     if (!cctx) {
271         cctx = ZSTD_createCCtx();
272         FUZZ_ASSERT(cctx);
273     }
274     if (!dctx) {
275         dctx = ZSTD_createDCtx();
276         FUZZ_ASSERT(dctx);
277     }
278 
279     size_t const result = roundTripTest(rBuf, rBufSize,
280                                         cBuf, cBufSize,
281                                         generatedSrcSize,
282                                         dictBuffer, dictSize,
283                                         nbSequences,
284                                         wLog, cLevel, hasDict);
285     FUZZ_ZASSERT(result);
286     FUZZ_ASSERT_MSG(result == generatedSrcSize, "Incorrect regenerated size");
287     FUZZ_ASSERT_MSG(!FUZZ_memcmp(generatedSrc, rBuf, generatedSrcSize), "Corruption!");
288 
289     free(rBuf);
290     free(cBuf);
291     FUZZ_dataProducer_free(producer);
292     if (hasDict) {
293         free(dictBuffer);
294     }
295 #ifndef STATEFUL_FUZZING
296     ZSTD_freeCCtx(cctx); cctx = NULL;
297     ZSTD_freeDCtx(dctx); dctx = NULL;
298     free(generatedSequences); generatedSequences = NULL;
299     free(generatedSrc); generatedSrc = NULL;
300     free(literalsBuffer); literalsBuffer = NULL;
301 #endif
302     return 0;
303 }
304