1 /*
2  * Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* largeNbDicts
12  * This is a benchmark test tool
13  * dedicated to the specific case of dictionary decompression
14  * using a very large nb of dictionaries
15  * thus suffering latency from lots of cache misses.
16  * It's created in a bid to investigate performance and find optimizations. */
17 
18 
19 /*---  Dependencies  ---*/
20 
21 #include <stddef.h>   /* size_t */
22 #include <stdlib.h>   /* malloc, free, abort */
23 #include <stdio.h>    /* fprintf */
24 #include <assert.h>   /* assert */
25 
26 #include "util.h"
27 #include "benchfn.h"
28 #define ZSTD_STATIC_LINKING_ONLY
29 #include "zstd.h"
30 #include "zdict.h"
31 
32 
33 /*---  Constants  --- */
34 
/* Size suffixes : written after a literal, `4 KB` expands to `4 *(1<<10)` */
#define KB  *(1<<10)
#define MB  *(1<<20)

/* Default settings */
#define BLOCKSIZE_DEFAULT 0  /* no slicing into blocks */
#define DICTSIZE  (4 KB)     /* target size requested when a dictionary gets trained */
#define CLEVEL_DEFAULT 3

/* Benchmark duration : a number of rounds, each one lasting RUN_TIME_DEFAULT_MS */
#define BENCH_TIME_DEFAULT_S   6
#define RUN_TIME_DEFAULT_MS    1000
#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)

#define DISPLAY_LEVEL_DEFAULT 3

/* upper limit for the cumulated size of input files */
#define BENCH_SIZE_MAX (1200 MB)
49 
50 
51 /*---  Macros  ---*/
52 
/* CONTROL() : like assert(), but remains active in release builds (NDEBUG).
 * Wrapped in do { } while (0), so that `CONTROL(c);` is exactly one statement,
 * which makes it safe to use inside an unbraced if / else. */
#define CONTROL(c)   do { if (!(c)) abort(); } while (0)
#undef MIN
/* MIN() : note that each argument is evaluated twice, avoid side effects */
#define MIN(a,b)     ((a) < (b) ? (a) : (b))
56 
57 
58 /*---  Display Macros  ---*/
59 
60 #define DISPLAY(...)         fprintf(stdout, __VA_ARGS__)
61 #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
62 static int g_displayLevel = DISPLAY_LEVEL_DEFAULT;   /* 0 : no display,  1: errors,  2 : + result + interaction + warnings,  3 : + progression,  4 : + information */
63 
64 
65 /*---  buffer_t  ---*/
66 
/* buffer_t : a heap memory area.
 * `capacity` is the allocated size,
 * `size` is the amount of content stored into it. */
typedef struct buffer_s {
    void* ptr;
    size_t size;
    size_t capacity;
} buffer_t;

/* value returned by buffer creators to signal an error */
static const buffer_t kBuffNull = { .ptr = NULL, .size = 0, .capacity = 0 };
74 
75 /* @return : kBuffNull if any error */
createBuffer(size_t capacity)76 static buffer_t createBuffer(size_t capacity)
77 {
78     assert(capacity > 0);
79     void* const ptr = malloc(capacity);
80     if (ptr==NULL) return kBuffNull;
81 
82     buffer_t buffer;
83     buffer.ptr = ptr;
84     buffer.capacity = capacity;
85     buffer.size = 0;
86     return buffer;
87 }
88 
/* freeBuffer() :
 * releases the memory area referenced by `buff`.
 * Compatible with kBuffNull, since free(NULL) is a no-op. */
static void freeBuffer(buffer_t buff)
{
    void* const mem = buff.ptr;
    free(mem);
}
93 
94 
/* fillBuffer_fromHandle() :
 * reads up to buff->capacity bytes from `f` into the buffer,
 * and records the amount effectively read into buff->size.
 * A short read is not reported here : caller compares size and capacity. */
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
{
    buff->size = fread(buff->ptr, 1, buff->capacity, f);
}
100 
101 
102 /* @return : kBuffNull if any error */
createBuffer_fromFile(const char * fileName)103 static buffer_t createBuffer_fromFile(const char* fileName)
104 {
105     U64 const fileSize = UTIL_getFileSize(fileName);
106     size_t const bufferSize = (size_t) fileSize;
107 
108     if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
109     assert((U64)bufferSize == fileSize);   /* check overflow */
110 
111     {   FILE* const f = fopen(fileName, "rb");
112         if (f == NULL) return kBuffNull;
113 
114         buffer_t buff = createBuffer(bufferSize);
115         CONTROL(buff.ptr != NULL);
116 
117         fillBuffer_fromHandle(&buff, f);
118         CONTROL(buff.size == buff.capacity);
119 
120         fclose(f);   /* do nothing specific if fclose() fails */
121         return buff;
122     }
123 }
124 
125 
126 /* @return : kBuffNull if any error */
127 static buffer_t
createDictionaryBuffer(const char * dictionaryName,const void * srcBuffer,const size_t * srcBlockSizes,unsigned nbBlocks,size_t requestedDictSize)128 createDictionaryBuffer(const char* dictionaryName,
129                        const void* srcBuffer,
130                        const size_t* srcBlockSizes, unsigned nbBlocks,
131                        size_t requestedDictSize)
132 {
133     if (dictionaryName) {
134         DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
135         return createBuffer_fromFile(dictionaryName);  /* note : result might be kBuffNull */
136 
137     } else {
138 
139         DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
140                         (unsigned)requestedDictSize);
141         void* const dictBuffer = malloc(requestedDictSize);
142         CONTROL(dictBuffer != NULL);
143 
144         size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
145                                                       srcBuffer,
146                                                       srcBlockSizes, nbBlocks);
147         CONTROL(!ZSTD_isError(dictSize));
148 
149         buffer_t result;
150         result.ptr = dictBuffer;
151         result.capacity = requestedDictSize;
152         result.size = dictSize;
153         return result;
154     }
155 }
156 
157 
158 /*! BMK_loadFiles() :
159  *  Loads `buffer`, with content from files listed within `fileNamesTable`.
160  *  Fills `buffer` entirely.
161  * @return : 0 on success, !=0 on error */
loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char * const * fileNamesTable,unsigned nbFiles)162 static int loadFiles(void* buffer, size_t bufferSize,
163                      size_t* fileSizes,
164                      const char* const * fileNamesTable, unsigned nbFiles)
165 {
166     size_t pos = 0, totalSize = 0;
167 
168     for (unsigned n=0; n<nbFiles; n++) {
169         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
170         if (UTIL_isDirectory(fileNamesTable[n])) {
171             fileSizes[n] = 0;
172             continue;
173         }
174         if (fileSize == UTIL_FILESIZE_UNKNOWN) {
175             fileSizes[n] = 0;
176             continue;
177         }
178 
179         FILE* const f = fopen(fileNamesTable[n], "rb");
180         assert(f!=NULL);
181 
182         assert(pos <= bufferSize);
183         assert(fileSize <= bufferSize - pos);
184 
185         {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
186             assert(readSize == fileSize);
187             pos += readSize;
188         }
189         fileSizes[n] = (size_t)fileSize;
190         totalSize += (size_t)fileSize;
191         fclose(f);
192     }
193 
194     assert(totalSize == bufferSize);
195     return 0;
196 }
197 
198 
199 
200 /*---  slice_collection_t  ---*/
201 
/* slice_collection_t : a list of `nbSlices` memory segments.
 * slicePtrs[n] is the start of segment n, capacities[n] is its size.
 * Both tables are heap allocated, see freeSliceCollection(). */
typedef struct slice_collection_s {
    void** slicePtrs;
    size_t* capacities;
    size_t nbSlices;
} slice_collection_t;

/* value returned to signal an error */
static const slice_collection_t kNullCollection = { .slicePtrs = NULL, .capacities = NULL, .nbSlices = 0 };
209 
/* freeSliceCollection() :
 * releases the 2 tables of the collection.
 * Memory segments referenced by the slices are not released. */
static void freeSliceCollection(slice_collection_t collection)
{
    free(collection.capacities);
    free(collection.slicePtrs);
}
215 
216 /* shrinkSizes() :
217  * downsizes sizes of slices within collection, according to `newSizes`.
218  * every `newSizes` entry must be <= than its corresponding collection size */
shrinkSizes(slice_collection_t collection,const size_t * newSizes)219 void shrinkSizes(slice_collection_t collection,
220                  const size_t* newSizes)  /* presumed same size as collection */
221 {
222     size_t const nbSlices = collection.nbSlices;
223     for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
224         assert(newSizes[blockNb] <= collection.capacities[blockNb]);
225         collection.capacities[blockNb] = newSizes[blockNb];
226     }
227 }
228 
229 
230 /* splitSlices() :
231  * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
232  *            otherwise, creates exactly nbSlices slices,
233  *            by either truncating input (when smaller)
234  *            or repeating input from beginning */
235 static slice_collection_t
splitSlices(slice_collection_t srcSlices,size_t blockSize,size_t nbSlices)236 splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
237 {
238     if (blockSize==0) blockSize = (size_t)(-1);   /* means "do not cut" */
239     size_t nbSrcBlocks = 0;
240     for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
241         size_t pos = 0;
242         while (pos <= srcSlices.capacities[ssnb]) {
243             nbSrcBlocks++;
244             pos += blockSize;
245         }
246     }
247 
248     if (nbSlices == 0) nbSlices = nbSrcBlocks;
249 
250     void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
251     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
252     if (sliceTable == NULL || capacities == NULL) {
253         free(sliceTable);
254         free(capacities);
255         return kNullCollection;
256     }
257 
258     size_t ssnb = 0;
259     for (size_t sliceNb=0; sliceNb < nbSlices; ) {
260         ssnb = (ssnb + 1) % srcSlices.nbSlices;
261         size_t pos = 0;
262         char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
263         while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
264             size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
265             sliceTable[sliceNb] = ptr + pos;
266             capacities[sliceNb] = size;
267             sliceNb++;
268             pos += blockSize;
269         }
270     }
271 
272     slice_collection_t result;
273     result.nbSlices = nbSlices;
274     result.slicePtrs = sliceTable;
275     result.capacities = capacities;
276     return result;
277 }
278 
279 
/* sliceCollection_totalCapacity() :
 * @return : sum of the sizes of all slices of `sc` */
static size_t sliceCollection_totalCapacity(slice_collection_t sc)
{
    size_t sum = 0;
    size_t remaining = sc.nbSlices;
    while (remaining > 0) {
        remaining--;
        sum += sc.capacities[remaining];
    }
    return sum;
}
287 
288 
289 /* ---  buffer collection  --- */
290 
291 typedef struct {
292     buffer_t buffer;
293     slice_collection_t slices;
294 } buffer_collection_t;
295 
296 
/* freeBufferCollection() :
 * releases the slice tables and the buffer of `bc` */
static void freeBufferCollection(buffer_collection_t bc)
{
    freeSliceCollection(bc.slices);
    freeBuffer(bc.buffer);
}
302 
303 
304 static buffer_collection_t
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)305 createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
306 {
307     size_t const bufferSize = sliceCollection_totalCapacity(sc);
308 
309     buffer_t buffer = createBuffer(bufferSize);
310     CONTROL(buffer.ptr != NULL);
311 
312     size_t const nbSlices = sc.nbSlices;
313     void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
314     CONTROL(slices != NULL);
315 
316     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
317     CONTROL(capacities != NULL);
318 
319     char* const ptr = (char*)buffer.ptr;
320     size_t pos = 0;
321     for (size_t n=0; n < nbSlices; n++) {
322         capacities[n] = sc.capacities[n];
323         slices[n] = ptr + pos;
324         pos += capacities[n];
325     }
326 
327     buffer_collection_t result;
328     result.buffer = buffer;
329     result.slices.nbSlices = nbSlices;
330     result.slices.capacities = capacities;
331     result.slices.slicePtrs = slices;
332     return result;
333 }
334 
335 
336 /* @return : kBuffNull if any error */
337 static buffer_collection_t
createBufferCollection_fromFiles(const char * const * fileNamesTable,unsigned nbFiles)338 createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
339 {
340     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
341     assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
342     assert(totalSizeToLoad <= BENCH_SIZE_MAX);
343     size_t const loadedSize = (size_t)totalSizeToLoad;
344     assert(loadedSize > 0);
345     void* const srcBuffer = malloc(loadedSize);
346     assert(srcBuffer != NULL);
347 
348     assert(nbFiles > 0);
349     size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
350     assert(fileSizes != NULL);
351 
352     /* Load input buffer */
353     int const errorCode = loadFiles(srcBuffer, loadedSize,
354                                     fileSizes,
355                                     fileNamesTable, nbFiles);
356     assert(errorCode == 0);
357 
358     void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
359     assert(sliceTable != NULL);
360 
361     char* const ptr = (char*)srcBuffer;
362     size_t pos = 0;
363     unsigned fileNb = 0;
364     for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
365         sliceTable[fileNb] = ptr + pos;
366         pos += fileSizes[fileNb];
367     }
368     assert(pos == loadedSize);
369     assert(fileNb == nbFiles);
370 
371 
372     buffer_t buffer;
373     buffer.ptr = srcBuffer;
374     buffer.capacity = loadedSize;
375     buffer.size = loadedSize;
376 
377     slice_collection_t slices;
378     slices.slicePtrs = sliceTable;
379     slices.capacities = fileSizes;
380     slices.nbSlices = nbFiles;
381 
382     buffer_collection_t bc;
383     bc.buffer = buffer;
384     bc.slices = slices;
385     return bc;
386 }
387 
388 
389 
390 
391 /*---  ddict_collection_t  ---*/
392 
393 typedef struct {
394     ZSTD_DDict** ddicts;
395     size_t nbDDict;
396 } ddict_collection_t;
397 
398 static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
399 
/* freeDDictCollection() :
 * releases every dictionary of the collection, then the table itself */
static void freeDDictCollection(ddict_collection_t ddictc)
{
    ZSTD_DDict** const table = ddictc.ddicts;
    size_t n = 0;
    while (n < ddictc.nbDDict) {
        ZSTD_freeDDict(table[n]);
        n++;
    }
    free(table);
}
407 
408 /* returns .buffers=NULL if operation fails */
createDDictCollection(const void * dictBuffer,size_t dictSize,size_t nbDDict)409 static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
410 {
411     ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
412     assert(ddicts != NULL);
413     if (ddicts==NULL) return kNullDDictCollection;
414     for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
415         ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
416         assert(ddicts[dictNb] != NULL);
417     }
418     ddict_collection_t ddictc;
419     ddictc.ddicts = ddicts;
420     ddictc.nbDDict = nbDDict;
421     return ddictc;
422 }
423 
424 
425 /* mess with adresses, so that linear scanning dictionaries != linear address scanning */
shuffleDictionaries(ddict_collection_t dicts)426 void shuffleDictionaries(ddict_collection_t dicts)
427 {
428     size_t const nbDicts = dicts.nbDDict;
429     for (size_t r=0; r<nbDicts; r++) {
430         size_t const d = rand() % nbDicts;
431         ZSTD_DDict* tmpd = dicts.ddicts[d];
432         dicts.ddicts[d] = dicts.ddicts[r];
433         dicts.ddicts[r] = tmpd;
434     }
435     for (size_t r=0; r<nbDicts; r++) {
436         size_t const d1 = rand() % nbDicts;
437         size_t const d2 = rand() % nbDicts;
438         ZSTD_DDict* tmpd = dicts.ddicts[d1];
439         dicts.ddicts[d1] = dicts.ddicts[d2];
440         dicts.ddicts[d2] = tmpd;
441     }
442 }
443 
444 
445 /* ---   Compression  --- */
446 
447 /* compressBlocks() :
448  * @return : total compressed size of all blocks,
449  *        or 0 if error.
450  */
compressBlocks(size_t * cSizes,slice_collection_t dstBlockBuffers,slice_collection_t srcBlockBuffers,ZSTD_CDict * cdict,int cLevel)451 static size_t compressBlocks(size_t* cSizes,   /* optional (can be NULL). If present, must contain at least nbBlocks fields */
452                              slice_collection_t dstBlockBuffers,
453                              slice_collection_t srcBlockBuffers,
454                              ZSTD_CDict* cdict, int cLevel)
455 {
456     size_t const nbBlocks = srcBlockBuffers.nbSlices;
457     assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
458 
459     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
460     assert(cctx != NULL);
461 
462     size_t totalCSize = 0;
463     for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
464         size_t cBlockSize;
465         if (cdict == NULL) {
466             cBlockSize = ZSTD_compressCCtx(cctx,
467                             dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
468                             srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
469                             cLevel);
470         } else {
471             cBlockSize = ZSTD_compress_usingCDict(cctx,
472                             dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
473                             srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
474                             cdict);
475         }
476         CONTROL(!ZSTD_isError(cBlockSize));
477         if (cSizes) cSizes[blockNb] = cBlockSize;
478         totalCSize += cBlockSize;
479     }
480     return totalCSize;
481 }
482 
483 
484 /* ---  Benchmark  --- */
485 
486 typedef struct {
487     ZSTD_DCtx* dctx;
488     size_t nbDicts;
489     size_t dictNb;
490     ddict_collection_t dictionaries;
491 } decompressInstructions;
492 
createDecompressInstructions(ddict_collection_t dictionaries)493 decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
494 {
495     decompressInstructions di;
496     di.dctx = ZSTD_createDCtx();
497     assert(di.dctx != NULL);
498     di.nbDicts = dictionaries.nbDDict;
499     di.dictNb = 0;
500     di.dictionaries = dictionaries;
501     return di;
502 }
503 
/* freeDecompressInstructions() :
 * releases the decompression context of `di`.
 * Dictionaries are left untouched. */
void freeDecompressInstructions(decompressInstructions di)
{
    ZSTD_DCtx* const dctx = di.dctx;
    ZSTD_freeDCtx(dctx);
}
508 
509 /* benched function */
decompress(const void * src,size_t srcSize,void * dst,size_t dstCapacity,void * payload)510 size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
511 {
512     decompressInstructions* const di = (decompressInstructions*) payload;
513 
514     size_t const result = ZSTD_decompress_usingDDict(di->dctx,
515                                         dst, dstCapacity,
516                                         src, srcSize,
517                                         di->dictionaries.ddicts[di->dictNb]);
518 
519     di->dictNb = di->dictNb + 1;
520     if (di->dictNb >= di->nbDicts) di->dictNb = 0;
521 
522     return result;
523 }
524 
525 
/* benchMem() :
 * measures decompression speed of `srcBlocks` (compressed content) into `dstBlocks`,
 * each block being decompressed with the next dictionary of `dictionaries`.
 * Runs timed rounds until the time budget (nbRounds * RUN_TIME_DEFAULT_MS) is consumed,
 * displaying the best speed observed so far.
 * @return : 0 (failures abort the program) */
static int benchMem(slice_collection_t dstBlocks,
                    slice_collection_t srcBlocks,
                    ddict_collection_t dictionaries,
                    int nbRounds)
{
    assert(dstBlocks.nbSlices == srcBlocks.nbSlices);

    unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
    unsigned const total_time_ms = nbRounds * ms_per_round;

    double bestSpeed = 0.;

    BMK_timedFnState_t* const benchState =
            BMK_createTimedFnState(total_time_ms, ms_per_round);
    CONTROL(benchState != NULL);   /* creation can fail (allocation) */
    decompressInstructions di = createDecompressInstructions(dictionaries);
    BMK_benchParams_t const bp = {
        .benchFn = decompress,
        .benchPayload = &di,
        .initFn = NULL,
        .initPayload = NULL,
        .errorFn = ZSTD_isError,
        .blockCount = dstBlocks.nbSlices,
        .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
        .srcSizes = srcBlocks.capacities,
        .dstBuffers = dstBlocks.slicePtrs,
        .dstCapacities = dstBlocks.capacities,
        .blockResults = NULL
    };

    for (;;) {
        BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
        CONTROL(BMK_isSuccessful_runOutcome(outcome));

        BMK_runTime_t const result = BMK_extract_runTime(outcome);
        U64 const dTime_ns = result.nanoSecPerRun;
        double const dTime_sec = (double)dTime_ns / 1000000000;
        /* sum of values returned by decompress() during one run */
        size_t const srcSize = result.sumOfReturn;
        double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB);
        if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps;
        DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
        fflush(stdout);
        if (BMK_isCompleted_TimedFn(benchState)) break;
    }
    DISPLAY("\n");

    freeDecompressInstructions(di);
    BMK_freeTimedFnState(benchState);

    return 0;   /* success */
}
576 
577 
578 /*! bench() :
579  *  fileName : file to load for benchmarking purpose
580  *  dictionary : optional (can be NULL), file to load as dictionary,
581  *              if none provided : will be calculated on the fly by the program.
582  * @return : 0 is success, 1+ otherwise */
bench(const char ** fileNameTable,unsigned nbFiles,const char * dictionary,size_t blockSize,int clevel,unsigned nbDictMax,unsigned nbBlocks,int nbRounds)583 int bench(const char** fileNameTable, unsigned nbFiles,
584           const char* dictionary,
585           size_t blockSize, int clevel,
586           unsigned nbDictMax, unsigned nbBlocks,
587           int nbRounds)
588 {
589     int result = 0;
590 
591     DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
592     buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
593     CONTROL(srcs.buffer.ptr != NULL);
594     buffer_t srcBuffer = srcs.buffer;
595     size_t const srcSize = srcBuffer.size;
596     DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
597                     (double)srcSize / (1 MB));
598 
599     slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
600     nbBlocks = (unsigned)(srcSlices.nbSlices);
601     DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
602     if (blockSize)
603         DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
604     DISPLAYLEVEL(3, "\n");
605     size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
606 
607 
608     size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
609     CONTROL(dstCapacities != NULL);
610     size_t dstBufferCapacity = 0;
611     for (size_t bnb=0; bnb<nbBlocks; bnb++) {
612         dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
613         dstBufferCapacity += dstCapacities[bnb];
614     }
615 
616     buffer_t dstBuffer = createBuffer(dstBufferCapacity);
617     CONTROL(dstBuffer.ptr != NULL);
618 
619     void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
620     CONTROL(sliceTable != NULL);
621 
622     {   char* const ptr = dstBuffer.ptr;
623         size_t pos = 0;
624         for (size_t snb=0; snb < nbBlocks; snb++) {
625             sliceTable[snb] = ptr + pos;
626             pos += dstCapacities[snb];
627     }   }
628 
629     slice_collection_t dstSlices;
630     dstSlices.capacities = dstCapacities;
631     dstSlices.slicePtrs = sliceTable;
632     dstSlices.nbSlices = nbBlocks;
633 
634 
635     /* dictionary determination */
636     buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
637                                 srcs.buffer.ptr,
638                                 srcs.slices.capacities, srcs.slices.nbSlices,
639                                 DICTSIZE);
640     CONTROL(dictBuffer.ptr != NULL);
641 
642     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
643     CONTROL(cdict != NULL);
644 
645     size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
646     CONTROL(cTotalSizeNoDict != 0);
647     DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f  (%u bytes) \n",
648                     clevel,
649                     (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
650 
651     size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
652     CONTROL(cSizes != NULL);
653 
654     size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
655     CONTROL(cTotalSize != 0);
656     DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f  (%u bytes) \n",
657                     (unsigned)dictBuffer.size,
658                     (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
659 
660     /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
661     shrinkSizes(dstSlices, cSizes);
662 
663     size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
664     unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
665     size_t const allDictMem = dictMem * nbDicts;
666     DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
667                     nbDicts, (double)allDictMem / (1 MB));
668 
669     ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
670     CONTROL(dictionaries.ddicts != NULL);
671 
672     shuffleDictionaries(dictionaries);
673 
674     buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
675     CONTROL(resultCollection.buffer.ptr != NULL);
676 
677     result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds);
678 
679     /* free all heap objects in reverse order */
680     freeBufferCollection(resultCollection);
681     freeDDictCollection(dictionaries);
682     free(cSizes);
683     ZSTD_freeCDict(cdict);
684     freeBuffer(dictBuffer);
685     freeSliceCollection(dstSlices);
686     freeBuffer(dstBuffer);
687     freeSliceCollection(srcSlices);
688     freeBufferCollection(srcs);
689 
690     return result;
691 }
692 
693 
694 
695 /* ---  Command Line  --- */
696 
/*! readU32FromChar() :
 * @return : unsigned integer value read from input in `char` format.
 *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
 *  Note : function will exit() program if value overflows `unsigned`.
 *         The check is always active (it does not depend on assert()),
 *         and accepts every value up to the maximum of `unsigned`. */
static unsigned readU32FromChar(const char** stringPtr)
{
    static const char errorMsg[] = "error : numeric value is too large \n";
    unsigned const maxU = (unsigned)(-1);
    const char* p = *stringPtr;
    unsigned result = 0;

    while ((*p >= '0') && (*p <= '9')) {
        unsigned const digit = (unsigned)(*p - '0');
        /* overflow if result*10 + digit > maxU */
        if (result > (maxU - digit) / 10) {
            fprintf(stderr, "%s", errorMsg);
            exit(1);
        }
        result = result * 10 + digit;
        p++;
    }
    if ((*p == 'K') || (*p == 'M')) {
        unsigned const shift = (*p == 'M') ? 20 : 10;
        if (result > (maxU >> shift)) {   /* check overflow */
            fprintf(stderr, "%s", errorMsg);
            exit(1);
        }
        result <<= shift;
        p++;  /* skip `K` or `M` */
        if (*p == 'i') p++;
        if (*p == 'B') p++;
    }
    *stringPtr = p;
    return result;
}
724 
/** longCommandWArg() :
 *  tests whether *stringPtr starts with longCommand.
 * @return 1 when it does, and moves *stringPtr just past longCommand.
 * @return 0 otherwise, leaving *stringPtr untouched.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) return 0;
    *stringPtr += prefixLen;
    return 1;
}
737 
738 
usage(const char * exeName)739 int usage(const char* exeName)
740 {
741     DISPLAY (" \n");
742     DISPLAY (" %s [Options] filename(s) \n", exeName);
743     DISPLAY (" \n");
744     DISPLAY ("Options : \n");
745     DISPLAY ("-r          : recursively load all files in subdirectories (default: off) \n");
746     DISPLAY ("-B#         : split input into blocks of size # (default: no split) \n");
747     DISPLAY ("-#          : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
748     DISPLAY ("-D #        : use # as a dictionary (default: create one) \n");
749     DISPLAY ("-i#         : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
750     DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
751     DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
752     DISPLAY ("-h          : help (this text) \n");
753     return 0;
754 }
755 
/* bad_usage() :
 * reports an invalid command line, followed by the help text.
 * @return : 1, so that it can be used as exit code */
int bad_usage(const char* exeName)
{
    DISPLAY (" bad usage : \n");
    (void)usage(exeName);
    return 1;
}
762 
main(int argc,const char ** argv)763 int main (int argc, const char** argv)
764 {
765     int recursiveMode = 0;
766     int nbRounds = BENCH_TIME_DEFAULT_S;
767     const char* const exeName = argv[0];
768 
769     if (argc < 2) return bad_usage(exeName);
770 
771     const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
772     assert(nameTable != NULL);
773     unsigned nameIdx = 0;
774 
775     const char* dictionary = NULL;
776     int cLevel = CLEVEL_DEFAULT;
777     size_t blockSize = BLOCKSIZE_DEFAULT;
778     unsigned nbDicts = 0;  /* determine nbDicts automatically: 1 dictionary per block */
779     unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
780 
781     for (int argNb = 1; argNb < argc ; argNb++) {
782         const char* argument = argv[argNb];
783         if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
784         if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
785         if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
786         if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
787         if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
788         if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
789         if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
790         if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
791         if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
792         if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
793         if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
794         /* anything that's not a command is a filename */
795         nameTable[nameIdx++] = argument;
796     }
797 
798     const char** filenameTable = nameTable;
799     unsigned nbFiles = nameIdx;
800     char* buffer_containing_filenames = NULL;
801 
802     if (recursiveMode) {
803 #ifndef UTIL_HAS_CREATEFILELIST
804         assert(0);   /* missing capability, do not run */
805 #endif
806         filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
807     }
808 
809     int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
810 
811     free(buffer_containing_filenames);
812     free(nameTable);
813 
814     return result;
815 }
816