1 /*
2  * Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* largeNbDicts
12  * This is a benchmark test tool
13  * dedicated to the specific case of dictionary decompression
14  * using a very large nb of dictionaries
15  * thus suffering latency from lots of cache misses.
16  * It's created in a bid to investigate performance and find optimizations. */
17 
18 
19 /*---  Dependencies  ---*/
20 
21 #include <stddef.h>   /* size_t */
22 #include <stdlib.h>   /* malloc, free, abort */
23 #include <stdio.h>    /* fprintf */
24 #include <limits.h>   /* UINT_MAX */
25 #include <assert.h>   /* assert */
26 
27 #include "util.h"
28 #include "benchfn.h"
29 #define ZSTD_STATIC_LINKING_ONLY
30 #include "zstd.h"
31 #include "zdict.h"
32 
33 
34 /*---  Constants  --- */
35 
/* Size suffixes, meant to be written after a number :
 * `4 KB` expands to `4 *(1 << 10)` */
#define KB  *(1 << 10)
#define MB  *(1 << 20)

/* Default settings */
#define BLOCKSIZE_DEFAULT      0        /* no slicing into blocks */
#define CLEVEL_DEFAULT         3        /* compression level */
#define DISPLAY_LEVEL_DEFAULT  3        /* verbosity */
#define DICTSIZE               (4 KB)   /* target size when a dictionary gets trained */

/* Benchmark duration : nb of rounds, and duration of each round */
#define BENCH_TIME_DEFAULT_S   6
#define RUN_TIME_DEFAULT_MS    1000
#define BENCH_TIME_DEFAULT_MS  (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)

/* Upper limit for the total size of input files */
#define BENCH_SIZE_MAX         (1200 MB)
50 
51 
52 /*---  Macros  ---*/
53 
/* CONTROL() :
 * runtime check, which aborts the program when condition `c` is false.
 * Unlike assert(), it does not depend on NDEBUG.
 * Wrapped into do { } while (0), so that `CONTROL(c);` is a single statement,
 * which remains valid as the un-braced body of an if / else. */
#define CONTROL(c)   do { if (!(c)) abort(); } while (0)

/* MIN() :
 * smallest of 2 values. Note : arguments may be evaluated twice. */
#undef MIN
#define MIN(a,b)     ((a) < (b) ? (a) : (b))
57 
58 
59 /*---  Display Macros  ---*/
60 
61 #define DISPLAY(...)         fprintf(stdout, __VA_ARGS__)
62 #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
63 static int g_displayLevel = DISPLAY_LEVEL_DEFAULT;   /* 0 : no display,  1: errors,  2 : + result + interaction + warnings,  3 : + progression,  4 : + information */
64 
65 
66 /*---  buffer_t  ---*/
67 
/* buffer_t :
 * a memory area, and how much of it currently holds content */
typedef struct {
    void* ptr;         /* start of the memory area */
    size_t size;       /* nb of bytes holding content */
    size_t capacity;   /* nb of bytes of the memory area */
} buffer_t;

/* value returned by buffer constructors to signal an error */
static const buffer_t kBuffNull = { .ptr = NULL, .size = 0, .capacity = 0 };
75 
76 /* @return : kBuffNull if any error */
createBuffer(size_t capacity)77 static buffer_t createBuffer(size_t capacity)
78 {
79     assert(capacity > 0);
80     void* const ptr = malloc(capacity);
81     if (ptr==NULL) return kBuffNull;
82 
83     buffer_t buffer;
84     buffer.ptr = ptr;
85     buffer.capacity = capacity;
86     buffer.size = 0;
87     return buffer;
88 }
89 
/* freeBuffer() :
 * releases the memory area referenced by `buff`.
 * Compatible with kBuffNull, since free(NULL) does nothing. */
static void freeBuffer(buffer_t buff)
{
    void* const memory = buff.ptr;
    free(memory);
}
94 
95 
/* fillBuffer_fromHandle() :
 * reads up to buff->capacity bytes from `f` into buff->ptr,
 * and stores into buff->size the nb of bytes effectively read. */
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
{
    buff->size = fread(buff->ptr, 1, buff->capacity, f);
}
101 
102 
103 /* @return : kBuffNull if any error */
createBuffer_fromFile(const char * fileName)104 static buffer_t createBuffer_fromFile(const char* fileName)
105 {
106     U64 const fileSize = UTIL_getFileSize(fileName);
107     size_t const bufferSize = (size_t) fileSize;
108 
109     if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
110     assert((U64)bufferSize == fileSize);   /* check overflow */
111 
112     {   FILE* const f = fopen(fileName, "rb");
113         if (f == NULL) return kBuffNull;
114 
115         buffer_t buff = createBuffer(bufferSize);
116         CONTROL(buff.ptr != NULL);
117 
118         fillBuffer_fromHandle(&buff, f);
119         CONTROL(buff.size == buff.capacity);
120 
121         fclose(f);   /* do nothing specific if fclose() fails */
122         return buff;
123     }
124 }
125 
126 
127 /* @return : kBuffNull if any error */
128 static buffer_t
createDictionaryBuffer(const char * dictionaryName,const void * srcBuffer,const size_t * srcBlockSizes,size_t nbBlocks,size_t requestedDictSize)129 createDictionaryBuffer(const char* dictionaryName,
130                        const void* srcBuffer,
131                        const size_t* srcBlockSizes, size_t nbBlocks,
132                        size_t requestedDictSize)
133 {
134     if (dictionaryName) {
135         DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
136         return createBuffer_fromFile(dictionaryName);  /* note : result might be kBuffNull */
137 
138     } else {
139 
140         DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
141                         (unsigned)requestedDictSize);
142         void* const dictBuffer = malloc(requestedDictSize);
143         CONTROL(dictBuffer != NULL);
144 
145         assert(nbBlocks <= UINT_MAX);
146         size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
147                                                       srcBuffer,
148                                                       srcBlockSizes, (unsigned)nbBlocks);
149         CONTROL(!ZSTD_isError(dictSize));
150 
151         buffer_t result;
152         result.ptr = dictBuffer;
153         result.capacity = requestedDictSize;
154         result.size = dictSize;
155         return result;
156     }
157 }
158 
159 
160 /*! BMK_loadFiles() :
161  *  Loads `buffer`, with content from files listed within `fileNamesTable`.
162  *  Fills `buffer` entirely.
163  * @return : 0 on success, !=0 on error */
loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char * const * fileNamesTable,unsigned nbFiles)164 static int loadFiles(void* buffer, size_t bufferSize,
165                      size_t* fileSizes,
166                      const char* const * fileNamesTable, unsigned nbFiles)
167 {
168     size_t pos = 0, totalSize = 0;
169 
170     for (unsigned n=0; n<nbFiles; n++) {
171         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
172         if (UTIL_isDirectory(fileNamesTable[n])) {
173             fileSizes[n] = 0;
174             continue;
175         }
176         if (fileSize == UTIL_FILESIZE_UNKNOWN) {
177             fileSizes[n] = 0;
178             continue;
179         }
180 
181         FILE* const f = fopen(fileNamesTable[n], "rb");
182         assert(f!=NULL);
183 
184         assert(pos <= bufferSize);
185         assert(fileSize <= bufferSize - pos);
186 
187         {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
188             assert(readSize == fileSize);
189             pos += readSize;
190         }
191         fileSizes[n] = (size_t)fileSize;
192         totalSize += (size_t)fileSize;
193         fclose(f);
194     }
195 
196     assert(totalSize == bufferSize);
197     return 0;
198 }
199 
200 
201 
202 /*---  slice_collection_t  ---*/
203 
/* slice_collection_t :
 * a list of memory slices, described by 2 tables of `nbSlices` entries each */
typedef struct {
    void** slicePtrs;     /* start address of each slice */
    size_t* capacities;   /* size of each slice */
    size_t nbSlices;      /* nb of slices */
} slice_collection_t;

/* value returned to signal an error */
static const slice_collection_t kNullCollection = { .slicePtrs = NULL, .capacities = NULL, .nbSlices = 0 };
211 
/* freeSliceCollection() :
 * releases the 2 tables of the collection.
 * Memory referenced by the slices themselves is left untouched. */
static void freeSliceCollection(slice_collection_t collection)
{
    free(collection.capacities);
    free(collection.slicePtrs);
}
217 
218 /* shrinkSizes() :
219  * downsizes sizes of slices within collection, according to `newSizes`.
220  * every `newSizes` entry must be <= than its corresponding collection size */
shrinkSizes(slice_collection_t collection,const size_t * newSizes)221 void shrinkSizes(slice_collection_t collection,
222                  const size_t* newSizes)  /* presumed same size as collection */
223 {
224     size_t const nbSlices = collection.nbSlices;
225     for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
226         assert(newSizes[blockNb] <= collection.capacities[blockNb]);
227         collection.capacities[blockNb] = newSizes[blockNb];
228     }
229 }
230 
231 
232 /* splitSlices() :
233  * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
234  *            otherwise, creates exactly nbSlices slices,
235  *            by either truncating input (when smaller)
236  *            or repeating input from beginning */
237 static slice_collection_t
splitSlices(slice_collection_t srcSlices,size_t blockSize,size_t nbSlices)238 splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
239 {
240     if (blockSize==0) blockSize = (size_t)(-1);   /* means "do not cut" */
241     size_t nbSrcBlocks = 0;
242     for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
243         size_t pos = 0;
244         while (pos <= srcSlices.capacities[ssnb]) {
245             nbSrcBlocks++;
246             pos += blockSize;
247         }
248     }
249 
250     if (nbSlices == 0) nbSlices = nbSrcBlocks;
251 
252     void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
253     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
254     if (sliceTable == NULL || capacities == NULL) {
255         free(sliceTable);
256         free(capacities);
257         return kNullCollection;
258     }
259 
260     size_t ssnb = 0;
261     for (size_t sliceNb=0; sliceNb < nbSlices; ) {
262         ssnb = (ssnb + 1) % srcSlices.nbSlices;
263         size_t pos = 0;
264         char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
265         while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
266             size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
267             sliceTable[sliceNb] = ptr + pos;
268             capacities[sliceNb] = size;
269             sliceNb++;
270             pos += blockSize;
271         }
272     }
273 
274     slice_collection_t result;
275     result.nbSlices = nbSlices;
276     result.slicePtrs = sliceTable;
277     result.capacities = capacities;
278     return result;
279 }
280 
281 
/* sliceCollection_totalCapacity() :
 * @return : sum of the sizes of all slices within `sc` */
static size_t sliceCollection_totalCapacity(slice_collection_t sc)
{
    size_t sum = 0;
    size_t idx = sc.nbSlices;
    while (idx > 0) {
        idx--;
        sum += sc.capacities[idx];
    }
    return sum;
}
289 
290 
291 /* ---  buffer collection  --- */
292 
293 typedef struct {
294     buffer_t buffer;
295     slice_collection_t slices;
296 } buffer_collection_t;
297 
298 
/* freeBufferCollection() :
 * releases both the slice tables and the buffer of `bc` */
static void freeBufferCollection(buffer_collection_t bc)
{
    freeSliceCollection(bc.slices);
    freeBuffer(bc.buffer);
}
304 
305 
306 static buffer_collection_t
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)307 createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
308 {
309     size_t const bufferSize = sliceCollection_totalCapacity(sc);
310 
311     buffer_t buffer = createBuffer(bufferSize);
312     CONTROL(buffer.ptr != NULL);
313 
314     size_t const nbSlices = sc.nbSlices;
315     void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
316     CONTROL(slices != NULL);
317 
318     size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
319     CONTROL(capacities != NULL);
320 
321     char* const ptr = (char*)buffer.ptr;
322     size_t pos = 0;
323     for (size_t n=0; n < nbSlices; n++) {
324         capacities[n] = sc.capacities[n];
325         slices[n] = ptr + pos;
326         pos += capacities[n];
327     }
328 
329     buffer_collection_t result;
330     result.buffer = buffer;
331     result.slices.nbSlices = nbSlices;
332     result.slices.capacities = capacities;
333     result.slices.slicePtrs = slices;
334     return result;
335 }
336 
337 
338 /* @return : kBuffNull if any error */
339 static buffer_collection_t
createBufferCollection_fromFiles(const char * const * fileNamesTable,unsigned nbFiles)340 createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
341 {
342     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
343     assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
344     assert(totalSizeToLoad <= BENCH_SIZE_MAX);
345     size_t const loadedSize = (size_t)totalSizeToLoad;
346     assert(loadedSize > 0);
347     void* const srcBuffer = malloc(loadedSize);
348     assert(srcBuffer != NULL);
349 
350     assert(nbFiles > 0);
351     size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
352     assert(fileSizes != NULL);
353 
354     /* Load input buffer */
355     int const errorCode = loadFiles(srcBuffer, loadedSize,
356                                     fileSizes,
357                                     fileNamesTable, nbFiles);
358     assert(errorCode == 0);
359 
360     void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
361     assert(sliceTable != NULL);
362 
363     char* const ptr = (char*)srcBuffer;
364     size_t pos = 0;
365     unsigned fileNb = 0;
366     for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
367         sliceTable[fileNb] = ptr + pos;
368         pos += fileSizes[fileNb];
369     }
370     assert(pos == loadedSize);
371     assert(fileNb == nbFiles);
372 
373 
374     buffer_t buffer;
375     buffer.ptr = srcBuffer;
376     buffer.capacity = loadedSize;
377     buffer.size = loadedSize;
378 
379     slice_collection_t slices;
380     slices.slicePtrs = sliceTable;
381     slices.capacities = fileSizes;
382     slices.nbSlices = nbFiles;
383 
384     buffer_collection_t bc;
385     bc.buffer = buffer;
386     bc.slices = slices;
387     return bc;
388 }
389 
390 
391 
392 
393 /*---  ddict_collection_t  ---*/
394 
395 typedef struct {
396     ZSTD_DDict** ddicts;
397     size_t nbDDict;
398 } ddict_collection_t;
399 
400 static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
401 
/* freeDDictCollection() :
 * releases every dictionary of the collection, then the table itself */
static void freeDDictCollection(ddict_collection_t ddictc)
{
    ZSTD_DDict** const table = ddictc.ddicts;
    size_t const count = ddictc.nbDDict;
    size_t idx = 0;
    while (idx < count) {
        ZSTD_freeDDict(table[idx]);
        idx++;
    }
    free(table);
}
409 
410 /* returns .buffers=NULL if operation fails */
createDDictCollection(const void * dictBuffer,size_t dictSize,size_t nbDDict)411 static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
412 {
413     ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
414     assert(ddicts != NULL);
415     if (ddicts==NULL) return kNullDDictCollection;
416     for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
417         ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
418         assert(ddicts[dictNb] != NULL);
419     }
420     ddict_collection_t ddictc;
421     ddictc.ddicts = ddicts;
422     ddictc.nbDDict = nbDDict;
423     return ddictc;
424 }
425 
426 
427 /* mess with addresses, so that linear scanning dictionaries != linear address scanning */
shuffleDictionaries(ddict_collection_t dicts)428 void shuffleDictionaries(ddict_collection_t dicts)
429 {
430     size_t const nbDicts = dicts.nbDDict;
431     for (size_t r=0; r<nbDicts; r++) {
432         size_t const d = rand() % nbDicts;
433         ZSTD_DDict* tmpd = dicts.ddicts[d];
434         dicts.ddicts[d] = dicts.ddicts[r];
435         dicts.ddicts[r] = tmpd;
436     }
437     for (size_t r=0; r<nbDicts; r++) {
438         size_t const d1 = rand() % nbDicts;
439         size_t const d2 = rand() % nbDicts;
440         ZSTD_DDict* tmpd = dicts.ddicts[d1];
441         dicts.ddicts[d1] = dicts.ddicts[d2];
442         dicts.ddicts[d2] = tmpd;
443     }
444 }
445 
446 
447 /* ---   Compression  --- */
448 
449 /* compressBlocks() :
450  * @return : total compressed size of all blocks,
451  *        or 0 if error.
452  */
compressBlocks(size_t * cSizes,slice_collection_t dstBlockBuffers,slice_collection_t srcBlockBuffers,ZSTD_CDict * cdict,int cLevel)453 static size_t compressBlocks(size_t* cSizes,   /* optional (can be NULL). If present, must contain at least nbBlocks fields */
454                              slice_collection_t dstBlockBuffers,
455                              slice_collection_t srcBlockBuffers,
456                              ZSTD_CDict* cdict, int cLevel)
457 {
458     size_t const nbBlocks = srcBlockBuffers.nbSlices;
459     assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
460 
461     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
462     assert(cctx != NULL);
463 
464     size_t totalCSize = 0;
465     for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
466         size_t cBlockSize;
467         if (cdict == NULL) {
468             cBlockSize = ZSTD_compressCCtx(cctx,
469                             dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
470                             srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
471                             cLevel);
472         } else {
473             cBlockSize = ZSTD_compress_usingCDict(cctx,
474                             dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
475                             srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
476                             cdict);
477         }
478         CONTROL(!ZSTD_isError(cBlockSize));
479         if (cSizes) cSizes[blockNb] = cBlockSize;
480         totalCSize += cBlockSize;
481     }
482     return totalCSize;
483 }
484 
485 
486 /* ---  Benchmark  --- */
487 
488 typedef struct {
489     ZSTD_DCtx* dctx;
490     size_t nbDicts;
491     size_t dictNb;
492     ddict_collection_t dictionaries;
493 } decompressInstructions;
494 
createDecompressInstructions(ddict_collection_t dictionaries)495 decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
496 {
497     decompressInstructions di;
498     di.dctx = ZSTD_createDCtx();
499     assert(di.dctx != NULL);
500     di.nbDicts = dictionaries.nbDDict;
501     di.dictNb = 0;
502     di.dictionaries = dictionaries;
503     return di;
504 }
505 
/* freeDecompressInstructions() :
 * releases the decompression context.
 * Dictionaries are left untouched. */
void freeDecompressInstructions(decompressInstructions di)
{
    ZSTD_DCtx* const dctx = di.dctx;
    ZSTD_freeDCtx(dctx);
}
510 
511 /* benched function */
decompress(const void * src,size_t srcSize,void * dst,size_t dstCapacity,void * payload)512 size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
513 {
514     decompressInstructions* const di = (decompressInstructions*) payload;
515 
516     size_t const result = ZSTD_decompress_usingDDict(di->dctx,
517                                         dst, dstCapacity,
518                                         src, srcSize,
519                                         di->dictionaries.ddicts[di->dictNb]);
520 
521     di->dictNb = di->dictNb + 1;
522     if (di->dictNb >= di->nbDicts) di->dictNb = 0;
523 
524     return result;
525 }
526 
527 
/* benchMem() :
 * measures decompression speed of `srcBlocks` (compressed blocks) into `dstBlocks`,
 * cycling through `dictionaries`,
 * during `nbRounds` rounds of RUN_TIME_DEFAULT_MS milliseconds each.
 * Best speed measured so far is displayed after each run.
 * Aborts the program if the benchmark state cannot be created, or if a run fails.
 * @return : 0 (success) */
static int benchMem(slice_collection_t dstBlocks,
                    slice_collection_t srcBlocks,
                    ddict_collection_t dictionaries,
                    int nbRounds)
{
    assert(dstBlocks.nbSlices == srcBlocks.nbSlices);

    unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
    unsigned const total_time_ms = nbRounds * ms_per_round;

    double bestSpeed = 0.;

    BMK_timedFnState_t* const benchState =
            BMK_createTimedFnState(total_time_ms, ms_per_round);
    /* NOTE(review): presumably NULL on allocation failure; the state is used by every call below */
    CONTROL(benchState != NULL);

    decompressInstructions di = createDecompressInstructions(dictionaries);
    BMK_benchParams_t const bp = {
        .benchFn = decompress,
        .benchPayload = &di,
        .initFn = NULL,
        .initPayload = NULL,
        .errorFn = ZSTD_isError,
        .blockCount = dstBlocks.nbSlices,
        .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
        .srcSizes = srcBlocks.capacities,
        .dstBuffers = dstBlocks.slicePtrs,
        .dstCapacities = dstBlocks.capacities,
        .blockResults = NULL
    };

    for (;;) {
        BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
        CONTROL(BMK_isSuccessful_runOutcome(outcome));

        BMK_runTime_t const result = BMK_extract_runTime(outcome);
        double const dTime_ns = result.nanoSecPerRun;
        double const dTime_sec = (double)dTime_ns / 1000000000;
        size_t const srcSize = result.sumOfReturn;   /* sum of values returned by decompress() */
        double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB);
        if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps;
        DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
        fflush(stdout);
        if (BMK_isCompleted_TimedFn(benchState)) break;
    }
    DISPLAY("\n");

    freeDecompressInstructions(di);
    BMK_freeTimedFnState(benchState);

    return 0;   /* success */
}
578 
579 
580 /*! bench() :
581  *  fileName : file to load for benchmarking purpose
582  *  dictionary : optional (can be NULL), file to load as dictionary,
583  *              if none provided : will be calculated on the fly by the program.
584  * @return : 0 is success, 1+ otherwise */
bench(const char ** fileNameTable,unsigned nbFiles,const char * dictionary,size_t blockSize,int clevel,unsigned nbDictMax,unsigned nbBlocks,int nbRounds)585 int bench(const char** fileNameTable, unsigned nbFiles,
586           const char* dictionary,
587           size_t blockSize, int clevel,
588           unsigned nbDictMax, unsigned nbBlocks,
589           int nbRounds)
590 {
591     int result = 0;
592 
593     DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
594     buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
595     CONTROL(srcs.buffer.ptr != NULL);
596     buffer_t srcBuffer = srcs.buffer;
597     size_t const srcSize = srcBuffer.size;
598     DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
599                     (double)srcSize / (1 MB));
600 
601     slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
602     nbBlocks = (unsigned)(srcSlices.nbSlices);
603     DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
604     if (blockSize)
605         DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
606     DISPLAYLEVEL(3, "\n");
607     size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
608 
609 
610     size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
611     CONTROL(dstCapacities != NULL);
612     size_t dstBufferCapacity = 0;
613     for (size_t bnb=0; bnb<nbBlocks; bnb++) {
614         dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
615         dstBufferCapacity += dstCapacities[bnb];
616     }
617 
618     buffer_t dstBuffer = createBuffer(dstBufferCapacity);
619     CONTROL(dstBuffer.ptr != NULL);
620 
621     void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
622     CONTROL(sliceTable != NULL);
623 
624     {   char* const ptr = dstBuffer.ptr;
625         size_t pos = 0;
626         for (size_t snb=0; snb < nbBlocks; snb++) {
627             sliceTable[snb] = ptr + pos;
628             pos += dstCapacities[snb];
629     }   }
630 
631     slice_collection_t dstSlices;
632     dstSlices.capacities = dstCapacities;
633     dstSlices.slicePtrs = sliceTable;
634     dstSlices.nbSlices = nbBlocks;
635 
636 
637     /* dictionary determination */
638     buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
639                                 srcs.buffer.ptr,
640                                 srcs.slices.capacities, srcs.slices.nbSlices,
641                                 DICTSIZE);
642     CONTROL(dictBuffer.ptr != NULL);
643 
644     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
645     CONTROL(cdict != NULL);
646 
647     size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
648     CONTROL(cTotalSizeNoDict != 0);
649     DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f  (%u bytes) \n",
650                     clevel,
651                     (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
652 
653     size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
654     CONTROL(cSizes != NULL);
655 
656     size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
657     CONTROL(cTotalSize != 0);
658     DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f  (%u bytes) \n",
659                     (unsigned)dictBuffer.size,
660                     (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
661 
662     /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
663     shrinkSizes(dstSlices, cSizes);
664 
665     size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
666     unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
667     size_t const allDictMem = dictMem * nbDicts;
668     DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
669                     nbDicts, (double)allDictMem / (1 MB));
670 
671     ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
672     CONTROL(dictionaries.ddicts != NULL);
673 
674     shuffleDictionaries(dictionaries);
675 
676     buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
677     CONTROL(resultCollection.buffer.ptr != NULL);
678 
679     result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds);
680 
681     /* free all heap objects in reverse order */
682     freeBufferCollection(resultCollection);
683     freeDDictCollection(dictionaries);
684     free(cSizes);
685     ZSTD_freeCDict(cdict);
686     freeBuffer(dictBuffer);
687     freeSliceCollection(dstSlices);
688     freeBuffer(dstBuffer);
689     freeSliceCollection(srcSlices);
690     freeBufferCollection(srcs);
691 
692     return result;
693 }
694 
695 
696 
697 /* ---  Command Line  --- */
698 
/*! readU32FromChar() :
 * @return : unsigned integer value read from input in `char` format.
 *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
 *  Returns 0, without advancing, when input does not start with a digit.
 *  Note : function will exit() program if digit sequence overflows.
 *         The check is performed at runtime : it does not depend on NDEBUG. */
static unsigned readU32FromChar(const char** stringPtr)
{
    unsigned result = 0;
    while ((**stringPtr >='0') && (**stringPtr <='9')) {
        unsigned const max = (((unsigned)(-1)) / 10) - 1;
        if (result > max) goto _overflow;
        result *= 10;
        result += (unsigned)(**stringPtr - '0');
        (*stringPtr)++;
    }
    if ((**stringPtr=='K') || (**stringPtr=='M')) {
        unsigned const maxK = ((unsigned)(-1)) >> 10;
        if (result > maxK) goto _overflow;
        result <<= 10;
        if (**stringPtr=='M') {
            if (result > maxK) goto _overflow;
            result <<= 10;
        }
        (*stringPtr)++;  /* skip `K` or `M` */
        if (**stringPtr=='i') (*stringPtr)++;
        if (**stringPtr=='B') (*stringPtr)++;
    }
    return result;

_overflow:
    fprintf(stderr, "error : numeric value is too large \n");
    exit(1);
}
726 
/** longCommandWArg() :
 *  checks if *stringPtr starts with longCommand.
 *  If it does, *stringPtr is advanced to the position which immediately follows longCommand.
 *  Otherwise, *stringPtr is left unmodified.
 * @return 1 if *stringPtr starts with longCommand, 0 otherwise.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) return 0;
    *stringPtr += prefixLen;
    return 1;
}
739 
740 
usage(const char * exeName)741 int usage(const char* exeName)
742 {
743     DISPLAY (" \n");
744     DISPLAY (" %s [Options] filename(s) \n", exeName);
745     DISPLAY (" \n");
746     DISPLAY ("Options : \n");
747     DISPLAY ("-r          : recursively load all files in subdirectories (default: off) \n");
748     DISPLAY ("-B#         : split input into blocks of size # (default: no split) \n");
749     DISPLAY ("-#          : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
750     DISPLAY ("-D #        : use # as a dictionary (default: create one) \n");
751     DISPLAY ("-i#         : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
752     DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
753     DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
754     DISPLAY ("-h          : help (this text) \n");
755     return 0;
756 }
757 
/* bad_usage() :
 * reports an invalid command line, followed by the help text.
 * @return : 1, which can be used directly as exit code */
int bad_usage(const char* exeName)
{
    DISPLAY (" bad usage : \n");
    (void)usage(exeName);
    return 1;
}
764 
main(int argc,const char ** argv)765 int main (int argc, const char** argv)
766 {
767     int recursiveMode = 0;
768     int nbRounds = BENCH_TIME_DEFAULT_S;
769     const char* const exeName = argv[0];
770 
771     if (argc < 2) return bad_usage(exeName);
772 
773     const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
774     assert(nameTable != NULL);
775     unsigned nameIdx = 0;
776 
777     const char* dictionary = NULL;
778     int cLevel = CLEVEL_DEFAULT;
779     size_t blockSize = BLOCKSIZE_DEFAULT;
780     unsigned nbDicts = 0;  /* determine nbDicts automatically: 1 dictionary per block */
781     unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
782 
783     for (int argNb = 1; argNb < argc ; argNb++) {
784         const char* argument = argv[argNb];
785         if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
786         if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
787         if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
788         if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
789         if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
790         if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
791         if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
792         if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
793         if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
794         if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
795         if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
796         /* anything that's not a command is a filename */
797         nameTable[nameIdx++] = argument;
798     }
799 
800     const char** filenameTable = nameTable;
801     unsigned nbFiles = nameIdx;
802     char* buffer_containing_filenames = NULL;
803 
804     if (recursiveMode) {
805 #ifndef UTIL_HAS_CREATEFILELIST
806         assert(0);   /* missing capability, do not run */
807 #endif
808         filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
809     }
810 
811     int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
812 
813     free(buffer_containing_filenames);
814     free(nameTable);
815 
816     return result;
817 }
818