1 /*
2 * Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* largeNbDicts
12 * This is a benchmark test tool
13 * dedicated to the specific case of dictionary decompression
14 * using a very large nb of dictionaries
15 * thus suffering latency from lots of cache misses.
16 * It's created in a bid to investigate performance and find optimizations. */
17
18
19 /*--- Dependencies ---*/
20
21 #include <stddef.h> /* size_t */
22 #include <stdlib.h> /* malloc, free, abort */
23 #include <stdio.h> /* fprintf */
24 #include <assert.h> /* assert */
25
26 #include "util.h"
27 #include "benchfn.h"
28 #define ZSTD_STATIC_LINKING_ONLY
29 #include "zstd.h"
30 #include "zdict.h"
31
32
33 /*--- Constants --- */
34
/* Size-unit suffixes : meant to be written right after a number,
 * e.g. `4 KB` expands to `4 *(1<<10)`. */
#define KB *(1<<10)
#define MB *(1<<20)

#define BLOCKSIZE_DEFAULT 0 /* no slicing into blocks */
#define DICTSIZE (4 KB) /* size requested when the dictionary is trained by the program */
#define CLEVEL_DEFAULT 3 /* compression level used when none is selected on command line */

#define BENCH_TIME_DEFAULT_S 6 /* also used as default nb of benchmark rounds */
#define RUN_TIME_DEFAULT_MS 1000 /* duration of one benchmark round */
#define BENCH_TIME_DEFAULT_MS (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)

#define DISPLAY_LEVEL_DEFAULT 3 /* initial value of g_displayLevel */

#define BENCH_SIZE_MAX (1200 MB) /* upper limit for the total size of loaded input files */
49
50
51 /*--- Macros ---*/
52
/* CONTROL() :
 * run-time check which stays active in every build mode (unlike assert()) :
 * abort()s the program when condition `c` is false.
 * Wrapped into do { } while (0), so that `CONTROL(x);` is exactly one statement,
 * and remains valid inside an un-braced if / else. */
#define CONTROL(c) do { if (!(c)) abort(); } while (0)
#undef MIN   /* drop any previous definition before providing our own */
/* MIN() : smallest of 2 values. Note : each argument may be evaluated twice */
#define MIN(a,b) ((a) < (b) ? (a) : (b))
56
57
58 /*--- Display Macros ---*/
59
60 #define DISPLAY(...) fprintf(stdout, __VA_ARGS__)
61 #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
62 static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
63
64
65 /*--- buffer_t ---*/
66
/* buffer_t :
 * a heap-allocated memory area, with its fill level. */
typedef struct {
    void* ptr;        /* start of the memory area (released by freeBuffer()) */
    size_t size;      /* nb of bytes currently holding content */
    size_t capacity;  /* nb of bytes allocated at `ptr` */
} buffer_t;

/* value returned by buffer creators to signal an error */
static const buffer_t kBuffNull = { NULL, 0, 0 };
74
75 /* @return : kBuffNull if any error */
createBuffer(size_t capacity)76 static buffer_t createBuffer(size_t capacity)
77 {
78 assert(capacity > 0);
79 void* const ptr = malloc(capacity);
80 if (ptr==NULL) return kBuffNull;
81
82 buffer_t buffer;
83 buffer.ptr = ptr;
84 buffer.capacity = capacity;
85 buffer.size = 0;
86 return buffer;
87 }
88
/* freeBuffer() :
 * releases the memory area referenced by `buff`.
 * A buffer with a NULL `ptr` (such as kBuffNull) is accepted : free(NULL) does nothing. */
static void freeBuffer(buffer_t buff)
{
    void* const mem = buff.ptr;
    free(mem);
}
93
94
/* fillBuffer_fromHandle() :
 * reads up to `buff->capacity` bytes from `f` into `buff`,
 * and records into `buff->size` the nb of bytes effectively read. */
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
{
    buff->size = fread(buff->ptr, 1, buff->capacity, f);
}
100
101
102 /* @return : kBuffNull if any error */
createBuffer_fromFile(const char * fileName)103 static buffer_t createBuffer_fromFile(const char* fileName)
104 {
105 U64 const fileSize = UTIL_getFileSize(fileName);
106 size_t const bufferSize = (size_t) fileSize;
107
108 if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
109 assert((U64)bufferSize == fileSize); /* check overflow */
110
111 { FILE* const f = fopen(fileName, "rb");
112 if (f == NULL) return kBuffNull;
113
114 buffer_t buff = createBuffer(bufferSize);
115 CONTROL(buff.ptr != NULL);
116
117 fillBuffer_fromHandle(&buff, f);
118 CONTROL(buff.size == buff.capacity);
119
120 fclose(f); /* do nothing specific if fclose() fails */
121 return buff;
122 }
123 }
124
125
126 /* @return : kBuffNull if any error */
127 static buffer_t
createDictionaryBuffer(const char * dictionaryName,const void * srcBuffer,const size_t * srcBlockSizes,unsigned nbBlocks,size_t requestedDictSize)128 createDictionaryBuffer(const char* dictionaryName,
129 const void* srcBuffer,
130 const size_t* srcBlockSizes, unsigned nbBlocks,
131 size_t requestedDictSize)
132 {
133 if (dictionaryName) {
134 DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
135 return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */
136
137 } else {
138
139 DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
140 (unsigned)requestedDictSize);
141 void* const dictBuffer = malloc(requestedDictSize);
142 CONTROL(dictBuffer != NULL);
143
144 size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
145 srcBuffer,
146 srcBlockSizes, nbBlocks);
147 CONTROL(!ZSTD_isError(dictSize));
148
149 buffer_t result;
150 result.ptr = dictBuffer;
151 result.capacity = requestedDictSize;
152 result.size = dictSize;
153 return result;
154 }
155 }
156
157
158 /*! BMK_loadFiles() :
159 * Loads `buffer`, with content from files listed within `fileNamesTable`.
160 * Fills `buffer` entirely.
161 * @return : 0 on success, !=0 on error */
loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char * const * fileNamesTable,unsigned nbFiles)162 static int loadFiles(void* buffer, size_t bufferSize,
163 size_t* fileSizes,
164 const char* const * fileNamesTable, unsigned nbFiles)
165 {
166 size_t pos = 0, totalSize = 0;
167
168 for (unsigned n=0; n<nbFiles; n++) {
169 U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
170 if (UTIL_isDirectory(fileNamesTable[n])) {
171 fileSizes[n] = 0;
172 continue;
173 }
174 if (fileSize == UTIL_FILESIZE_UNKNOWN) {
175 fileSizes[n] = 0;
176 continue;
177 }
178
179 FILE* const f = fopen(fileNamesTable[n], "rb");
180 assert(f!=NULL);
181
182 assert(pos <= bufferSize);
183 assert(fileSize <= bufferSize - pos);
184
185 { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
186 assert(readSize == fileSize);
187 pos += readSize;
188 }
189 fileSizes[n] = (size_t)fileSize;
190 totalSize += (size_t)fileSize;
191 fclose(f);
192 }
193
194 assert(totalSize == bufferSize);
195 return 0;
196 }
197
198
199
200 /*--- slice_collection_t ---*/
201
/* slice_collection_t :
 * a set of memory segments, described by 2 parallel tables.
 * The tables are released by freeSliceCollection();
 * the memory the slices point to is not. */
typedef struct {
    void** slicePtrs;     /* start address of each slice */
    size_t* capacities;   /* size, in bytes, of each slice */
    size_t nbSlices;      /* nb of entries in both tables */
} slice_collection_t;

/* value returned by splitSlices() on error */
static const slice_collection_t kNullCollection = { NULL, NULL, 0 };
209
/* freeSliceCollection() :
 * releases both tables of `collection`.
 * The content referenced by the slices is left untouched. */
static void freeSliceCollection(slice_collection_t collection)
{
    free(collection.capacities);
    free(collection.slicePtrs);
}
215
216 /* shrinkSizes() :
217 * downsizes sizes of slices within collection, according to `newSizes`.
218 * every `newSizes` entry must be <= than its corresponding collection size */
shrinkSizes(slice_collection_t collection,const size_t * newSizes)219 void shrinkSizes(slice_collection_t collection,
220 const size_t* newSizes) /* presumed same size as collection */
221 {
222 size_t const nbSlices = collection.nbSlices;
223 for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
224 assert(newSizes[blockNb] <= collection.capacities[blockNb]);
225 collection.capacities[blockNb] = newSizes[blockNb];
226 }
227 }
228
229
230 /* splitSlices() :
231 * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
232 * otherwise, creates exactly nbSlices slices,
233 * by either truncating input (when smaller)
234 * or repeating input from beginning */
235 static slice_collection_t
splitSlices(slice_collection_t srcSlices,size_t blockSize,size_t nbSlices)236 splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
237 {
238 if (blockSize==0) blockSize = (size_t)(-1); /* means "do not cut" */
239 size_t nbSrcBlocks = 0;
240 for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
241 size_t pos = 0;
242 while (pos <= srcSlices.capacities[ssnb]) {
243 nbSrcBlocks++;
244 pos += blockSize;
245 }
246 }
247
248 if (nbSlices == 0) nbSlices = nbSrcBlocks;
249
250 void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
251 size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
252 if (sliceTable == NULL || capacities == NULL) {
253 free(sliceTable);
254 free(capacities);
255 return kNullCollection;
256 }
257
258 size_t ssnb = 0;
259 for (size_t sliceNb=0; sliceNb < nbSlices; ) {
260 ssnb = (ssnb + 1) % srcSlices.nbSlices;
261 size_t pos = 0;
262 char* const ptr = (char*)srcSlices.slicePtrs[ssnb];
263 while (pos < srcSlices.capacities[ssnb] && sliceNb < nbSlices) {
264 size_t const size = MIN(blockSize, srcSlices.capacities[ssnb] - pos);
265 sliceTable[sliceNb] = ptr + pos;
266 capacities[sliceNb] = size;
267 sliceNb++;
268 pos += blockSize;
269 }
270 }
271
272 slice_collection_t result;
273 result.nbSlices = nbSlices;
274 result.slicePtrs = sliceTable;
275 result.capacities = capacities;
276 return result;
277 }
278
279
/* sliceCollection_totalCapacity() :
 * @return : sum of the sizes of all slices of `sc` */
static size_t sliceCollection_totalCapacity(slice_collection_t sc)
{
    size_t sum = 0;
    size_t idx = 0;
    while (idx < sc.nbSlices) {
        sum += sc.capacities[idx];
        idx++;
    }
    return sum;
}
287
288
289 /* --- buffer collection --- */
290
/* buffer_collection_t :
 * a buffer, plus the slices describing how it is segmented.
 * Both members are released by freeBufferCollection(). */
typedef struct {
    buffer_t buffer;             /* memory area holding the content */
    slice_collection_t slices;   /* segments, pointing into `buffer` */
} buffer_collection_t;
295
296
/* freeBufferCollection() :
 * releases the slice tables of `bc`, then the buffer they were pointing into. */
static void freeBufferCollection(buffer_collection_t bc)
{
    freeSliceCollection(bc.slices);
    freeBuffer(bc.buffer);
}
302
303
304 static buffer_collection_t
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)305 createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
306 {
307 size_t const bufferSize = sliceCollection_totalCapacity(sc);
308
309 buffer_t buffer = createBuffer(bufferSize);
310 CONTROL(buffer.ptr != NULL);
311
312 size_t const nbSlices = sc.nbSlices;
313 void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
314 CONTROL(slices != NULL);
315
316 size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
317 CONTROL(capacities != NULL);
318
319 char* const ptr = (char*)buffer.ptr;
320 size_t pos = 0;
321 for (size_t n=0; n < nbSlices; n++) {
322 capacities[n] = sc.capacities[n];
323 slices[n] = ptr + pos;
324 pos += capacities[n];
325 }
326
327 buffer_collection_t result;
328 result.buffer = buffer;
329 result.slices.nbSlices = nbSlices;
330 result.slices.capacities = capacities;
331 result.slices.slicePtrs = slices;
332 return result;
333 }
334
335
336 /* @return : kBuffNull if any error */
337 static buffer_collection_t
createBufferCollection_fromFiles(const char * const * fileNamesTable,unsigned nbFiles)338 createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
339 {
340 U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
341 assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
342 assert(totalSizeToLoad <= BENCH_SIZE_MAX);
343 size_t const loadedSize = (size_t)totalSizeToLoad;
344 assert(loadedSize > 0);
345 void* const srcBuffer = malloc(loadedSize);
346 assert(srcBuffer != NULL);
347
348 assert(nbFiles > 0);
349 size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
350 assert(fileSizes != NULL);
351
352 /* Load input buffer */
353 int const errorCode = loadFiles(srcBuffer, loadedSize,
354 fileSizes,
355 fileNamesTable, nbFiles);
356 assert(errorCode == 0);
357
358 void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
359 assert(sliceTable != NULL);
360
361 char* const ptr = (char*)srcBuffer;
362 size_t pos = 0;
363 unsigned fileNb = 0;
364 for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
365 sliceTable[fileNb] = ptr + pos;
366 pos += fileSizes[fileNb];
367 }
368 assert(pos == loadedSize);
369 assert(fileNb == nbFiles);
370
371
372 buffer_t buffer;
373 buffer.ptr = srcBuffer;
374 buffer.capacity = loadedSize;
375 buffer.size = loadedSize;
376
377 slice_collection_t slices;
378 slices.slicePtrs = sliceTable;
379 slices.capacities = fileSizes;
380 slices.nbSlices = nbFiles;
381
382 buffer_collection_t bc;
383 bc.buffer = buffer;
384 bc.slices = slices;
385 return bc;
386 }
387
388
389
390
391 /*--- ddict_collection_t ---*/
392
/* ddict_collection_t :
 * a table of decompression dictionaries. */
typedef struct {
    ZSTD_DDict** ddicts;   /* table of `nbDDict` dictionaries */
    size_t nbDDict;        /* nb of entries in `ddicts` */
} ddict_collection_t;

/* empty collection : .ddicts == NULL */
static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
399
/* freeDDictCollection() :
 * releases every dictionary of the collection, then the table itself. */
static void freeDDictCollection(ddict_collection_t ddictc)
{
    ZSTD_DDict** const table = ddictc.ddicts;
    size_t idx = 0;
    while (idx < ddictc.nbDDict) {
        ZSTD_freeDDict(table[idx]);
        idx++;
    }
    free(table);
}
407
408 /* returns .buffers=NULL if operation fails */
createDDictCollection(const void * dictBuffer,size_t dictSize,size_t nbDDict)409 static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
410 {
411 ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
412 assert(ddicts != NULL);
413 if (ddicts==NULL) return kNullDDictCollection;
414 for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
415 ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
416 assert(ddicts[dictNb] != NULL);
417 }
418 ddict_collection_t ddictc;
419 ddictc.ddicts = ddicts;
420 ddictc.nbDDict = nbDDict;
421 return ddictc;
422 }
423
424
425 /* mess with adresses, so that linear scanning dictionaries != linear address scanning */
shuffleDictionaries(ddict_collection_t dicts)426 void shuffleDictionaries(ddict_collection_t dicts)
427 {
428 size_t const nbDicts = dicts.nbDDict;
429 for (size_t r=0; r<nbDicts; r++) {
430 size_t const d = rand() % nbDicts;
431 ZSTD_DDict* tmpd = dicts.ddicts[d];
432 dicts.ddicts[d] = dicts.ddicts[r];
433 dicts.ddicts[r] = tmpd;
434 }
435 for (size_t r=0; r<nbDicts; r++) {
436 size_t const d1 = rand() % nbDicts;
437 size_t const d2 = rand() % nbDicts;
438 ZSTD_DDict* tmpd = dicts.ddicts[d1];
439 dicts.ddicts[d1] = dicts.ddicts[d2];
440 dicts.ddicts[d2] = tmpd;
441 }
442 }
443
444
445 /* --- Compression --- */
446
447 /* compressBlocks() :
448 * @return : total compressed size of all blocks,
449 * or 0 if error.
450 */
compressBlocks(size_t * cSizes,slice_collection_t dstBlockBuffers,slice_collection_t srcBlockBuffers,ZSTD_CDict * cdict,int cLevel)451 static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */
452 slice_collection_t dstBlockBuffers,
453 slice_collection_t srcBlockBuffers,
454 ZSTD_CDict* cdict, int cLevel)
455 {
456 size_t const nbBlocks = srcBlockBuffers.nbSlices;
457 assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
458
459 ZSTD_CCtx* const cctx = ZSTD_createCCtx();
460 assert(cctx != NULL);
461
462 size_t totalCSize = 0;
463 for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
464 size_t cBlockSize;
465 if (cdict == NULL) {
466 cBlockSize = ZSTD_compressCCtx(cctx,
467 dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
468 srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
469 cLevel);
470 } else {
471 cBlockSize = ZSTD_compress_usingCDict(cctx,
472 dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
473 srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
474 cdict);
475 }
476 CONTROL(!ZSTD_isError(cBlockSize));
477 if (cSizes) cSizes[blockNb] = cBlockSize;
478 totalCSize += cBlockSize;
479 }
480 return totalCSize;
481 }
482
483
484 /* --- Benchmark --- */
485
/* decompressInstructions :
 * state handed over to the benched function decompress() through its `payload` argument. */
typedef struct {
    ZSTD_DCtx* dctx;                   /* decompression context, used for every block */
    size_t nbDicts;                    /* nb of dictionaries within `dictionaries` */
    size_t dictNb;                     /* index of the dictionary used by next decompress() call */
    ddict_collection_t dictionaries;   /* dictionaries to cycle through */
} decompressInstructions;
492
createDecompressInstructions(ddict_collection_t dictionaries)493 decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
494 {
495 decompressInstructions di;
496 di.dctx = ZSTD_createDCtx();
497 assert(di.dctx != NULL);
498 di.nbDicts = dictionaries.nbDDict;
499 di.dictNb = 0;
500 di.dictionaries = dictionaries;
501 return di;
502 }
503
/* freeDecompressInstructions() :
 * releases the decompression context of `di`.
 * The dictionaries collection is not released. */
void freeDecompressInstructions(decompressInstructions di)
{
    ZSTD_DCtx* const dctx = di.dctx;
    ZSTD_freeDCtx(dctx);
}
508
509 /* benched function */
decompress(const void * src,size_t srcSize,void * dst,size_t dstCapacity,void * payload)510 size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
511 {
512 decompressInstructions* const di = (decompressInstructions*) payload;
513
514 size_t const result = ZSTD_decompress_usingDDict(di->dctx,
515 dst, dstCapacity,
516 src, srcSize,
517 di->dictionaries.ddicts[di->dictNb]);
518
519 di->dictNb = di->dictNb + 1;
520 if (di->dictNb >= di->nbDicts) di->dictNb = 0;
521
522 return result;
523 }
524
525
/* benchMem() :
 * measures decompression speed of `srcBlocks` (compressed blocks) into `dstBlocks`,
 * cycling through `dictionaries` : each block is decompressed with the next dictionary.
 * Both collections must have the same nb of blocks.
 * nbRounds : nb of rounds, each one lasting RUN_TIME_DEFAULT_MS.
 * Best speed observed so far is displayed after each run.
 * @return : 0 (success).
 * Note : failure to create the benchmark state, or a failed run, abort() the program. */
static int benchMem(slice_collection_t dstBlocks,
                    slice_collection_t srcBlocks,
                    ddict_collection_t dictionaries,
                    int nbRounds)
{
    assert(dstBlocks.nbSlices == srcBlocks.nbSlices);

    unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
    unsigned const total_time_ms = nbRounds * ms_per_round;

    double bestSpeed = 0.;

    BMK_timedFnState_t* const benchState =
            BMK_createTimedFnState(total_time_ms, ms_per_round);
    CONTROL(benchState != NULL);   /* state is used by every run below */
    decompressInstructions di = createDecompressInstructions(dictionaries);
    BMK_benchParams_t const bp = {
        .benchFn = decompress,
        .benchPayload = &di,
        .initFn = NULL,
        .initPayload = NULL,
        .errorFn = ZSTD_isError,
        .blockCount = dstBlocks.nbSlices,
        .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
        .srcSizes = srcBlocks.capacities,
        .dstBuffers = dstBlocks.slicePtrs,
        .dstCapacities = dstBlocks.capacities,
        .blockResults = NULL
    };

    for (;;) {
        BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
        CONTROL(BMK_isSuccessful_runOutcome(outcome));

        BMK_runTime_t const result = BMK_extract_runTime(outcome);
        U64 const dTime_ns = result.nanoSecPerRun;
        double const dTime_sec = (double)dTime_ns / 1000000000;
        size_t const srcSize = result.sumOfReturn;   /* sum of values returned by decompress() */
        double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB);
        if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps;
        DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
        fflush(stdout);
        if (BMK_isCompleted_TimedFn(benchState)) break;
    }
    DISPLAY("\n");

    freeDecompressInstructions(di);
    BMK_freeTimedFnState(benchState);

    return 0;   /* success */
}
576
577
578 /*! bench() :
579 * fileName : file to load for benchmarking purpose
580 * dictionary : optional (can be NULL), file to load as dictionary,
581 * if none provided : will be calculated on the fly by the program.
582 * @return : 0 is success, 1+ otherwise */
bench(const char ** fileNameTable,unsigned nbFiles,const char * dictionary,size_t blockSize,int clevel,unsigned nbDictMax,unsigned nbBlocks,int nbRounds)583 int bench(const char** fileNameTable, unsigned nbFiles,
584 const char* dictionary,
585 size_t blockSize, int clevel,
586 unsigned nbDictMax, unsigned nbBlocks,
587 int nbRounds)
588 {
589 int result = 0;
590
591 DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
592 buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
593 CONTROL(srcs.buffer.ptr != NULL);
594 buffer_t srcBuffer = srcs.buffer;
595 size_t const srcSize = srcBuffer.size;
596 DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
597 (double)srcSize / (1 MB));
598
599 slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
600 nbBlocks = (unsigned)(srcSlices.nbSlices);
601 DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
602 if (blockSize)
603 DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
604 DISPLAYLEVEL(3, "\n");
605 size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
606
607
608 size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
609 CONTROL(dstCapacities != NULL);
610 size_t dstBufferCapacity = 0;
611 for (size_t bnb=0; bnb<nbBlocks; bnb++) {
612 dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
613 dstBufferCapacity += dstCapacities[bnb];
614 }
615
616 buffer_t dstBuffer = createBuffer(dstBufferCapacity);
617 CONTROL(dstBuffer.ptr != NULL);
618
619 void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
620 CONTROL(sliceTable != NULL);
621
622 { char* const ptr = dstBuffer.ptr;
623 size_t pos = 0;
624 for (size_t snb=0; snb < nbBlocks; snb++) {
625 sliceTable[snb] = ptr + pos;
626 pos += dstCapacities[snb];
627 } }
628
629 slice_collection_t dstSlices;
630 dstSlices.capacities = dstCapacities;
631 dstSlices.slicePtrs = sliceTable;
632 dstSlices.nbSlices = nbBlocks;
633
634
635 /* dictionary determination */
636 buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
637 srcs.buffer.ptr,
638 srcs.slices.capacities, srcs.slices.nbSlices,
639 DICTSIZE);
640 CONTROL(dictBuffer.ptr != NULL);
641
642 ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
643 CONTROL(cdict != NULL);
644
645 size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
646 CONTROL(cTotalSizeNoDict != 0);
647 DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
648 clevel,
649 (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
650
651 size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
652 CONTROL(cSizes != NULL);
653
654 size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
655 CONTROL(cTotalSize != 0);
656 DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
657 (unsigned)dictBuffer.size,
658 (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
659
660 /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
661 shrinkSizes(dstSlices, cSizes);
662
663 size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
664 unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
665 size_t const allDictMem = dictMem * nbDicts;
666 DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
667 nbDicts, (double)allDictMem / (1 MB));
668
669 ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
670 CONTROL(dictionaries.ddicts != NULL);
671
672 shuffleDictionaries(dictionaries);
673
674 buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
675 CONTROL(resultCollection.buffer.ptr != NULL);
676
677 result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds);
678
679 /* free all heap objects in reverse order */
680 freeBufferCollection(resultCollection);
681 freeDDictCollection(dictionaries);
682 free(cSizes);
683 ZSTD_freeCDict(cdict);
684 freeBuffer(dictBuffer);
685 freeSliceCollection(dstSlices);
686 freeBuffer(dstBuffer);
687 freeSliceCollection(srcSlices);
688 freeBufferCollection(srcs);
689
690 return result;
691 }
692
693
694
695 /* --- Command Line --- */
696
/*! readU32FromChar() :
 * @return : unsigned integer value read from input in `char` format.
 *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
 *  Returns 0, without advancing, when input does not start with a digit.
 *  Note : function will abort() program if value overflows.
 *         The checks are explicit (no assert()), so they remain active with NDEBUG. */
static unsigned readU32FromChar(const char** stringPtr)
{
    unsigned result = 0;
    while ((**stringPtr >='0') && (**stringPtr <='9')) {
        unsigned const max = (((unsigned)(-1)) / 10) - 1;
        if (result > max) abort();   /* next digit would overflow */
        result = (result * 10) + (unsigned)(**stringPtr - '0');
        (*stringPtr)++;
    }
    if ((**stringPtr=='K') || (**stringPtr=='M')) {
        unsigned const maxK = ((unsigned)(-1)) >> 10;
        if (result > maxK) abort();   /* multiplying by 1024 would overflow */
        result <<= 10;
        if (**stringPtr=='M') {
            if (result > maxK) abort();   /* second multiplication by 1024 would overflow */
            result <<= 10;
        }
        (*stringPtr)++;  /* skip `K` or `M` */
        if (**stringPtr=='i') (*stringPtr)++;
        if (**stringPtr=='B') (*stringPtr)++;
    }
    return result;
}
724
/** longCommandWArg() :
 *  tells if *stringPtr starts with longCommand.
 *  If it does, *stringPtr is moved right after longCommand, and the function @return 1.
 *  Otherwise, *stringPtr is left unmodified, and the function @return 0.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) return 0;
    *stringPtr += prefixLen;
    return 1;
}
737
738
usage(const char * exeName)739 int usage(const char* exeName)
740 {
741 DISPLAY (" \n");
742 DISPLAY (" %s [Options] filename(s) \n", exeName);
743 DISPLAY (" \n");
744 DISPLAY ("Options : \n");
745 DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
746 DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
747 DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
748 DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
749 DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
750 DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
751 DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
752 DISPLAY ("-h : help (this text) \n");
753 return 0;
754 }
755
/* bad_usage() :
 * reports an invalid command line, followed by the help text.
 * @return : 1, so that it can be used directly as (error) exit code */
int bad_usage(const char* exeName)
{
    int const errorCode = 1;
    DISPLAY (" bad usage : \n");
    (void)usage(exeName);
    return errorCode;
}
762
main(int argc,const char ** argv)763 int main (int argc, const char** argv)
764 {
765 int recursiveMode = 0;
766 int nbRounds = BENCH_TIME_DEFAULT_S;
767 const char* const exeName = argv[0];
768
769 if (argc < 2) return bad_usage(exeName);
770
771 const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
772 assert(nameTable != NULL);
773 unsigned nameIdx = 0;
774
775 const char* dictionary = NULL;
776 int cLevel = CLEVEL_DEFAULT;
777 size_t blockSize = BLOCKSIZE_DEFAULT;
778 unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
779 unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
780
781 for (int argNb = 1; argNb < argc ; argNb++) {
782 const char* argument = argv[argNb];
783 if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
784 if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
785 if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
786 if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
787 if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
788 if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
789 if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
790 if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
791 if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
792 if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
793 if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
794 /* anything that's not a command is a filename */
795 nameTable[nameIdx++] = argument;
796 }
797
798 const char** filenameTable = nameTable;
799 unsigned nbFiles = nameIdx;
800 char* buffer_containing_filenames = NULL;
801
802 if (recursiveMode) {
803 #ifndef UTIL_HAS_CREATEFILELIST
804 assert(0); /* missing capability, do not run */
805 #endif
806 filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
807 }
808
809 int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
810
811 free(buffer_containing_filenames);
812 free(nameTable);
813
814 return result;
815 }
816