1 /*
2 * Copyright (c) 2018-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /* largeNbDicts
12 * This is a benchmark test tool
13 * dedicated to the specific case of dictionary decompression
14 * using a very large nb of dictionaries
15 * thus suffering latency from lots of cache misses.
16 * It's created in a bid to investigate performance and find optimizations. */
17
18
19 /*--- Dependencies ---*/
20
21 #include <stddef.h> /* size_t */
22 #include <stdlib.h> /* malloc, free, abort */
23 #include <stdio.h> /* fprintf */
24 #include <limits.h> /* UINT_MAX */
25 #include <assert.h> /* assert */
26
27 #include "util.h"
28 #include "benchfn.h"
29 #define ZSTD_STATIC_LINKING_ONLY
30 #include "zstd.h"
31 #include "zdict.h"
32
33
34 /*--- Constants --- */
35
/* Size-unit suffixes : written after a number, `4 KB` expands to `4 *(1 << 10)` */
#define KB  *(1 << 10)
#define MB  *(1 << 20)

/* Default parameters */
#define BLOCKSIZE_DEFAULT      0        /* no slicing into blocks */
#define DICTSIZE               (4 KB)   /* target size when a dictionary gets trained */
#define CLEVEL_DEFAULT         3

/* Default benchmark duration, expressed as a nb of rounds of fixed duration */
#define BENCH_TIME_DEFAULT_S   6
#define RUN_TIME_DEFAULT_MS    1000
#define BENCH_TIME_DEFAULT_MS  (BENCH_TIME_DEFAULT_S * RUN_TIME_DEFAULT_MS)

#define DISPLAY_LEVEL_DEFAULT  3

/* Largest total input size accepted for a benchmark session */
#define BENCH_SIZE_MAX         (1200 MB)
50
51
52 /*--- Macros ---*/
53
/* CONTROL() :
 * runtime check, active in every build mode (unlike assert()) :
 * aborts the program when condition `c` is false.
 * Wrapped into do { } while (0), so that `CONTROL(x);` is exactly one statement,
 * which makes it safe inside an unbraced if / else. */
#define CONTROL(c)  do { if (!(c)) abort(); } while (0)
#undef MIN
/* MIN() : beware, each argument may be evaluated twice */
#define MIN(a,b)    ((a) < (b) ? (a) : (b))
57
58
59 /*--- Display Macros ---*/
60
61 #define DISPLAY(...) fprintf(stdout, __VA_ARGS__)
62 #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
63 static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
64
65
66 /*--- buffer_t ---*/
67
/* buffer_t :
 * a heap-allocated memory area.
 * `capacity` is the allocated size, `size` the nb of bytes effectively in use. */
typedef struct {
    void* ptr;
    size_t size;
    size_t capacity;
} buffer_t;

static const buffer_t kBuffNull = { .ptr = NULL, .size = 0, .capacity = 0 };

/* createBuffer() :
 * allocates `capacity` bytes (must be > 0); the new buffer starts empty.
 * @return : kBuffNull if allocation fails */
static buffer_t createBuffer(size_t capacity)
{
    assert(capacity > 0);
    {   void* const mem = malloc(capacity);
        if (mem != NULL) {
            buffer_t const created = { .ptr = mem, .size = 0, .capacity = capacity };
            return created;
        }
    }
    return kBuffNull;
}

/* freeBuffer() : releases memory owned by `buff`; kBuffNull is accepted */
static void freeBuffer(buffer_t buff)
{
    free(buff.ptr);
}


/* fillBuffer_fromHandle() :
 * reads up to buff->capacity bytes from `f` into the buffer,
 * and stores the nb of bytes effectively read into buff->size. */
static void fillBuffer_fromHandle(buffer_t* buff, FILE* f)
{
    buff->size = fread(buff->ptr, 1, buff->capacity, f);
}
101
102
103 /* @return : kBuffNull if any error */
createBuffer_fromFile(const char * fileName)104 static buffer_t createBuffer_fromFile(const char* fileName)
105 {
106 U64 const fileSize = UTIL_getFileSize(fileName);
107 size_t const bufferSize = (size_t) fileSize;
108
109 if (fileSize == UTIL_FILESIZE_UNKNOWN) return kBuffNull;
110 assert((U64)bufferSize == fileSize); /* check overflow */
111
112 { FILE* const f = fopen(fileName, "rb");
113 if (f == NULL) return kBuffNull;
114
115 buffer_t buff = createBuffer(bufferSize);
116 CONTROL(buff.ptr != NULL);
117
118 fillBuffer_fromHandle(&buff, f);
119 CONTROL(buff.size == buff.capacity);
120
121 fclose(f); /* do nothing specific if fclose() fails */
122 return buff;
123 }
124 }
125
126
127 /* @return : kBuffNull if any error */
128 static buffer_t
createDictionaryBuffer(const char * dictionaryName,const void * srcBuffer,const size_t * srcBlockSizes,size_t nbBlocks,size_t requestedDictSize)129 createDictionaryBuffer(const char* dictionaryName,
130 const void* srcBuffer,
131 const size_t* srcBlockSizes, size_t nbBlocks,
132 size_t requestedDictSize)
133 {
134 if (dictionaryName) {
135 DISPLAYLEVEL(3, "loading dictionary %s \n", dictionaryName);
136 return createBuffer_fromFile(dictionaryName); /* note : result might be kBuffNull */
137
138 } else {
139
140 DISPLAYLEVEL(3, "creating dictionary, of target size %u bytes \n",
141 (unsigned)requestedDictSize);
142 void* const dictBuffer = malloc(requestedDictSize);
143 CONTROL(dictBuffer != NULL);
144
145 assert(nbBlocks <= UINT_MAX);
146 size_t const dictSize = ZDICT_trainFromBuffer(dictBuffer, requestedDictSize,
147 srcBuffer,
148 srcBlockSizes, (unsigned)nbBlocks);
149 CONTROL(!ZSTD_isError(dictSize));
150
151 buffer_t result;
152 result.ptr = dictBuffer;
153 result.capacity = requestedDictSize;
154 result.size = dictSize;
155 return result;
156 }
157 }
158
159
160 /*! BMK_loadFiles() :
161 * Loads `buffer`, with content from files listed within `fileNamesTable`.
162 * Fills `buffer` entirely.
163 * @return : 0 on success, !=0 on error */
loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char * const * fileNamesTable,unsigned nbFiles)164 static int loadFiles(void* buffer, size_t bufferSize,
165 size_t* fileSizes,
166 const char* const * fileNamesTable, unsigned nbFiles)
167 {
168 size_t pos = 0, totalSize = 0;
169
170 for (unsigned n=0; n<nbFiles; n++) {
171 U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
172 if (UTIL_isDirectory(fileNamesTable[n])) {
173 fileSizes[n] = 0;
174 continue;
175 }
176 if (fileSize == UTIL_FILESIZE_UNKNOWN) {
177 fileSizes[n] = 0;
178 continue;
179 }
180
181 FILE* const f = fopen(fileNamesTable[n], "rb");
182 assert(f!=NULL);
183
184 assert(pos <= bufferSize);
185 assert(fileSize <= bufferSize - pos);
186
187 { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
188 assert(readSize == fileSize);
189 pos += readSize;
190 }
191 fileSizes[n] = (size_t)fileSize;
192 totalSize += (size_t)fileSize;
193 fclose(f);
194 }
195
196 assert(totalSize == bufferSize);
197 return 0;
198 }
199
200
201
202 /*--- slice_collection_t ---*/
203
/* slice_collection_t :
 * a set of `nbSlices` memory areas :
 * slicePtrs[n] is the start of slice n, capacities[n] is its size in bytes.
 * The collection owns both tables, but not the memory that slices point into. */
typedef struct {
    void** slicePtrs;
    size_t* capacities;
    size_t nbSlices;
} slice_collection_t;

static const slice_collection_t kNullCollection = { NULL, NULL, 0 };

/* freeSliceCollection() :
 * releases both tables; memory pointed by slices is left untouched */
static void freeSliceCollection(slice_collection_t collection)
{
    free(collection.slicePtrs);
    free(collection.capacities);
}

/* shrinkSizes() :
 * downsizes sizes of slices within collection, according to `newSizes`.
 * every `newSizes` entry must be <= than its corresponding collection size */
void shrinkSizes(slice_collection_t collection,
                 const size_t* newSizes)  /* presumed same size as collection */
{
    size_t const nbSlices = collection.nbSlices;
    for (size_t blockNb = 0; blockNb < nbSlices; blockNb++) {
        assert(newSizes[blockNb] <= collection.capacities[blockNb]);
        collection.capacities[blockNb] = newSizes[blockNb];
    }
}


/* nbBlocksInSlice() :
 * nb of blocks of size `blockSize` (last one possibly shorter)
 * required to cover `sliceSize` bytes. `blockSize` must be > 0.
 * An empty slice produces 0 block. */
static size_t nbBlocksInSlice(size_t sliceSize, size_t blockSize)
{
    return (sliceSize / blockSize) + ((sliceSize % blockSize) != 0);
}

/* splitSlices() :
 * cuts each slice of `srcSlices` into blocks of at most `blockSize` bytes
 * (blockSize == 0 means "do not cut"), starting from the first source slice.
 * Resulting slices reference the memory of `srcSlices`, nothing is copied.
 * nbSlices : if == 0, nbSlices is automatically determined from srcSlices and blockSize.
 *            otherwise, creates exactly nbSlices slices,
 *            by either truncating input (when smaller)
 *            or repeating input from beginning
 * @return : kNullCollection if allocation fails,
 *           or if `srcSlices` contains no data to split.
 *           Otherwise, result must be released with freeSliceCollection(). */
static slice_collection_t
splitSlices(slice_collection_t srcSlices, size_t blockSize, size_t nbSlices)
{
    if (blockSize==0) blockSize = (size_t)(-1);   /* means "do not cut" */

    /* exact nb of blocks : computed arithmetically,
     * which neither over-counts slices ending on a block boundary,
     * nor overflows when blockSize is very large */
    size_t nbSrcBlocks = 0;
    for (size_t ssnb=0; ssnb < srcSlices.nbSlices; ssnb++) {
        nbSrcBlocks += nbBlocksInSlice(srcSlices.capacities[ssnb], blockSize);
    }

    /* nothing to split : this guard also protects the modulo below,
     * and guarantees that the filling loop makes progress */
    if (nbSrcBlocks == 0) return kNullCollection;

    if (nbSlices == 0) nbSlices = nbSrcBlocks;

    void** const sliceTable = (void**)malloc(nbSlices * sizeof(*sliceTable));
    size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
    if (sliceTable == NULL || capacities == NULL) {
        free(sliceTable);
        free(capacities);
        return kNullCollection;
    }

    size_t ssnb = 0;   /* start from first source slice */
    for (size_t sliceNb=0; sliceNb < nbSlices; ) {
        size_t const srcCapacity = srcSlices.capacities[ssnb];
        size_t pos = 0;
        while (pos < srcCapacity && sliceNb < nbSlices) {
            size_t const remaining = srcCapacity - pos;
            size_t const size = (blockSize < remaining) ? blockSize : remaining;
            /* source pointer is only read for non-empty slices */
            sliceTable[sliceNb] = (char*)srcSlices.slicePtrs[ssnb] + pos;
            capacities[sliceNb] = size;
            sliceNb++;
            pos += size;   /* pos <= srcCapacity : cannot overflow */
        }
        ssnb = (ssnb + 1) % srcSlices.nbSlices;   /* next slice, wrapping to repeat input */
    }

    slice_collection_t result;
    result.nbSlices = nbSlices;
    result.slicePtrs = sliceTable;
    result.capacities = capacities;
    return result;
}


/* sliceCollection_totalCapacity() :
 * @return : sum of capacities of all slices of `sc` */
static size_t sliceCollection_totalCapacity(slice_collection_t sc)
{
    size_t totalSize = 0;
    for (size_t n=0; n<sc.nbSlices; n++)
        totalSize += sc.capacities[n];
    return totalSize;
}
289
290
291 /* --- buffer collection --- */
292
293 typedef struct {
294 buffer_t buffer;
295 slice_collection_t slices;
296 } buffer_collection_t;
297
298
freeBufferCollection(buffer_collection_t bc)299 static void freeBufferCollection(buffer_collection_t bc)
300 {
301 freeBuffer(bc.buffer);
302 freeSliceCollection(bc.slices);
303 }
304
305
306 static buffer_collection_t
createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)307 createBufferCollection_fromSliceCollectionSizes(slice_collection_t sc)
308 {
309 size_t const bufferSize = sliceCollection_totalCapacity(sc);
310
311 buffer_t buffer = createBuffer(bufferSize);
312 CONTROL(buffer.ptr != NULL);
313
314 size_t const nbSlices = sc.nbSlices;
315 void** const slices = (void**)malloc(nbSlices * sizeof(*slices));
316 CONTROL(slices != NULL);
317
318 size_t* const capacities = (size_t*)malloc(nbSlices * sizeof(*capacities));
319 CONTROL(capacities != NULL);
320
321 char* const ptr = (char*)buffer.ptr;
322 size_t pos = 0;
323 for (size_t n=0; n < nbSlices; n++) {
324 capacities[n] = sc.capacities[n];
325 slices[n] = ptr + pos;
326 pos += capacities[n];
327 }
328
329 buffer_collection_t result;
330 result.buffer = buffer;
331 result.slices.nbSlices = nbSlices;
332 result.slices.capacities = capacities;
333 result.slices.slicePtrs = slices;
334 return result;
335 }
336
337
338 /* @return : kBuffNull if any error */
339 static buffer_collection_t
createBufferCollection_fromFiles(const char * const * fileNamesTable,unsigned nbFiles)340 createBufferCollection_fromFiles(const char* const * fileNamesTable, unsigned nbFiles)
341 {
342 U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
343 assert(totalSizeToLoad != UTIL_FILESIZE_UNKNOWN);
344 assert(totalSizeToLoad <= BENCH_SIZE_MAX);
345 size_t const loadedSize = (size_t)totalSizeToLoad;
346 assert(loadedSize > 0);
347 void* const srcBuffer = malloc(loadedSize);
348 assert(srcBuffer != NULL);
349
350 assert(nbFiles > 0);
351 size_t* const fileSizes = (size_t*)calloc(nbFiles, sizeof(*fileSizes));
352 assert(fileSizes != NULL);
353
354 /* Load input buffer */
355 int const errorCode = loadFiles(srcBuffer, loadedSize,
356 fileSizes,
357 fileNamesTable, nbFiles);
358 assert(errorCode == 0);
359
360 void** sliceTable = (void**)malloc(nbFiles * sizeof(*sliceTable));
361 assert(sliceTable != NULL);
362
363 char* const ptr = (char*)srcBuffer;
364 size_t pos = 0;
365 unsigned fileNb = 0;
366 for ( ; (pos < loadedSize) && (fileNb < nbFiles); fileNb++) {
367 sliceTable[fileNb] = ptr + pos;
368 pos += fileSizes[fileNb];
369 }
370 assert(pos == loadedSize);
371 assert(fileNb == nbFiles);
372
373
374 buffer_t buffer;
375 buffer.ptr = srcBuffer;
376 buffer.capacity = loadedSize;
377 buffer.size = loadedSize;
378
379 slice_collection_t slices;
380 slices.slicePtrs = sliceTable;
381 slices.capacities = fileSizes;
382 slices.nbSlices = nbFiles;
383
384 buffer_collection_t bc;
385 bc.buffer = buffer;
386 bc.slices = slices;
387 return bc;
388 }
389
390
391
392
393 /*--- ddict_collection_t ---*/
394
395 typedef struct {
396 ZSTD_DDict** ddicts;
397 size_t nbDDict;
398 } ddict_collection_t;
399
400 static const ddict_collection_t kNullDDictCollection = { NULL, 0 };
401
freeDDictCollection(ddict_collection_t ddictc)402 static void freeDDictCollection(ddict_collection_t ddictc)
403 {
404 for (size_t dictNb=0; dictNb < ddictc.nbDDict; dictNb++) {
405 ZSTD_freeDDict(ddictc.ddicts[dictNb]);
406 }
407 free(ddictc.ddicts);
408 }
409
410 /* returns .buffers=NULL if operation fails */
createDDictCollection(const void * dictBuffer,size_t dictSize,size_t nbDDict)411 static ddict_collection_t createDDictCollection(const void* dictBuffer, size_t dictSize, size_t nbDDict)
412 {
413 ZSTD_DDict** const ddicts = malloc(nbDDict * sizeof(ZSTD_DDict*));
414 assert(ddicts != NULL);
415 if (ddicts==NULL) return kNullDDictCollection;
416 for (size_t dictNb=0; dictNb < nbDDict; dictNb++) {
417 ddicts[dictNb] = ZSTD_createDDict(dictBuffer, dictSize);
418 assert(ddicts[dictNb] != NULL);
419 }
420 ddict_collection_t ddictc;
421 ddictc.ddicts = ddicts;
422 ddictc.nbDDict = nbDDict;
423 return ddictc;
424 }
425
426
427 /* mess with addresses, so that linear scanning dictionaries != linear address scanning */
shuffleDictionaries(ddict_collection_t dicts)428 void shuffleDictionaries(ddict_collection_t dicts)
429 {
430 size_t const nbDicts = dicts.nbDDict;
431 for (size_t r=0; r<nbDicts; r++) {
432 size_t const d = rand() % nbDicts;
433 ZSTD_DDict* tmpd = dicts.ddicts[d];
434 dicts.ddicts[d] = dicts.ddicts[r];
435 dicts.ddicts[r] = tmpd;
436 }
437 for (size_t r=0; r<nbDicts; r++) {
438 size_t const d1 = rand() % nbDicts;
439 size_t const d2 = rand() % nbDicts;
440 ZSTD_DDict* tmpd = dicts.ddicts[d1];
441 dicts.ddicts[d1] = dicts.ddicts[d2];
442 dicts.ddicts[d2] = tmpd;
443 }
444 }
445
446
447 /* --- Compression --- */
448
449 /* compressBlocks() :
450 * @return : total compressed size of all blocks,
451 * or 0 if error.
452 */
compressBlocks(size_t * cSizes,slice_collection_t dstBlockBuffers,slice_collection_t srcBlockBuffers,ZSTD_CDict * cdict,int cLevel)453 static size_t compressBlocks(size_t* cSizes, /* optional (can be NULL). If present, must contain at least nbBlocks fields */
454 slice_collection_t dstBlockBuffers,
455 slice_collection_t srcBlockBuffers,
456 ZSTD_CDict* cdict, int cLevel)
457 {
458 size_t const nbBlocks = srcBlockBuffers.nbSlices;
459 assert(dstBlockBuffers.nbSlices == srcBlockBuffers.nbSlices);
460
461 ZSTD_CCtx* const cctx = ZSTD_createCCtx();
462 assert(cctx != NULL);
463
464 size_t totalCSize = 0;
465 for (size_t blockNb=0; blockNb < nbBlocks; blockNb++) {
466 size_t cBlockSize;
467 if (cdict == NULL) {
468 cBlockSize = ZSTD_compressCCtx(cctx,
469 dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
470 srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
471 cLevel);
472 } else {
473 cBlockSize = ZSTD_compress_usingCDict(cctx,
474 dstBlockBuffers.slicePtrs[blockNb], dstBlockBuffers.capacities[blockNb],
475 srcBlockBuffers.slicePtrs[blockNb], srcBlockBuffers.capacities[blockNb],
476 cdict);
477 }
478 CONTROL(!ZSTD_isError(cBlockSize));
479 if (cSizes) cSizes[blockNb] = cBlockSize;
480 totalCSize += cBlockSize;
481 }
482 return totalCSize;
483 }
484
485
486 /* --- Benchmark --- */
487
488 typedef struct {
489 ZSTD_DCtx* dctx;
490 size_t nbDicts;
491 size_t dictNb;
492 ddict_collection_t dictionaries;
493 } decompressInstructions;
494
createDecompressInstructions(ddict_collection_t dictionaries)495 decompressInstructions createDecompressInstructions(ddict_collection_t dictionaries)
496 {
497 decompressInstructions di;
498 di.dctx = ZSTD_createDCtx();
499 assert(di.dctx != NULL);
500 di.nbDicts = dictionaries.nbDDict;
501 di.dictNb = 0;
502 di.dictionaries = dictionaries;
503 return di;
504 }
505
freeDecompressInstructions(decompressInstructions di)506 void freeDecompressInstructions(decompressInstructions di)
507 {
508 ZSTD_freeDCtx(di.dctx);
509 }
510
511 /* benched function */
decompress(const void * src,size_t srcSize,void * dst,size_t dstCapacity,void * payload)512 size_t decompress(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* payload)
513 {
514 decompressInstructions* const di = (decompressInstructions*) payload;
515
516 size_t const result = ZSTD_decompress_usingDDict(di->dctx,
517 dst, dstCapacity,
518 src, srcSize,
519 di->dictionaries.ddicts[di->dictNb]);
520
521 di->dictNb = di->dictNb + 1;
522 if (di->dictNb >= di->nbDicts) di->dictNb = 0;
523
524 return result;
525 }
526
527
/* benchMem() :
 * measures speed of decompress() over all blocks of `srcBlocks`,
 * regenerated into slices of same rank of `dstBlocks`, rotating across `dictionaries`.
 * Runs during `nbRounds` rounds of RUN_TIME_DEFAULT_MS each,
 * and displays the best speed observed so far after each measurement.
 * Any failure (state allocation, unsuccessful run) aborts the program (CONTROL).
 * @return : 0 (success) */
static int benchMem(slice_collection_t dstBlocks,
                    slice_collection_t srcBlocks,
                    ddict_collection_t dictionaries,
                    int nbRounds)
{
    assert(dstBlocks.nbSlices == srcBlocks.nbSlices);

    unsigned const ms_per_round = RUN_TIME_DEFAULT_MS;
    unsigned const total_time_ms = nbRounds * ms_per_round;

    double bestSpeed = 0.;

    BMK_timedFnState_t* const benchState =
            BMK_createTimedFnState(total_time_ms, ms_per_round);
    CONTROL(benchState != NULL);   /* do not hand a NULL state to the bench loop */
    decompressInstructions di = createDecompressInstructions(dictionaries);
    BMK_benchParams_t const bp = {
        .benchFn = decompress,
        .benchPayload = &di,
        .initFn = NULL,
        .initPayload = NULL,
        .errorFn = ZSTD_isError,
        .blockCount = dstBlocks.nbSlices,
        .srcBuffers = (const void* const*) srcBlocks.slicePtrs,
        .srcSizes = srcBlocks.capacities,
        .dstBuffers = dstBlocks.slicePtrs,
        .dstCapacities = dstBlocks.capacities,
        .blockResults = NULL
    };

    for (;;) {
        BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, bp);
        CONTROL(BMK_isSuccessful_runOutcome(outcome));

        BMK_runTime_t const result = BMK_extract_runTime(outcome);
        double const dTime_ns = result.nanoSecPerRun;
        double const dTime_sec = (double)dTime_ns / 1000000000;
        /* NOTE(review): sumOfReturn is presumably the sum of values returned by decompress(),
         * aka the nb of regenerated bytes per run - to be confirmed in benchfn.h */
        size_t const srcSize = result.sumOfReturn;
        double const dSpeed_MBps = (double)srcSize / dTime_sec / (1 MB);
        if (dSpeed_MBps > bestSpeed) bestSpeed = dSpeed_MBps;
        DISPLAY("Decompression Speed : %.1f MB/s \r", bestSpeed);
        fflush(stdout);
        if (BMK_isCompleted_TimedFn(benchState)) break;
    }
    DISPLAY("\n");

    freeDecompressInstructions(di);
    BMK_freeTimedFnState(benchState);

    return 0;   /* success */
}
578
579
580 /*! bench() :
581 * fileName : file to load for benchmarking purpose
582 * dictionary : optional (can be NULL), file to load as dictionary,
583 * if none provided : will be calculated on the fly by the program.
584 * @return : 0 is success, 1+ otherwise */
bench(const char ** fileNameTable,unsigned nbFiles,const char * dictionary,size_t blockSize,int clevel,unsigned nbDictMax,unsigned nbBlocks,int nbRounds)585 int bench(const char** fileNameTable, unsigned nbFiles,
586 const char* dictionary,
587 size_t blockSize, int clevel,
588 unsigned nbDictMax, unsigned nbBlocks,
589 int nbRounds)
590 {
591 int result = 0;
592
593 DISPLAYLEVEL(3, "loading %u files... \n", nbFiles);
594 buffer_collection_t const srcs = createBufferCollection_fromFiles(fileNameTable, nbFiles);
595 CONTROL(srcs.buffer.ptr != NULL);
596 buffer_t srcBuffer = srcs.buffer;
597 size_t const srcSize = srcBuffer.size;
598 DISPLAYLEVEL(3, "created src buffer of size %.1f MB \n",
599 (double)srcSize / (1 MB));
600
601 slice_collection_t const srcSlices = splitSlices(srcs.slices, blockSize, nbBlocks);
602 nbBlocks = (unsigned)(srcSlices.nbSlices);
603 DISPLAYLEVEL(3, "split input into %u blocks ", nbBlocks);
604 if (blockSize)
605 DISPLAYLEVEL(3, "of max size %u bytes ", (unsigned)blockSize);
606 DISPLAYLEVEL(3, "\n");
607 size_t const totalSrcSlicesSize = sliceCollection_totalCapacity(srcSlices);
608
609
610 size_t* const dstCapacities = malloc(nbBlocks * sizeof(*dstCapacities));
611 CONTROL(dstCapacities != NULL);
612 size_t dstBufferCapacity = 0;
613 for (size_t bnb=0; bnb<nbBlocks; bnb++) {
614 dstCapacities[bnb] = ZSTD_compressBound(srcSlices.capacities[bnb]);
615 dstBufferCapacity += dstCapacities[bnb];
616 }
617
618 buffer_t dstBuffer = createBuffer(dstBufferCapacity);
619 CONTROL(dstBuffer.ptr != NULL);
620
621 void** const sliceTable = malloc(nbBlocks * sizeof(*sliceTable));
622 CONTROL(sliceTable != NULL);
623
624 { char* const ptr = dstBuffer.ptr;
625 size_t pos = 0;
626 for (size_t snb=0; snb < nbBlocks; snb++) {
627 sliceTable[snb] = ptr + pos;
628 pos += dstCapacities[snb];
629 } }
630
631 slice_collection_t dstSlices;
632 dstSlices.capacities = dstCapacities;
633 dstSlices.slicePtrs = sliceTable;
634 dstSlices.nbSlices = nbBlocks;
635
636
637 /* dictionary determination */
638 buffer_t const dictBuffer = createDictionaryBuffer(dictionary,
639 srcs.buffer.ptr,
640 srcs.slices.capacities, srcs.slices.nbSlices,
641 DICTSIZE);
642 CONTROL(dictBuffer.ptr != NULL);
643
644 ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer.ptr, dictBuffer.size, clevel);
645 CONTROL(cdict != NULL);
646
647 size_t const cTotalSizeNoDict = compressBlocks(NULL, dstSlices, srcSlices, NULL, clevel);
648 CONTROL(cTotalSizeNoDict != 0);
649 DISPLAYLEVEL(3, "compressing at level %u without dictionary : Ratio=%.2f (%u bytes) \n",
650 clevel,
651 (double)totalSrcSlicesSize / cTotalSizeNoDict, (unsigned)cTotalSizeNoDict);
652
653 size_t* const cSizes = malloc(nbBlocks * sizeof(size_t));
654 CONTROL(cSizes != NULL);
655
656 size_t const cTotalSize = compressBlocks(cSizes, dstSlices, srcSlices, cdict, clevel);
657 CONTROL(cTotalSize != 0);
658 DISPLAYLEVEL(3, "compressed using a %u bytes dictionary : Ratio=%.2f (%u bytes) \n",
659 (unsigned)dictBuffer.size,
660 (double)totalSrcSlicesSize / cTotalSize, (unsigned)cTotalSize);
661
662 /* now dstSlices contain the real compressed size of each block, instead of the maximum capacity */
663 shrinkSizes(dstSlices, cSizes);
664
665 size_t const dictMem = ZSTD_estimateDDictSize(dictBuffer.size, ZSTD_dlm_byCopy);
666 unsigned const nbDicts = nbDictMax ? nbDictMax : nbBlocks;
667 size_t const allDictMem = dictMem * nbDicts;
668 DISPLAYLEVEL(3, "generating %u dictionaries, using %.1f MB of memory \n",
669 nbDicts, (double)allDictMem / (1 MB));
670
671 ddict_collection_t const dictionaries = createDDictCollection(dictBuffer.ptr, dictBuffer.size, nbDicts);
672 CONTROL(dictionaries.ddicts != NULL);
673
674 shuffleDictionaries(dictionaries);
675
676 buffer_collection_t resultCollection = createBufferCollection_fromSliceCollectionSizes(srcSlices);
677 CONTROL(resultCollection.buffer.ptr != NULL);
678
679 result = benchMem(resultCollection.slices, dstSlices, dictionaries, nbRounds);
680
681 /* free all heap objects in reverse order */
682 freeBufferCollection(resultCollection);
683 freeDDictCollection(dictionaries);
684 free(cSizes);
685 ZSTD_freeCDict(cdict);
686 freeBuffer(dictBuffer);
687 freeSliceCollection(dstSlices);
688 freeBuffer(dstBuffer);
689 freeSliceCollection(srcSlices);
690 freeBufferCollection(srcs);
691
692 return result;
693 }
694
695
696
697 /* --- Command Line --- */
698
/* exitOnOverflow() :
 * terminates the program with an error message when `overflow` is non-zero */
static void exitOnOverflow(int overflow)
{
    if (overflow) {
        fprintf(stderr, "error : numeric value too large \n");
        exit(1);
    }
}

/*! readU32FromChar() :
 * @return : unsigned integer value read from input in `char` format.
 *  allows and interprets K, KB, KiB, M, MB and MiB suffix.
 *  Will also modify `*stringPtr`, advancing it to position where it stopped reading.
 *  When input does not start with a digit, @return 0 and leaves `*stringPtr` unmodified.
 *  Note : function will exit() program if digit sequence overflows.
 *         The check is active in every build mode (it does not rely on assert()). */
static unsigned readU32FromChar(const char** stringPtr)
{
    unsigned result = 0;
    while ((**stringPtr >='0') && (**stringPtr <='9')) {
        unsigned const max = (((unsigned)(-1)) / 10) - 1;
        exitOnOverflow(result > max);   /* check overflow, before multiplying */
        result *= 10;
        result += (unsigned)(**stringPtr - '0');
        (*stringPtr)++;
    }
    if ((**stringPtr=='K') || (**stringPtr=='M')) {
        unsigned const maxK = ((unsigned)(-1)) >> 10;
        exitOnOverflow(result > maxK);   /* check overflow, before shifting */
        result <<= 10;
        if (**stringPtr=='M') {
            exitOnOverflow(result > maxK);   /* check overflow, before shifting */
            result <<= 10;
        }
        (*stringPtr)++;  /* skip `K` or `M` */
        if (**stringPtr=='i') (*stringPtr)++;
        if (**stringPtr=='B') (*stringPtr)++;
    }
    return result;
}
726
/** longCommandWArg() :
 *  tests whether *stringPtr starts with longCommand.
 *  On match : advances *stringPtr just past longCommand, and @return 1.
 *  Otherwise : leaves *stringPtr unmodified, and @return 0.
 */
static unsigned longCommandWArg(const char** stringPtr, const char* longCommand)
{
    size_t const prefixLen = strlen(longCommand);
    if (strncmp(*stringPtr, longCommand, prefixLen) != 0) return 0;
    *stringPtr += prefixLen;
    return 1;
}
739
740
usage(const char * exeName)741 int usage(const char* exeName)
742 {
743 DISPLAY (" \n");
744 DISPLAY (" %s [Options] filename(s) \n", exeName);
745 DISPLAY (" \n");
746 DISPLAY ("Options : \n");
747 DISPLAY ("-r : recursively load all files in subdirectories (default: off) \n");
748 DISPLAY ("-B# : split input into blocks of size # (default: no split) \n");
749 DISPLAY ("-# : use compression level # (default: %u) \n", CLEVEL_DEFAULT);
750 DISPLAY ("-D # : use # as a dictionary (default: create one) \n");
751 DISPLAY ("-i# : nb benchmark rounds (default: %u) \n", BENCH_TIME_DEFAULT_S);
752 DISPLAY ("--nbBlocks=#: use # blocks for bench (default: one per file) \n");
753 DISPLAY ("--nbDicts=# : create # dictionaries for bench (default: one per block) \n");
754 DISPLAY ("-h : help (this text) \n");
755 return 0;
756 }
757
/* bad_usage() :
 * reports an invalid command line, followed by the regular help text.
 * @return : 1, to be used as exit code */
int bad_usage(const char* exeName)
{
    DISPLAY (" bad usage : \n");
    (void)usage(exeName);   /* always 0 : not needed here */
    return 1;
}
764
main(int argc,const char ** argv)765 int main (int argc, const char** argv)
766 {
767 int recursiveMode = 0;
768 int nbRounds = BENCH_TIME_DEFAULT_S;
769 const char* const exeName = argv[0];
770
771 if (argc < 2) return bad_usage(exeName);
772
773 const char** nameTable = (const char**)malloc(argc * sizeof(const char*));
774 assert(nameTable != NULL);
775 unsigned nameIdx = 0;
776
777 const char* dictionary = NULL;
778 int cLevel = CLEVEL_DEFAULT;
779 size_t blockSize = BLOCKSIZE_DEFAULT;
780 unsigned nbDicts = 0; /* determine nbDicts automatically: 1 dictionary per block */
781 unsigned nbBlocks = 0; /* determine nbBlocks automatically, from source and blockSize */
782
783 for (int argNb = 1; argNb < argc ; argNb++) {
784 const char* argument = argv[argNb];
785 if (!strcmp(argument, "-h")) { free(nameTable); return usage(exeName); }
786 if (!strcmp(argument, "-r")) { recursiveMode = 1; continue; }
787 if (!strcmp(argument, "-D")) { argNb++; assert(argNb < argc); dictionary = argv[argNb]; continue; }
788 if (longCommandWArg(&argument, "-i")) { nbRounds = readU32FromChar(&argument); continue; }
789 if (longCommandWArg(&argument, "--dictionary=")) { dictionary = argument; continue; }
790 if (longCommandWArg(&argument, "-B")) { blockSize = readU32FromChar(&argument); continue; }
791 if (longCommandWArg(&argument, "--blockSize=")) { blockSize = readU32FromChar(&argument); continue; }
792 if (longCommandWArg(&argument, "--nbDicts=")) { nbDicts = readU32FromChar(&argument); continue; }
793 if (longCommandWArg(&argument, "--nbBlocks=")) { nbBlocks = readU32FromChar(&argument); continue; }
794 if (longCommandWArg(&argument, "--clevel=")) { cLevel = readU32FromChar(&argument); continue; }
795 if (longCommandWArg(&argument, "-")) { cLevel = readU32FromChar(&argument); continue; }
796 /* anything that's not a command is a filename */
797 nameTable[nameIdx++] = argument;
798 }
799
800 const char** filenameTable = nameTable;
801 unsigned nbFiles = nameIdx;
802 char* buffer_containing_filenames = NULL;
803
804 if (recursiveMode) {
805 #ifndef UTIL_HAS_CREATEFILELIST
806 assert(0); /* missing capability, do not run */
807 #endif
808 filenameTable = UTIL_createFileList(nameTable, nameIdx, &buffer_containing_filenames, &nbFiles, 1 /* follow_links */);
809 }
810
811 int result = bench(filenameTable, nbFiles, dictionary, blockSize, cLevel, nbDicts, nbBlocks, nbRounds);
812
813 free(buffer_containing_filenames);
814 free(nameTable);
815
816 return result;
817 }
818