1 /*
2  * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 
12 /* **************************************
13 *  Tuning parameters
14 ****************************************/
/* BMK_TIMETEST_DEFAULT_S :
 * default value given to the `nbSeconds` field by BMK_initAdvancedParams().
 * Can be overridden at build time by pre-defining the macro. */
#ifndef BMK_TIMETEST_DEFAULT_S   /* default minimum time per test */
#define BMK_TIMETEST_DEFAULT_S 3
#endif
18 
19 
20 /* *************************************
21 *  Includes
22 ***************************************/
23 #include "platform.h"    /* Large Files support */
24 #include "util.h"        /* UTIL_getFileSize, UTIL_sleep */
25 #include <stdlib.h>      /* malloc, free */
26 #include <string.h>      /* memset */
27 #include <stdio.h>       /* fprintf, fopen */
28 #include <assert.h>      /* assert */
29 
30 #include "benchfn.h"
31 #include "mem.h"
32 #define ZSTD_STATIC_LINKING_ONLY
33 #include "zstd.h"
34 #include "datagen.h"     /* RDG_genBuffer */
35 #include "xxhash.h"
36 #include "benchzstd.h"
37 #include "zstd_errors.h"
38 
39 
40 /* *************************************
41 *  Constants
42 ***************************************/
/* ZSTD_GIT_COMMIT_STRING :
 * quoted form of ZSTD_GIT_COMMIT when the build defines it, empty string otherwise */
#ifndef ZSTD_GIT_COMMIT
#  define ZSTD_GIT_COMMIT_STRING ""
#else
#  define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
#endif

/* durations, expressed in the unit named by each macro suffix */
#define TIMELOOP_MICROSEC     (1*1000000ULL) /* 1 second */
#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
#define COOLPERIOD_SEC        10

/* postfix size multipliers : written after a number, as in `10 MB`.
 * Each one expands to `*(1<<n)`, so the resulting expression is not parenthesized as a whole. */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)

/* duration handed to BMK_createTimedFnState() as its second argument, in milliseconds */
#define BMK_RUNTEST_DEFAULT_MS 1000

/* maxMemory :
 * 2 GB - 64 MB when size_t is 4 bytes wide,
 * otherwise 1 << (bit width of size_t - 31), i.e. 8 GB when size_t is 64 bits */
static const size_t maxMemory = (sizeof(size_t)==4)  ?
                    /* 32-bit */ (2 GB - 64 MB) :
                    /* 64-bit */ (size_t)(1ULL << ((sizeof(size_t)*8)-31));
63 
64 
65 /* *************************************
66 *  console display
67 ***************************************/
/* DISPLAY() : printf-like output, always written to stderr */
#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL() : DISPLAY() only when the `displayLevel` variable visible at the expansion site is >= l.
 * Expands to a bare `if` statement : take care when it is used as the body of another if/else. */
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
/* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */

/* minimum span between two DISPLAYUPDATE() outputs : one sixth of SEC_TO_MICRO,
 * compared against the value returned by UTIL_clockSpanMicro() */
static const U64 g_refreshRate = SEC_TO_MICRO / 6;
/* clock value captured at the last DISPLAYUPDATE() output */
static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;

/* DISPLAYUPDATE() : rate-limited DISPLAY().
 * Prints only if `displayLevel` >= l and either more than g_refreshRate elapsed since the
 * previous output or `displayLevel` >= 4; in the latter case stderr is also flushed. */
#define DISPLAYUPDATE(l, ...) { if (displayLevel>=l) { \
            if ((UTIL_clockSpanMicro(g_displayClock) > g_refreshRate) || (displayLevel>=4)) \
            { g_displayClock = UTIL_getTime(); DISPLAY(__VA_ARGS__); \
            if (displayLevel>=4) fflush(stderr); } } }
79 
80 
81 /* *************************************
82 *  Exceptions
83 ***************************************/
/* DEBUG : defaults to 0 (disabled) unless set by the build */
#ifndef DEBUG
#  define DEBUG 0
#endif
/* DEBUGOUTPUT() : DISPLAY() only when DEBUG is non-zero */
#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }

/* EXM_THROW_INT() :
 * prints "Error <errorNum> : <message>" at display level 1,
 * then makes the enclosing function return errorNum.
 * Requires a `displayLevel` variable in scope. */
#define EXM_THROW_INT(errorNum, ...)  {               \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DISPLAYLEVEL(1, "Error %i : ", errorNum);         \
    DISPLAYLEVEL(1, __VA_ARGS__);                     \
    DISPLAYLEVEL(1, " \n");                           \
    return errorNum;                                  \
}

/* RETURN_ERROR() :
 * same message as EXM_THROW_INT(), but the enclosing function returns
 * a zero-filled object of type retType whose `tag` field is set to errorNum.
 * Requires a `displayLevel` variable in scope. */
#define RETURN_ERROR(errorNum, retType, ...)  {       \
    retType r;                                        \
    memset(&r, 0, sizeof(retType));                   \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DISPLAYLEVEL(1, "Error %i : ", errorNum);         \
    DISPLAYLEVEL(1, __VA_ARGS__);                     \
    DISPLAYLEVEL(1, " \n");                           \
    r.tag = errorNum;                                 \
    return r;                                         \
}

/* error without displaying */
/* RETURN_QUIET_ERROR() :
 * returns the same object as RETURN_ERROR(), but the message is only emitted
 * through DEBUGOUTPUT(), hence only when DEBUG is non-zero. */
#define RETURN_QUIET_ERROR(errorNum, retType, ...)  { \
    retType r;                                        \
    memset(&r, 0, sizeof(retType));                   \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DEBUGOUTPUT("Error %i : ", errorNum);             \
    DEBUGOUTPUT(__VA_ARGS__);                         \
    DEBUGOUTPUT(" \n");                               \
    r.tag = errorNum;                                 \
    return r;                                         \
}
119 
120 /* *************************************
121 *  Benchmark Parameters
122 ***************************************/
123 
BMK_initAdvancedParams(void)124 BMK_advancedParams_t BMK_initAdvancedParams(void) {
125     BMK_advancedParams_t const res = {
126         BMK_both, /* mode */
127         BMK_TIMETEST_DEFAULT_S, /* nbSeconds */
128         0, /* blockSize */
129         0, /* nbWorkers */
130         0, /* realTime */
131         0, /* additionalParam */
132         0, /* ldmFlag */
133         0, /* ldmMinMatch */
134         0, /* ldmHashLog */
135         0, /* ldmBuckSizeLog */
136         0  /* ldmHashEveryLog */
137     };
138     return res;
139 }
140 
141 
142 /* ********************************************************
143 *  Bench functions
144 **********************************************************/
/* blockParam_t :
 * groups, for one block, a source region, a compressed region and a result region.
 * NOTE(review): per-field meanings below are inferred from the field names only — confirm against users of this type. */
typedef struct {
    const void* srcPtr;   /* presumably : start of the source data */
    size_t srcSize;       /* presumably : size of the source data */
    void*  cPtr;          /* presumably : start of the compressed output area */
    size_t cRoom;         /* presumably : capacity of the compressed output area */
    size_t cSize;         /* presumably : size actually used in the compressed output area */
    void*  resPtr;        /* presumably : start of the regenerated (decompressed) data */
    size_t resSize;       /* presumably : size of the regenerated data */
} blockParam_t;
154 
/* MIN() / MAX() : drop any previous definition, then redefine locally.
 * Plain macros : each argument may be evaluated twice, so avoid arguments with side effects. */
#undef MIN
#undef MAX
#define MIN(a,b)    ((a) < (b) ? (a) : (b))
#define MAX(a,b)    ((a) > (b) ? (a) : (b))
159 
/* BMK_initCCtx() :
 * Resets `ctx` (session and parameters), then applies, in this order :
 * the worker count, the compression level, the long-distance-matching settings from `adv`,
 * every field of `comprParams`, and finally loads the dictionary.
 * Return codes of the zstd setters are not checked. */
static void BMK_initCCtx(ZSTD_CCtx* ctx,
    const void* dictBuffer, size_t dictBufferSize, int cLevel,
    const ZSTD_compressionParameters* comprParams, const BMK_advancedParams_t* adv)
{
    /* start from a clean context */
    ZSTD_CCtx_reset(ctx);
    ZSTD_CCtx_resetParameters(ctx);

    /* a request for exactly 1 worker is translated into 0 workers */
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, (adv->nbWorkers==1) ? 0 : adv->nbWorkers);

    ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);

    /* long distance matching */
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, adv->ldmFlag);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmMinMatch, adv->ldmMinMatch);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashLog, adv->ldmHashLog);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmBucketSizeLog, adv->ldmBucketSizeLog);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_ldmHashEveryLog, adv->ldmHashEveryLog);

    /* explicit compression parameters */
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_windowLog, comprParams->windowLog);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_hashLog, comprParams->hashLog);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_chainLog, comprParams->chainLog);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_searchLog, comprParams->searchLog);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_minMatch, comprParams->searchLength);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength);
    ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy);

    /* dictionary goes last */
    ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize);
}
185 
/* BMK_initDCtx() :
 * Resets `dctx`, then loads the dictionary (`dictBuffer`, `dictBufferSize`) into it.
 * Return codes of the zstd calls are not checked. */
static void BMK_initDCtx(ZSTD_DCtx* dctx,
                         const void* dictBuffer, size_t dictBufferSize)
{
    ZSTD_DCtx_reset(dctx);
    ZSTD_DCtx_loadDictionary(dctx, dictBuffer, dictBufferSize);
}
191 
192 
/* BMK_initCCtxArgs :
 * bundle of all BMK_initCCtx() arguments, so they can travel through a single `void*` payload
 * (unpacked by local_initCCtx()). */
typedef struct {
    ZSTD_CCtx* cctx;                                 /* context to (re)initialize */
    const void* dictBuffer;                          /* dictionary content, forwarded as-is */
    size_t dictBufferSize;                           /* size of dictBuffer */
    int cLevel;                                      /* compression level */
    const ZSTD_compressionParameters* comprParams;   /* explicit compression parameters */
    const BMK_advancedParams_t* adv;                 /* advanced bench parameters (workers, ldm) */
} BMK_initCCtxArgs;
201 
local_initCCtx(void * payload)202 static size_t local_initCCtx(void* payload) {
203     BMK_initCCtxArgs* ag = (BMK_initCCtxArgs*)payload;
204     BMK_initCCtx(ag->cctx, ag->dictBuffer, ag->dictBufferSize, ag->cLevel, ag->comprParams, ag->adv);
205     return 0;
206 }
207 
/* BMK_initDCtxArgs :
 * bundle of all BMK_initDCtx() arguments, so they can travel through a single `void*` payload
 * (unpacked by local_initDCtx()). */
typedef struct {
    ZSTD_DCtx* dctx;          /* context to (re)initialize */
    const void* dictBuffer;   /* dictionary content, forwarded as-is */
    size_t dictBufferSize;    /* size of dictBuffer */
} BMK_initDCtxArgs;
213 
local_initDCtx(void * payload)214 static size_t local_initDCtx(void* payload) {
215     BMK_initDCtxArgs* ag = (BMK_initDCtxArgs*)payload;
216     BMK_initDCtx(ag->dctx, ag->dictBuffer, ag->dictBufferSize);
217     return 0;
218 }
219 
220 
221 /* `addArgs` is the context */
local_defaultCompress(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstSize,void * addArgs)222 static size_t local_defaultCompress(
223                     const void* srcBuffer, size_t srcSize,
224                     void* dstBuffer, size_t dstSize,
225                     void* addArgs)
226 {
227     size_t moreToFlush = 1;
228     ZSTD_CCtx* const cctx = (ZSTD_CCtx*)addArgs;
229     ZSTD_inBuffer in;
230     ZSTD_outBuffer out;
231     in.src = srcBuffer; in.size = srcSize; in.pos = 0;
232     out.dst = dstBuffer; out.size = dstSize; out.pos = 0;
233     while (moreToFlush) {
234         if(out.pos == out.size) {
235             return (size_t)-ZSTD_error_dstSize_tooSmall;
236         }
237         moreToFlush = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
238         if (ZSTD_isError(moreToFlush)) {
239             return moreToFlush;
240         }
241     }
242     return out.pos;
243 }
244 
245 /* `addArgs` is the context */
local_defaultDecompress(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstCapacity,void * addArgs)246 static size_t local_defaultDecompress(
247                     const void* srcBuffer, size_t srcSize,
248                     void* dstBuffer, size_t dstCapacity,
249                     void* addArgs)
250 {
251     size_t moreToFlush = 1;
252     ZSTD_DCtx* const dctx = (ZSTD_DCtx*)addArgs;
253     ZSTD_inBuffer in;
254     ZSTD_outBuffer out;
255     in.src = srcBuffer; in.size = srcSize; in.pos = 0;
256     out.dst = dstBuffer; out.size = dstCapacity; out.pos = 0;
257     while (moreToFlush) {
258         if(out.pos == out.size) {
259             return (size_t)-ZSTD_error_dstSize_tooSmall;
260         }
261         moreToFlush = ZSTD_decompress_generic(dctx, &out, &in);
262         if (ZSTD_isError(moreToFlush)) {
263             return moreToFlush;
264         }
265     }
266     return out.pos;
267 
268 }
269 
270 
271 /* ================================================================= */
272 /*      Benchmark Zstandard, mem-to-mem scenarios                    */
273 /* ================================================================= */
274 
/* BMK_isSuccessful_benchOutcome() :
 * @return : 1 if `outcome` carries a valid result (its tag is 0), 0 otherwise */
int BMK_isSuccessful_benchOutcome(BMK_benchOutcome_t outcome)
{
    int const noError = (outcome.tag == 0);
    return noError;
}
279 
BMK_extract_benchResult(BMK_benchOutcome_t outcome)280 BMK_benchResult_t BMK_extract_benchResult(BMK_benchOutcome_t outcome)
281 {
282     assert(outcome.tag == 0);
283     return outcome.internal_never_use_directly;
284 }
285 
BMK_benchOutcome_error(void)286 static BMK_benchOutcome_t BMK_benchOutcome_error(void)
287 {
288     BMK_benchOutcome_t b;
289     memset(&b, 0, sizeof(b));
290     b.tag = 1;
291     return b;
292 }
293 
BMK_benchOutcome_setValidResult(BMK_benchResult_t result)294 static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t result)
295 {
296     BMK_benchOutcome_t b;
297     b.tag = 0;
298     b.internal_never_use_directly = result;
299     return b;
300 }
301 
302 
303 /* benchMem with no allocation */
304 static BMK_benchOutcome_t
BMK_benchMemAdvancedNoAlloc(const void ** srcPtrs,size_t * srcSizes,void ** cPtrs,size_t * cCapacities,size_t * cSizes,void ** resPtrs,size_t * resSizes,void ** resultBufferPtr,void * compressedBuffer,size_t maxCompressedSize,BMK_timedFnState_t * timeStateCompress,BMK_timedFnState_t * timeStateDecompress,const void * srcBuffer,size_t srcSize,const size_t * fileSizes,unsigned nbFiles,const int cLevel,const ZSTD_compressionParameters * comprParams,const void * dictBuffer,size_t dictBufferSize,ZSTD_CCtx * cctx,ZSTD_DCtx * dctx,int displayLevel,const char * displayName,const BMK_advancedParams_t * adv)305 BMK_benchMemAdvancedNoAlloc(
306                     const void** srcPtrs, size_t* srcSizes,
307                     void** cPtrs, size_t* cCapacities, size_t* cSizes,
308                     void** resPtrs, size_t* resSizes,
309                     void** resultBufferPtr, void* compressedBuffer,
310                     size_t maxCompressedSize,
311                     BMK_timedFnState_t* timeStateCompress,
312                     BMK_timedFnState_t* timeStateDecompress,
313 
314                     const void* srcBuffer, size_t srcSize,
315                     const size_t* fileSizes, unsigned nbFiles,
316                     const int cLevel,
317                     const ZSTD_compressionParameters* comprParams,
318                     const void* dictBuffer, size_t dictBufferSize,
319                     ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
320                     int displayLevel, const char* displayName,
321                     const BMK_advancedParams_t* adv)
322 {
323     size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize);  /* avoid div by 0 */
324     BMK_benchResult_t benchResult;
325     size_t const loadedCompressedSize = srcSize;
326     size_t cSize = 0;
327     double ratio = 0.;
328     U32 nbBlocks;
329 
330     assert(cctx != NULL); assert(dctx != NULL);
331 
332     /* init */
333     memset(&benchResult, 0, sizeof(benchResult));
334     if (strlen(displayName)>17) displayName += strlen(displayName) - 17;   /* display last 17 characters */
335     if (adv->mode == BMK_decodeOnly) {  /* benchmark only decompression : source must be already compressed */
336         const char* srcPtr = (const char*)srcBuffer;
337         U64 totalDSize64 = 0;
338         U32 fileNb;
339         for (fileNb=0; fileNb<nbFiles; fileNb++) {
340             U64 const fSize64 = ZSTD_findDecompressedSize(srcPtr, fileSizes[fileNb]);
341             if (fSize64==0) RETURN_ERROR(32, BMK_benchOutcome_t, "Impossible to determine original size ");
342             totalDSize64 += fSize64;
343             srcPtr += fileSizes[fileNb];
344         }
345         {   size_t const decodedSize = (size_t)totalDSize64;
346             assert((U64)decodedSize == totalDSize64);   /* check overflow */
347             free(*resultBufferPtr);
348             *resultBufferPtr = malloc(decodedSize);
349             if (!(*resultBufferPtr)) {
350                 RETURN_ERROR(33, BMK_benchOutcome_t, "not enough memory");
351             }
352             if (totalDSize64 > decodedSize) {  /* size_t overflow */
353                 free(*resultBufferPtr);
354                 RETURN_ERROR(32, BMK_benchOutcome_t, "original size is too large");
355             }
356             cSize = srcSize;
357             srcSize = decodedSize;
358             ratio = (double)srcSize / (double)cSize;
359         }
360     }
361 
362     /* Init data blocks  */
363     {   const char* srcPtr = (const char*)srcBuffer;
364         char* cPtr = (char*)compressedBuffer;
365         char* resPtr = (char*)(*resultBufferPtr);
366         U32 fileNb;
367         for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
368             size_t remaining = fileSizes[fileNb];
369             U32 const nbBlocksforThisFile = (adv->mode == BMK_decodeOnly) ? 1 : (U32)((remaining + (blockSize-1)) / blockSize);
370             U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
371             for ( ; nbBlocks<blockEnd; nbBlocks++) {
372                 size_t const thisBlockSize = MIN(remaining, blockSize);
373                 srcPtrs[nbBlocks] = srcPtr;
374                 srcSizes[nbBlocks] = thisBlockSize;
375                 cPtrs[nbBlocks] = cPtr;
376                 cCapacities[nbBlocks] = (adv->mode == BMK_decodeOnly) ? thisBlockSize : ZSTD_compressBound(thisBlockSize);
377                 resPtrs[nbBlocks] = resPtr;
378                 resSizes[nbBlocks] = (adv->mode == BMK_decodeOnly) ? (size_t) ZSTD_findDecompressedSize(srcPtr, thisBlockSize) : thisBlockSize;
379                 srcPtr += thisBlockSize;
380                 cPtr += cCapacities[nbBlocks];
381                 resPtr += thisBlockSize;
382                 remaining -= thisBlockSize;
383                 if (adv->mode == BMK_decodeOnly) {
384                     assert(nbBlocks==0);
385                     cSizes[nbBlocks] = thisBlockSize;
386                     benchResult.cSize = thisBlockSize;
387                 }
388             }
389         }
390     }
391 
392     /* warmimg up `compressedBuffer` */
393     if (adv->mode == BMK_decodeOnly) {
394         memcpy(compressedBuffer, srcBuffer, loadedCompressedSize);
395     } else {
396         RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);
397     }
398 
399     /* Bench */
400     {   U64 const crcOrig = (adv->mode == BMK_decodeOnly) ? 0 : XXH64(srcBuffer, srcSize, 0);
401 #       define NB_MARKS 4
402         const char* marks[NB_MARKS] = { " |", " /", " =", " \\" };
403         U32 markNb = 0;
404         int compressionCompleted = (adv->mode == BMK_decodeOnly);
405         int decompressionCompleted = (adv->mode == BMK_compressOnly);
406         BMK_benchParams_t cbp, dbp;
407         BMK_initCCtxArgs cctxprep;
408         BMK_initDCtxArgs dctxprep;
409 
410         cbp.benchFn = local_defaultCompress;
411         cbp.benchPayload = cctx;
412         cbp.initFn = local_initCCtx;
413         cbp.initPayload = &cctxprep;
414         cbp.errorFn = ZSTD_isError;
415         cbp.blockCount = nbBlocks;
416         cbp.srcBuffers = srcPtrs;
417         cbp.srcSizes = srcSizes;
418         cbp.dstBuffers = cPtrs;
419         cbp.dstCapacities = cCapacities;
420         cbp.blockResults = cSizes;
421 
422         cctxprep.cctx = cctx;
423         cctxprep.dictBuffer = dictBuffer;
424         cctxprep.dictBufferSize = dictBufferSize;
425         cctxprep.cLevel = cLevel;
426         cctxprep.comprParams = comprParams;
427         cctxprep.adv = adv;
428 
429         dbp.benchFn = local_defaultDecompress;
430         dbp.benchPayload = dctx;
431         dbp.initFn = local_initDCtx;
432         dbp.initPayload = &dctxprep;
433         dbp.errorFn = ZSTD_isError;
434         dbp.blockCount = nbBlocks;
435         dbp.srcBuffers = (const void* const *) cPtrs;
436         dbp.srcSizes = cSizes;
437         dbp.dstBuffers = resPtrs;
438         dbp.dstCapacities = resSizes;
439         dbp.blockResults = NULL;
440 
441         dctxprep.dctx = dctx;
442         dctxprep.dictBuffer = dictBuffer;
443         dctxprep.dictBufferSize = dictBufferSize;
444 
445         DISPLAYLEVEL(2, "\r%70s\r", "");   /* blank line */
446         DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
447 
448         while (!(compressionCompleted && decompressionCompleted)) {
449             if (!compressionCompleted) {
450                 BMK_runOutcome_t const cOutcome = BMK_benchTimedFn( timeStateCompress, cbp);
451 
452                 if (!BMK_isSuccessful_runOutcome(cOutcome)) {
453                     return BMK_benchOutcome_error();
454                 }
455 
456                 {   BMK_runTime_t const cResult = BMK_extract_runTime(cOutcome);
457                     cSize = cResult.sumOfReturn;
458                     ratio = (double)srcSize / cSize;
459                     {   BMK_benchResult_t newResult;
460                         newResult.cSpeed = ((U64)srcSize * TIMELOOP_NANOSEC / cResult.nanoSecPerRun);
461                         benchResult.cSize = cSize;
462                         if (newResult.cSpeed > benchResult.cSpeed)
463                             benchResult.cSpeed = newResult.cSpeed;
464                 }   }
465 
466                 {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
467                     DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
468                             marks[markNb], displayName,
469                             (U32)srcSize, (U32)cSize,
470                             ratioAccuracy, ratio,
471                             benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT);
472                 }
473                 compressionCompleted = BMK_isCompleted_TimedFn(timeStateCompress);
474             }
475 
476             if(!decompressionCompleted) {
477                 BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, dbp);
478 
479                 if(!BMK_isSuccessful_runOutcome(dOutcome)) {
480                     return BMK_benchOutcome_error();
481                 }
482 
483                 {   BMK_runTime_t const dResult = BMK_extract_runTime(dOutcome);
484                     U64 const newDSpeed = (srcSize * TIMELOOP_NANOSEC / dResult.nanoSecPerRun);
485                     if (newDSpeed > benchResult.dSpeed)
486                         benchResult.dSpeed = newDSpeed;
487                 }
488 
489                 {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
490                     DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
491                             marks[markNb], displayName,
492                             (U32)srcSize, (U32)benchResult.cSize,
493                             ratioAccuracy, ratio,
494                             benchResult.cSpeed < (10 MB) ? 2 : 1, (double)benchResult.cSpeed / MB_UNIT,
495                             (double)benchResult.dSpeed / MB_UNIT);
496                 }
497                 decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
498             }
499             markNb = (markNb+1) % NB_MARKS;
500         }   /* while (!(compressionCompleted && decompressionCompleted)) */
501 
502         /* CRC Checking */
503         {   const BYTE* resultBuffer = (const BYTE*)(*resultBufferPtr);
504             U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
505             if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) {
506                 size_t u;
507                 DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n",
508                         displayName, (unsigned)crcOrig, (unsigned)crcCheck);
509                 for (u=0; u<srcSize; u++) {
510                     if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
511                         U32 segNb, bNb, pos;
512                         size_t bacc = 0;
513                         DISPLAY("Decoding error at pos %u ", (U32)u);
514                         for (segNb = 0; segNb < nbBlocks; segNb++) {
515                             if (bacc + srcSizes[segNb] > u) break;
516                             bacc += srcSizes[segNb];
517                         }
518                         pos = (U32)(u - bacc);
519                         bNb = pos / (128 KB);
520                         DISPLAY("(sample %u, block %u, pos %u) \n", segNb, bNb, pos);
521                         if (u>5) {
522                             int n;
523                             DISPLAY("origin: ");
524                             for (n=-5; n<0; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
525                             DISPLAY(" :%02X:  ", ((const BYTE*)srcBuffer)[u]);
526                             for (n=1; n<3; n++) DISPLAY("%02X ", ((const BYTE*)srcBuffer)[u+n]);
527                             DISPLAY(" \n");
528                             DISPLAY("decode: ");
529                             for (n=-5; n<0; n++) DISPLAY("%02X ", resultBuffer[u+n]);
530                             DISPLAY(" :%02X:  ", resultBuffer[u]);
531                             for (n=1; n<3; n++) DISPLAY("%02X ", resultBuffer[u+n]);
532                             DISPLAY(" \n");
533                         }
534                         break;
535                     }
536                     if (u==srcSize-1) {  /* should never happen */
537                         DISPLAY("no difference detected\n");
538                     }
539                 }
540             }
541         }   /* CRC Checking */
542 
543         if (displayLevel == 1) {   /* hidden display mode -q, used by python speed benchmark */
544             double const cSpeed = (double)benchResult.cSpeed / MB_UNIT;
545             double const dSpeed = (double)benchResult.dSpeed / MB_UNIT;
546             if (adv->additionalParam) {
547                 DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
548             } else {
549                 DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
550             }
551         }
552 
553         DISPLAYLEVEL(2, "%2i#\n", cLevel);
554     }   /* Bench */
555 
556     benchResult.cMem = (1ULL << (comprParams->windowLog)) + ZSTD_sizeof_CCtx(cctx);
557     return BMK_benchOutcome_setValidResult(benchResult);
558 }
559 
BMK_benchMemAdvanced(const void * srcBuffer,size_t srcSize,void * dstBuffer,size_t dstCapacity,const size_t * fileSizes,unsigned nbFiles,int cLevel,const ZSTD_compressionParameters * comprParams,const void * dictBuffer,size_t dictBufferSize,int displayLevel,const char * displayName,const BMK_advancedParams_t * adv)560 BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
561                         void* dstBuffer, size_t dstCapacity,
562                         const size_t* fileSizes, unsigned nbFiles,
563                         int cLevel, const ZSTD_compressionParameters* comprParams,
564                         const void* dictBuffer, size_t dictBufferSize,
565                         int displayLevel, const char* displayName, const BMK_advancedParams_t* adv)
566 
567 {
568     int const dstParamsError = !dstBuffer ^ !dstCapacity;  /* must be both NULL or none */
569 
570     size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
571     U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
572 
573     /* these are the blockTable parameters, just split up */
574     const void ** const srcPtrs = (const void**)malloc(maxNbBlocks * sizeof(void*));
575     size_t* const srcSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
576 
577 
578     void ** const cPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
579     size_t* const cSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
580     size_t* const cCapacities = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
581 
582     void ** const resPtrs = (void**)malloc(maxNbBlocks * sizeof(void*));
583     size_t* const resSizes = (size_t*)malloc(maxNbBlocks * sizeof(size_t));
584 
585     BMK_timedFnState_t* timeStateCompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
586     BMK_timedFnState_t* timeStateDecompress = BMK_createTimedFnState(adv->nbSeconds * 1000, BMK_RUNTEST_DEFAULT_MS);
587 
588     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
589     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
590 
591     const size_t maxCompressedSize = dstCapacity ? dstCapacity : ZSTD_compressBound(srcSize) + (maxNbBlocks * 1024);
592 
593     void* const internalDstBuffer = dstBuffer ? NULL : malloc(maxCompressedSize);
594     void* const compressedBuffer = dstBuffer ? dstBuffer : internalDstBuffer;
595 
596     BMK_benchOutcome_t outcome = BMK_benchOutcome_error();  /* error by default */
597 
598     void* resultBuffer = srcSize ? malloc(srcSize) : NULL;
599 
600     int allocationincomplete = !srcPtrs || !srcSizes || !cPtrs ||
601         !cSizes || !cCapacities || !resPtrs || !resSizes ||
602         !timeStateCompress || !timeStateDecompress ||
603         !cctx || !dctx ||
604         !compressedBuffer || !resultBuffer;
605 
606 
607     if (!allocationincomplete && !dstParamsError) {
608         outcome = BMK_benchMemAdvancedNoAlloc(srcPtrs, srcSizes,
609                                             cPtrs, cCapacities, cSizes,
610                                             resPtrs, resSizes,
611                                             &resultBuffer,
612                                             compressedBuffer, maxCompressedSize,
613                                             timeStateCompress, timeStateDecompress,
614                                             srcBuffer, srcSize,
615                                             fileSizes, nbFiles,
616                                             cLevel, comprParams,
617                                             dictBuffer, dictBufferSize,
618                                             cctx, dctx,
619                                             displayLevel, displayName, adv);
620     }
621 
622     /* clean up */
623     BMK_freeTimedFnState(timeStateCompress);
624     BMK_freeTimedFnState(timeStateDecompress);
625 
626     ZSTD_freeCCtx(cctx);
627     ZSTD_freeDCtx(dctx);
628 
629     free(internalDstBuffer);
630     free(resultBuffer);
631 
632     free((void*)srcPtrs);
633     free(srcSizes);
634     free(cPtrs);
635     free(cSizes);
636     free(cCapacities);
637     free(resPtrs);
638     free(resSizes);
639 
640     if(allocationincomplete) {
641         RETURN_ERROR(31, BMK_benchOutcome_t, "allocation error : not enough memory");
642     }
643 
644     if(dstParamsError) {
645         RETURN_ERROR(32, BMK_benchOutcome_t, "Dst parameters not coherent");
646     }
647     return outcome;
648 }
649 
BMK_benchMem(const void * srcBuffer,size_t srcSize,const size_t * fileSizes,unsigned nbFiles,int cLevel,const ZSTD_compressionParameters * comprParams,const void * dictBuffer,size_t dictBufferSize,int displayLevel,const char * displayName)650 BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
651                         const size_t* fileSizes, unsigned nbFiles,
652                         int cLevel, const ZSTD_compressionParameters* comprParams,
653                         const void* dictBuffer, size_t dictBufferSize,
654                         int displayLevel, const char* displayName) {
655 
656     BMK_advancedParams_t const adv = BMK_initAdvancedParams();
657     return BMK_benchMemAdvanced(srcBuffer, srcSize,
658                                 NULL, 0,
659                                 fileSizes, nbFiles,
660                                 cLevel, comprParams,
661                                 dictBuffer, dictBufferSize,
662                                 displayLevel, displayName, &adv);
663 }
664 
BMK_benchCLevel(const void * srcBuffer,size_t benchedSize,const size_t * fileSizes,unsigned nbFiles,int cLevel,const ZSTD_compressionParameters * comprParams,const void * dictBuffer,size_t dictBufferSize,int displayLevel,const char * displayName,BMK_advancedParams_t const * const adv)665 static BMK_benchOutcome_t BMK_benchCLevel(const void* srcBuffer, size_t benchedSize,
666                             const size_t* fileSizes, unsigned nbFiles,
667                             int cLevel, const ZSTD_compressionParameters* comprParams,
668                             const void* dictBuffer, size_t dictBufferSize,
669                             int displayLevel, const char* displayName,
670                             BMK_advancedParams_t const * const adv)
671 {
672     const char* pch = strrchr(displayName, '\\'); /* Windows */
673     if (!pch) pch = strrchr(displayName, '/');    /* Linux */
674     if (pch) displayName = pch+1;
675 
676     if (adv->realTime) {
677         DISPLAYLEVEL(2, "Note : switching to real-time priority \n");
678         SET_REALTIME_PRIORITY;
679     }
680 
681     if (displayLevel == 1 && !adv->additionalParam)   /* --quiet mode */
682         DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n",
683                 ZSTD_VERSION_STRING, ZSTD_GIT_COMMIT_STRING,
684                 (U32)benchedSize, adv->nbSeconds, (U32)(adv->blockSize>>10));
685 
686     return BMK_benchMemAdvanced(srcBuffer, benchedSize,
687                                 NULL, 0,
688                                 fileSizes, nbFiles,
689                                 cLevel, comprParams,
690                                 dictBuffer, dictBufferSize,
691                                 displayLevel, displayName, adv);
692 }
693 
BMK_syntheticTest(int cLevel,double compressibility,const ZSTD_compressionParameters * compressionParams,int displayLevel,const BMK_advancedParams_t * adv)694 BMK_benchOutcome_t BMK_syntheticTest(int cLevel, double compressibility,
695                           const ZSTD_compressionParameters* compressionParams,
696                           int displayLevel, const BMK_advancedParams_t* adv)
697 {
698     char name[20] = {0};
699     size_t const benchedSize = 10000000;
700     void* srcBuffer;
701     BMK_benchOutcome_t res;
702 
703     if (cLevel > ZSTD_maxCLevel()) {
704         RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level");
705     }
706 
707     /* Memory allocation */
708     srcBuffer = malloc(benchedSize);
709     if (!srcBuffer) RETURN_ERROR(21, BMK_benchOutcome_t, "not enough memory");
710 
711     /* Fill input buffer */
712     RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);
713 
714     /* Bench */
715     snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));
716     res = BMK_benchCLevel(srcBuffer, benchedSize,
717                     &benchedSize /* ? */, 1 /* ? */,
718                     cLevel, compressionParams,
719                     NULL, 0,  /* dictionary */
720                     displayLevel, name, adv);
721 
722     /* clean up */
723     free(srcBuffer);
724 
725     return res;
726 }
727 
728 
729 
/* BMK_findMaxMem() :
 * Probes how much memory can actually be obtained for a wish of `requiredMem` bytes.
 * The wish is rounded up to the next 64 MB boundary, increased by one 64 MB step,
 * and capped at `maxMemory`. malloc() is then attempted with that size,
 * lowering it by one step after each failure.
 * @return : the size of the successful trial allocation minus one step,
 *           or 0 when nothing larger than one step could be obtained.
 * The trial allocation is released before returning. */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t const step = 64 MB;
    BYTE* testmem = NULL;

    /* Clamp before rounding : for a request close to the U64 maximum,
     * the round-up below would wrap around to 0 and under-size the probe.
     * For any smaller request the final value is unchanged. */
    if (requiredMem > maxMemory) requiredMem = maxMemory;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += step;
    if (requiredMem > maxMemory) requiredMem = maxMemory;

    do {
        testmem = (BYTE*)malloc((size_t)requiredMem);
        /* saturating decrement : never wrap below zero,
         * even if the cap is not a multiple of `step` */
        requiredMem = (requiredMem > step) ? (requiredMem - step) : 0;
    } while (!testmem && requiredMem > 0);

    free(testmem);
    return (size_t)(requiredMem);
}
747 
748 /*! BMK_loadFiles() :
749  *  Loads `buffer` with content of files listed within `fileNamesTable`.
750  *  At most, fills `buffer` entirely. */
BMK_loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char * const * fileNamesTable,unsigned nbFiles,int displayLevel)751 static int BMK_loadFiles(void* buffer, size_t bufferSize,
752                          size_t* fileSizes,
753                          const char* const * fileNamesTable, unsigned nbFiles,
754                          int displayLevel)
755 {
756     size_t pos = 0, totalSize = 0;
757     unsigned n;
758     for (n=0; n<nbFiles; n++) {
759         FILE* f;
760         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
761         if (UTIL_isDirectory(fileNamesTable[n])) {
762             DISPLAYLEVEL(2, "Ignoring %s directory...       \n", fileNamesTable[n]);
763             fileSizes[n] = 0;
764             continue;
765         }
766         if (fileSize == UTIL_FILESIZE_UNKNOWN) {
767             DISPLAYLEVEL(2, "Cannot evaluate size of %s, ignoring ... \n", fileNamesTable[n]);
768             fileSizes[n] = 0;
769             continue;
770         }
771         f = fopen(fileNamesTable[n], "rb");
772         if (f==NULL) EXM_THROW_INT(10, "impossible to open file %s", fileNamesTable[n]);
773         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
774         if (fileSize > bufferSize-pos) fileSize = bufferSize-pos, nbFiles=n;   /* buffer too small - stop after this file */
775         {   size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
776             if (readSize != (size_t)fileSize) EXM_THROW_INT(11, "could not read %s", fileNamesTable[n]);
777             pos += readSize;
778         }
779         fileSizes[n] = (size_t)fileSize;
780         totalSize += (size_t)fileSize;
781         fclose(f);
782     }
783 
784     if (totalSize == 0) EXM_THROW_INT(12, "no data to bench");
785     return 0;
786 }
787 
BMK_benchFilesAdvanced(const char * const * fileNamesTable,unsigned nbFiles,const char * dictFileName,int cLevel,const ZSTD_compressionParameters * compressionParams,int displayLevel,const BMK_advancedParams_t * adv)788 BMK_benchOutcome_t BMK_benchFilesAdvanced(
789                         const char* const * fileNamesTable, unsigned nbFiles,
790                         const char* dictFileName, int cLevel,
791                         const ZSTD_compressionParameters* compressionParams,
792                         int displayLevel, const BMK_advancedParams_t* adv)
793 {
794     void* srcBuffer = NULL;
795     size_t benchedSize;
796     void* dictBuffer = NULL;
797     size_t dictBufferSize = 0;
798     size_t* fileSizes = NULL;
799     BMK_benchOutcome_t res;
800     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
801 
802     if (!nbFiles) {
803         RETURN_ERROR(14, BMK_benchOutcome_t, "No Files to Benchmark");
804     }
805 
806     if (cLevel > ZSTD_maxCLevel()) {
807         RETURN_ERROR(15, BMK_benchOutcome_t, "Invalid Compression Level");
808     }
809 
810     fileSizes = (size_t*)calloc(nbFiles, sizeof(size_t));
811     if (!fileSizes) RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory for fileSizes");
812 
813     /* Load dictionary */
814     if (dictFileName != NULL) {
815         U64 const dictFileSize = UTIL_getFileSize(dictFileName);
816         if (dictFileSize > 64 MB) {
817             free(fileSizes);
818             RETURN_ERROR(10, BMK_benchOutcome_t, "dictionary file %s too large", dictFileName);
819         }
820         dictBufferSize = (size_t)dictFileSize;
821         dictBuffer = malloc(dictBufferSize);
822         if (dictBuffer==NULL) {
823             free(fileSizes);
824             RETURN_ERROR(11, BMK_benchOutcome_t, "not enough memory for dictionary (%u bytes)",
825                             (U32)dictBufferSize);
826         }
827 
828         {   int const errorCode = BMK_loadFiles(dictBuffer, dictBufferSize,
829                                                 fileSizes, &dictFileName /*?*/,
830                                                 1 /*?*/, displayLevel);
831             if (errorCode) {
832                 res = BMK_benchOutcome_error();
833                 goto _cleanUp;
834         }   }
835     }
836 
837     /* Memory allocation & restrictions */
838     benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
839     if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
840     if (benchedSize < totalSizeToLoad)
841         DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
842 
843     srcBuffer = benchedSize ? malloc(benchedSize) : NULL;
844     if (!srcBuffer) {
845         free(dictBuffer);
846         free(fileSizes);
847         RETURN_ERROR(12, BMK_benchOutcome_t, "not enough memory");
848     }
849 
850     /* Load input buffer */
851     {   int const errorCode = BMK_loadFiles(srcBuffer, benchedSize,
852                                         fileSizes, fileNamesTable, nbFiles,
853                                         displayLevel);
854         if (errorCode) {
855             res = BMK_benchOutcome_error();
856             goto _cleanUp;
857     }   }
858 
859     /* Bench */
860     {   char mfName[20] = {0};
861         snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
862         {   const char* const displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
863             res = BMK_benchCLevel(srcBuffer, benchedSize,
864                                 fileSizes, nbFiles,
865                                 cLevel, compressionParams,
866                                 dictBuffer, dictBufferSize,
867                                 displayLevel, displayName,
868                                 adv);
869     }   }
870 
871 _cleanUp:
872     free(srcBuffer);
873     free(dictBuffer);
874     free(fileSizes);
875     return res;
876 }
877 
878 
BMK_benchFiles(const char * const * fileNamesTable,unsigned nbFiles,const char * dictFileName,int cLevel,const ZSTD_compressionParameters * compressionParams,int displayLevel)879 BMK_benchOutcome_t BMK_benchFiles(
880                     const char* const * fileNamesTable, unsigned nbFiles,
881                     const char* dictFileName,
882                     int cLevel, const ZSTD_compressionParameters* compressionParams,
883                     int displayLevel)
884 {
885     BMK_advancedParams_t const adv = BMK_initAdvancedParams();
886     return BMK_benchFilesAdvanced(fileNamesTable, nbFiles, dictFileName, cLevel, compressionParams, displayLevel, &adv);
887 }
888