xref: /dragonfly/contrib/zstd/programs/benchfn.c (revision a28cd43d)
1*a28cd43dSSascha Wildner /*
2*a28cd43dSSascha Wildner  * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3*a28cd43dSSascha Wildner  * All rights reserved.
4*a28cd43dSSascha Wildner  *
5*a28cd43dSSascha Wildner  * This source code is licensed under both the BSD-style license (found in the
6*a28cd43dSSascha Wildner  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7*a28cd43dSSascha Wildner  * in the COPYING file in the root directory of this source tree).
8*a28cd43dSSascha Wildner  * You may select, at your option, one of the above-listed licenses.
9*a28cd43dSSascha Wildner  */
10*a28cd43dSSascha Wildner 
11*a28cd43dSSascha Wildner 
12*a28cd43dSSascha Wildner 
13*a28cd43dSSascha Wildner /* *************************************
14*a28cd43dSSascha Wildner *  Includes
15*a28cd43dSSascha Wildner ***************************************/
16*a28cd43dSSascha Wildner #include <stdlib.h>      /* malloc, free */
17*a28cd43dSSascha Wildner #include <string.h>      /* memset */
18*a28cd43dSSascha Wildner #include <assert.h>      /* assert */
19*a28cd43dSSascha Wildner 
20*a28cd43dSSascha Wildner #include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
21*a28cd43dSSascha Wildner #include "benchfn.h"
22*a28cd43dSSascha Wildner 
23*a28cd43dSSascha Wildner 
24*a28cd43dSSascha Wildner /* *************************************
25*a28cd43dSSascha Wildner *  Constants
26*a28cd43dSSascha Wildner ***************************************/
27*a28cd43dSSascha Wildner #define TIMELOOP_MICROSEC     SEC_TO_MICRO      /* 1 second */
28*a28cd43dSSascha Wildner #define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
29*a28cd43dSSascha Wildner 
30*a28cd43dSSascha Wildner #define KB *(1 <<10)
31*a28cd43dSSascha Wildner #define MB *(1 <<20)
32*a28cd43dSSascha Wildner #define GB *(1U<<30)
33*a28cd43dSSascha Wildner 
34*a28cd43dSSascha Wildner 
35*a28cd43dSSascha Wildner /* *************************************
36*a28cd43dSSascha Wildner *  Debug errors
37*a28cd43dSSascha Wildner ***************************************/
/* Debug traces are only compiled in when DEBUG is defined and >= 1.
 * DEBUGOUTPUT() is wrapped in do { } while (0) so that it behaves as a single
 * statement, including when used as the body of an unbraced if/else. */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>       /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...) do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
#else
#  define DEBUGOUTPUT(...)
#endif
45*a28cd43dSSascha Wildner 
46*a28cd43dSSascha Wildner 
/* error without displaying :
 * emits a trace through DEBUGOUTPUT() (file, line, then the caller's message),
 * then returns `retValue` from the enclosing function.
 * Wrapped in do { } while (0) so the macro is usable as a single statement,
 * e.g. as the body of an unbraced if/else. */
#define RETURN_QUIET_ERROR(retValue, ...) do {        \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DEBUGOUTPUT("Error : ");                          \
    DEBUGOUTPUT(__VA_ARGS__);                         \
    DEBUGOUTPUT(" \n");                               \
    return retValue;                                  \
} while (0)
55*a28cd43dSSascha Wildner 
/* Abort execution if a condition is not met.
 * The failed condition is traced through DEBUGOUTPUT() before calling abort().
 * Wrapped in do { } while (0) so the macro is usable as a single statement. */
#define CONTROL(c) do { if (!(c)) { DEBUGOUTPUT("error: %s \n", #c); abort(); } } while (0)
58*a28cd43dSSascha Wildner 
59*a28cd43dSSascha Wildner 
60*a28cd43dSSascha Wildner /* *************************************
61*a28cd43dSSascha Wildner *  Benchmarking an arbitrary function
62*a28cd43dSSascha Wildner ***************************************/
63*a28cd43dSSascha Wildner 
/* Tells whether `outcome` carries a valid measurement.
 * @return : 1 if the error tag is clear, 0 if the outcome holds an error. */
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
{
    int const hasError = (outcome.error_tag_never_ever_use_directly != 0);
    return !hasError;
}
68*a28cd43dSSascha Wildner 
69*a28cd43dSSascha Wildner /* warning : this function will stop program execution if outcome is invalid !
70*a28cd43dSSascha Wildner  *           check outcome validity first, using BMK_isValid_runResult() */
BMK_extract_runTime(BMK_runOutcome_t outcome)71*a28cd43dSSascha Wildner BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
72*a28cd43dSSascha Wildner {
73*a28cd43dSSascha Wildner     CONTROL(outcome.error_tag_never_ever_use_directly == 0);
74*a28cd43dSSascha Wildner     return outcome.internal_never_ever_use_directly;
75*a28cd43dSSascha Wildner }
76*a28cd43dSSascha Wildner 
/* Returns the error value stored in a failed outcome.
 * warning : stops program execution (abort) if outcome is NOT an error. */
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
    size_t errorValue;
    CONTROL(outcome.error_tag_never_ever_use_directly != 0);
    errorValue = outcome.error_result_never_ever_use_directly;
    return errorValue;
}
82*a28cd43dSSascha Wildner 
BMK_runOutcome_error(size_t errorResult)83*a28cd43dSSascha Wildner static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
84*a28cd43dSSascha Wildner {
85*a28cd43dSSascha Wildner     BMK_runOutcome_t b;
86*a28cd43dSSascha Wildner     memset(&b, 0, sizeof(b));
87*a28cd43dSSascha Wildner     b.error_tag_never_ever_use_directly = 1;
88*a28cd43dSSascha Wildner     b.error_result_never_ever_use_directly = errorResult;
89*a28cd43dSSascha Wildner     return b;
90*a28cd43dSSascha Wildner }
91*a28cd43dSSascha Wildner 
BMK_setValid_runTime(BMK_runTime_t runTime)92*a28cd43dSSascha Wildner static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
93*a28cd43dSSascha Wildner {
94*a28cd43dSSascha Wildner     BMK_runOutcome_t outcome;
95*a28cd43dSSascha Wildner     outcome.error_tag_never_ever_use_directly = 0;
96*a28cd43dSSascha Wildner     outcome.internal_never_ever_use_directly = runTime;
97*a28cd43dSSascha Wildner     return outcome;
98*a28cd43dSSascha Wildner }
99*a28cd43dSSascha Wildner 
100*a28cd43dSSascha Wildner 
101*a28cd43dSSascha Wildner /* initFn will be measured once, benchFn will be measured `nbLoops` times */
102*a28cd43dSSascha Wildner /* initFn is optional, provide NULL if none */
103*a28cd43dSSascha Wildner /* benchFn must return a size_t value that errorFn can interpret */
104*a28cd43dSSascha Wildner /* takes # of blocks and list of size & stuff for each. */
105*a28cd43dSSascha Wildner /* can report result of benchFn for each block into blockResult. */
106*a28cd43dSSascha Wildner /* blockResult is optional, provide NULL if this information is not required */
107*a28cd43dSSascha Wildner /* note : time per loop can be reported as zero if run time < timer resolution */
BMK_benchFunction(BMK_benchParams_t p,unsigned nbLoops)108*a28cd43dSSascha Wildner BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
109*a28cd43dSSascha Wildner                                    unsigned nbLoops)
110*a28cd43dSSascha Wildner {
111*a28cd43dSSascha Wildner     size_t dstSize = 0;
112*a28cd43dSSascha Wildner     nbLoops += !nbLoops;   /* minimum nbLoops is 1 */
113*a28cd43dSSascha Wildner 
114*a28cd43dSSascha Wildner     /* init */
115*a28cd43dSSascha Wildner     {   size_t i;
116*a28cd43dSSascha Wildner         for(i = 0; i < p.blockCount; i++) {
117*a28cd43dSSascha Wildner             memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]);  /* warm up and erase result buffer */
118*a28cd43dSSascha Wildner     }   }
119*a28cd43dSSascha Wildner 
120*a28cd43dSSascha Wildner     /* benchmark */
121*a28cd43dSSascha Wildner     {   UTIL_time_t const clockStart = UTIL_getTime();
122*a28cd43dSSascha Wildner         unsigned loopNb, blockNb;
123*a28cd43dSSascha Wildner         if (p.initFn != NULL) p.initFn(p.initPayload);
124*a28cd43dSSascha Wildner         for (loopNb = 0; loopNb < nbLoops; loopNb++) {
125*a28cd43dSSascha Wildner             for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
126*a28cd43dSSascha Wildner                 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
127*a28cd43dSSascha Wildner                                    p.dstBuffers[blockNb], p.dstCapacities[blockNb],
128*a28cd43dSSascha Wildner                                    p.benchPayload);
129*a28cd43dSSascha Wildner                 if (loopNb == 0) {
130*a28cd43dSSascha Wildner                     if (p.blockResults != NULL) p.blockResults[blockNb] = res;
131*a28cd43dSSascha Wildner                     if ((p.errorFn != NULL) && (p.errorFn(res))) {
132*a28cd43dSSascha Wildner                         RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
133*a28cd43dSSascha Wildner                             "Function benchmark failed on block %u (of size %u) with error %i",
134*a28cd43dSSascha Wildner                             blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
135*a28cd43dSSascha Wildner                     }
136*a28cd43dSSascha Wildner                     dstSize += res;
137*a28cd43dSSascha Wildner             }   }
138*a28cd43dSSascha Wildner         }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
139*a28cd43dSSascha Wildner 
140*a28cd43dSSascha Wildner         {   PTime const totalTime = UTIL_clockSpanNano(clockStart);
141*a28cd43dSSascha Wildner             BMK_runTime_t rt;
142*a28cd43dSSascha Wildner             rt.nanoSecPerRun = (double)totalTime / nbLoops;
143*a28cd43dSSascha Wildner             rt.sumOfReturn = dstSize;
144*a28cd43dSSascha Wildner             return BMK_setValid_runTime(rt);
145*a28cd43dSSascha Wildner     }   }
146*a28cd43dSSascha Wildner }
147*a28cd43dSSascha Wildner 
148*a28cd43dSSascha Wildner 
149*a28cd43dSSascha Wildner /* ====  Benchmarking any function, providing intermediate results  ==== */
150*a28cd43dSSascha Wildner 
151*a28cd43dSSascha Wildner struct BMK_timedFnState_s {
152*a28cd43dSSascha Wildner     PTime timeSpent_ns;
153*a28cd43dSSascha Wildner     PTime timeBudget_ns;
154*a28cd43dSSascha Wildner     PTime runBudget_ns;
155*a28cd43dSSascha Wildner     BMK_runTime_t fastestRun;
156*a28cd43dSSascha Wildner     unsigned nbLoops;
157*a28cd43dSSascha Wildner     UTIL_time_t coolTime;
158*a28cd43dSSascha Wildner };  /* typedef'd to BMK_timedFnState_t within bench.h */
159*a28cd43dSSascha Wildner 
BMK_createTimedFnState(unsigned total_ms,unsigned run_ms)160*a28cd43dSSascha Wildner BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
161*a28cd43dSSascha Wildner {
162*a28cd43dSSascha Wildner     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
163*a28cd43dSSascha Wildner     if (r == NULL) return NULL;   /* malloc() error */
164*a28cd43dSSascha Wildner     BMK_resetTimedFnState(r, total_ms, run_ms);
165*a28cd43dSSascha Wildner     return r;
166*a28cd43dSSascha Wildner }
167*a28cd43dSSascha Wildner 
/* Releases a state ; simply forwards to free(), so NULL is accepted. */
void BMK_freeTimedFnState(BMK_timedFnState_t* state)
{
    free(state);
}
169*a28cd43dSSascha Wildner 
170*a28cd43dSSascha Wildner BMK_timedFnState_t*
BMK_initStatic_timedFnState(void * buffer,size_t size,unsigned total_ms,unsigned run_ms)171*a28cd43dSSascha Wildner BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
172*a28cd43dSSascha Wildner {
173*a28cd43dSSascha Wildner     typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1];  /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
174*a28cd43dSSascha Wildner     typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align;  /* force tfs to be aligned at its next best position */
175*a28cd43dSSascha Wildner     size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
176*a28cd43dSSascha Wildner     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
177*a28cd43dSSascha Wildner     if (buffer == NULL) return NULL;
178*a28cd43dSSascha Wildner     if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
179*a28cd43dSSascha Wildner     if ((size_t)buffer % tfs_alignment) return NULL;  /* buffer must be properly aligned */
180*a28cd43dSSascha Wildner     BMK_resetTimedFnState(r, total_ms, run_ms);
181*a28cd43dSSascha Wildner     return r;
182*a28cd43dSSascha Wildner }
183*a28cd43dSSascha Wildner 
/* (Re)initializes a timed-benchmark state.
 * total_ms : total time budget, in milliseconds ; 0 is treated as 1.
 * run_ms   : budget of a single measurement, in milliseconds ; 0 is treated as 1,
 *            and it is capped to total_ms.
 * Time spent is cleared, nbLoops restarts at 1, and the best run time is set
 * to a value expected to exceed any real measurement. */
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
{
    unsigned const totalBudget_ms = total_ms ? total_ms : 1;
    unsigned runBudget_ms = run_ms ? run_ms : 1;
    if (runBudget_ms > totalBudget_ms) runBudget_ms = totalBudget_ms;

    /* budgets : milliseconds converted to nanoseconds */
    timedFnState->timeBudget_ns = (PTime)totalBudget_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->runBudget_ns = (PTime)runBudget_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->timeSpent_ns = 0;

    timedFnState->nbLoops = 1;
    timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
    timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;  /* hopefully large enough : must be larger than any potential measurement */

    timedFnState->coolTime = UTIL_getTime();
}
197*a28cd43dSSascha Wildner 
198*a28cd43dSSascha Wildner /* Tells if nb of seconds set in timedFnState for all runs is spent.
199*a28cd43dSSascha Wildner  * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
BMK_isCompleted_TimedFn(const BMK_timedFnState_t * timedFnState)200*a28cd43dSSascha Wildner int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
201*a28cd43dSSascha Wildner {
202*a28cd43dSSascha Wildner     return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
203*a28cd43dSSascha Wildner }
204*a28cd43dSSascha Wildner 
205*a28cd43dSSascha Wildner 
206*a28cd43dSSascha Wildner #undef MIN
207*a28cd43dSSascha Wildner #define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
208*a28cd43dSSascha Wildner 
209*a28cd43dSSascha Wildner #define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */
210*a28cd43dSSascha Wildner 
BMK_benchTimedFn(BMK_timedFnState_t * cont,BMK_benchParams_t p)211*a28cd43dSSascha Wildner BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
212*a28cd43dSSascha Wildner                                   BMK_benchParams_t p)
213*a28cd43dSSascha Wildner {
214*a28cd43dSSascha Wildner     PTime const runBudget_ns = cont->runBudget_ns;
215*a28cd43dSSascha Wildner     PTime const runTimeMin_ns = runBudget_ns / 2;
216*a28cd43dSSascha Wildner     int completed = 0;
217*a28cd43dSSascha Wildner     BMK_runTime_t bestRunTime = cont->fastestRun;
218*a28cd43dSSascha Wildner 
219*a28cd43dSSascha Wildner     while (!completed) {
220*a28cd43dSSascha Wildner         BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
221*a28cd43dSSascha Wildner 
222*a28cd43dSSascha Wildner         if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
223*a28cd43dSSascha Wildner             return runResult;
224*a28cd43dSSascha Wildner         }
225*a28cd43dSSascha Wildner 
226*a28cd43dSSascha Wildner         {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
227*a28cd43dSSascha Wildner             double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
228*a28cd43dSSascha Wildner 
229*a28cd43dSSascha Wildner             cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
230*a28cd43dSSascha Wildner 
231*a28cd43dSSascha Wildner             /* estimate nbLoops for next run to last approximately 1 second */
232*a28cd43dSSascha Wildner             if (loopDuration_ns > (runBudget_ns / 50)) {
233*a28cd43dSSascha Wildner                 double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
234*a28cd43dSSascha Wildner                 cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
235*a28cd43dSSascha Wildner             } else {
236*a28cd43dSSascha Wildner                 /* previous run was too short : blindly increase workload by x multiplier */
237*a28cd43dSSascha Wildner                 const unsigned multiplier = 10;
238*a28cd43dSSascha Wildner                 assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
239*a28cd43dSSascha Wildner                 cont->nbLoops *= multiplier;
240*a28cd43dSSascha Wildner             }
241*a28cd43dSSascha Wildner 
242*a28cd43dSSascha Wildner             if(loopDuration_ns < runTimeMin_ns) {
243*a28cd43dSSascha Wildner                 /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
244*a28cd43dSSascha Wildner                 assert(completed == 0);
245*a28cd43dSSascha Wildner                 continue;
246*a28cd43dSSascha Wildner             } else {
247*a28cd43dSSascha Wildner                 if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
248*a28cd43dSSascha Wildner                     bestRunTime = newRunTime;
249*a28cd43dSSascha Wildner                 }
250*a28cd43dSSascha Wildner                 completed = 1;
251*a28cd43dSSascha Wildner             }
252*a28cd43dSSascha Wildner         }
253*a28cd43dSSascha Wildner     }   /* while (!completed) */
254*a28cd43dSSascha Wildner 
255*a28cd43dSSascha Wildner     return BMK_setValid_runTime(bestRunTime);
256*a28cd43dSSascha Wildner }
257