1 /*
2  * Copyright (C) 2016-2020 Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 
12 
13 /* *************************************
14 *  Includes
15 ***************************************/
16 #include <stdlib.h>      /* malloc, free */
17 #include <string.h>      /* memset */
18 #undef NDEBUG            /* assert must not be disabled */
19 #include <assert.h>      /* assert */
20 
21 #include "timefn.h"        /* UTIL_time_t, UTIL_getTime */
22 #include "benchfn.h"
23 
24 
25 /* *************************************
26 *  Constants
27 ***************************************/
/* Duration of one timing loop, in two different units */
#define TIMELOOP_MICROSEC     SEC_TO_MICRO       /* 1 second */
#define TIMELOOP_NANOSEC      (1000000000ULL)    /* 1 second */

/* Size suffixes, meant to be written after a number : `4 KB` expands to `4 *(1 << 10)` */
#define KB  *(1  << 10)
#define MB  *(1  << 20)
#define GB  *(1U << 30)
34 
35 
36 /* *************************************
37 *  Debug errors
38 ***************************************/
/* DEBUGOUTPUT() :
 * prints to stderr when DEBUG >= 1, expands to an empty statement otherwise.
 * Both variants are wrapped in do { } while (0), so that `DEBUGOUTPUT(...);`
 * is exactly one statement and can sit in an unbraced if/else branch. */
#if defined(DEBUG) && (DEBUG >= 1)
#  include <stdio.h>       /* fprintf */
#  define DISPLAY(...)       fprintf(stderr, __VA_ARGS__)
#  define DEBUGOUTPUT(...)   do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
#else
#  define DEBUGOUTPUT(...)   do { } while (0)
#endif


/* RETURN_QUIET_ERROR() :
 * leaves the enclosing function, returning `retValue`.
 * The message (printf-style, passed after retValue) is only emitted through DEBUGOUTPUT(),
 * hence nothing is displayed unless DEBUG >= 1.
 * Must be followed by a semicolon at the call site. */
#define RETURN_QUIET_ERROR(retValue, ...) do {        \
    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
    DEBUGOUTPUT("Error : ");                          \
    DEBUGOUTPUT(__VA_ARGS__);                         \
    DEBUGOUTPUT(" \n");                               \
    return retValue;                                  \
} while (0)
56 
57 
58 /* *************************************
59 *  Benchmarking an arbitrary function
60 ***************************************/
61 
/* BMK_isSuccessful_runOutcome() :
 * @return : 1 when the outcome's error tag is clear, 0 when it is set. */
int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
{
    int const hasError = (outcome.error_tag_never_ever_use_directly != 0);
    return !hasError;
}
66 
67 /* warning : this function will stop program execution if outcome is invalid !
68  *           check outcome validity first, using BMK_isValid_runResult() */
BMK_extract_runTime(BMK_runOutcome_t outcome)69 BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
70 {
71     assert(outcome.error_tag_never_ever_use_directly == 0);
72     return outcome.internal_never_ever_use_directly;
73 }
74 
/* BMK_extract_errorResult() :
 * gives back the value recorded alongside a failed outcome.
 * warning : the assert() below stops program execution when the outcome does not carry an error. */
size_t BMK_extract_errorResult(BMK_runOutcome_t outcome)
{
    int const isError = (outcome.error_tag_never_ever_use_directly != 0);
    assert(isError);
    return outcome.error_result_never_ever_use_directly;
}
80 
BMK_runOutcome_error(size_t errorResult)81 static BMK_runOutcome_t BMK_runOutcome_error(size_t errorResult)
82 {
83     BMK_runOutcome_t b;
84     memset(&b, 0, sizeof(b));
85     b.error_tag_never_ever_use_directly = 1;
86     b.error_result_never_ever_use_directly = errorResult;
87     return b;
88 }
89 
BMK_setValid_runTime(BMK_runTime_t runTime)90 static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
91 {
92     BMK_runOutcome_t outcome;
93     outcome.error_tag_never_ever_use_directly = 0;
94     outcome.internal_never_ever_use_directly = runTime;
95     return outcome;
96 }
97 
98 
99 /* initFn will be measured once, benchFn will be measured `nbLoops` times */
100 /* initFn is optional, provide NULL if none */
101 /* benchFn must return a size_t value that errorFn can interpret */
102 /* takes # of blocks and list of size & stuff for each. */
103 /* can report result of benchFn for each block into blockResult. */
104 /* blockResult is optional, provide NULL if this information is not required */
105 /* note : time per loop can be reported as zero if run time < timer resolution */
BMK_benchFunction(BMK_benchParams_t p,unsigned nbLoops)106 BMK_runOutcome_t BMK_benchFunction(BMK_benchParams_t p,
107                                    unsigned nbLoops)
108 {
109     /* init */
110     {   size_t i;
111         for (i = 0; i < p.blockCount; i++) {
112             memset(p.dstBuffers[i], 0xE5, p.dstCapacities[i]);  /* warm up and erase result buffer */
113     }   }
114 
115     /* benchmark */
116     {   UTIL_time_t const clockStart = UTIL_getTime();
117         size_t dstSize = 0;
118         unsigned loopNb, blockNb;
119         nbLoops += !nbLoops;   /* minimum nbLoops is 1 */
120         if (p.initFn != NULL) p.initFn(p.initPayload);
121         for (loopNb = 0; loopNb < nbLoops; loopNb++) {
122             for (blockNb = 0; blockNb < p.blockCount; blockNb++) {
123                 size_t const res = p.benchFn(p.srcBuffers[blockNb], p.srcSizes[blockNb],
124                                    p.dstBuffers[blockNb], p.dstCapacities[blockNb],
125                                    p.benchPayload);
126                 if (loopNb == 0) {
127                     if (p.blockResults != NULL) p.blockResults[blockNb] = res;
128                     if ((p.errorFn != NULL) && (p.errorFn(res))) {
129                         RETURN_QUIET_ERROR(BMK_runOutcome_error(res),
130                             "Function benchmark failed on block %u (of size %u) with error %i",
131                             blockNb, (unsigned)p.srcSizes[blockNb], (int)res);
132                     }
133                     dstSize += res;
134             }   }
135         }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
136 
137         {   PTime const totalTime = UTIL_clockSpanNano(clockStart);
138             BMK_runTime_t rt;
139             rt.nanoSecPerRun = (double)totalTime / nbLoops;
140             rt.sumOfReturn = dstSize;
141             return BMK_setValid_runTime(rt);
142     }   }
143 }
144 
145 
146 /* ====  Benchmarking any function, providing intermediate results  ==== */
147 
/* State of a time-bounded benchmark session.
 * Filled by BMK_resetTimedFnState(), then read and updated by BMK_benchTimedFn(). */
struct BMK_timedFnState_s {
    PTime timeSpent_ns;       /* accumulated duration of the measurements performed so far, in nanoseconds */
    PTime timeBudget_ns;      /* total duration allowed, in nanoseconds (derived from total_ms) */
    PTime runBudget_ns;       /* target duration of a single measurement, in nanoseconds (derived from run_ms) */
    BMK_runTime_t fastestRun; /* set by BMK_resetTimedFnState() to a sentinel meant to be slower than any real measurement */
    unsigned nbLoops;         /* nb of loops to request from the next BMK_benchFunction() call */
    UTIL_time_t coolTime;     /* timestamp taken by BMK_resetTimedFnState() ; not read anywhere in this file */
};  /* typedef'd to BMK_timedFnState_t outside of this file (NOTE(review): presumably in benchfn.h rather than bench.h - confirm) */
156 
BMK_createTimedFnState(unsigned total_ms,unsigned run_ms)157 BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
158 {
159     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
160     if (r == NULL) return NULL;   /* malloc() error */
161     BMK_resetTimedFnState(r, total_ms, run_ms);
162     return r;
163 }
164 
/* BMK_freeTimedFnState() :
 * hands `state` over to free(). */
void BMK_freeTimedFnState(BMK_timedFnState_t* state)
{
    free(state);
}
166 
167 BMK_timedFnState_t*
BMK_initStatic_timedFnState(void * buffer,size_t size,unsigned total_ms,unsigned run_ms)168 BMK_initStatic_timedFnState(void* buffer, size_t size, unsigned total_ms, unsigned run_ms)
169 {
170     typedef char check_size[ 2 * (sizeof(BMK_timedFnState_shell) >= sizeof(struct BMK_timedFnState_s)) - 1];  /* static assert : a compilation failure indicates that BMK_timedFnState_shell is not large enough */
171     typedef struct { check_size c; BMK_timedFnState_t tfs; } tfs_align;  /* force tfs to be aligned at its next best position */
172     size_t const tfs_alignment = offsetof(tfs_align, tfs); /* provides the minimal alignment restriction for BMK_timedFnState_t */
173     BMK_timedFnState_t* const r = (BMK_timedFnState_t*)buffer;
174     if (buffer == NULL) return NULL;
175     if (size < sizeof(struct BMK_timedFnState_s)) return NULL;
176     if ((size_t)buffer % tfs_alignment) return NULL;  /* buffer must be properly aligned */
177     BMK_resetTimedFnState(r, total_ms, run_ms);
178     return r;
179 }
180 
/* BMK_resetTimedFnState() :
 * (re)initializes a state object.
 * - total_ms : total time budget ; 0 is replaced by 1.
 * - run_ms : budget of a single measurement ; 0 is replaced by 1, and it is capped at total_ms.
 * Time spent is cleared, nbLoops restarts at 1,
 * and fastestRun is set to a sentinel meant to lose against any real measurement. */
void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
{
    unsigned const totalClamped_ms = total_ms ? total_ms : 1;
    unsigned const runNonZero_ms = run_ms ? run_ms : 1;
    unsigned const runClamped_ms = (runNonZero_ms > totalClamped_ms) ? totalClamped_ms : runNonZero_ms;

    /* counters */
    timedFnState->nbLoops = 1;
    timedFnState->timeSpent_ns = 0;

    /* budgets : milliseconds converted to nanoseconds */
    timedFnState->timeBudget_ns = (PTime)totalClamped_ms * TIMELOOP_NANOSEC / 1000;
    timedFnState->runBudget_ns = (PTime)runClamped_ms * TIMELOOP_NANOSEC / 1000;

    /* sentinel : hopefully large enough, must be larger than any potential measurement */
    timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
    timedFnState->fastestRun.nanoSecPerRun = (double)TIMELOOP_NANOSEC * 2000000000;

    timedFnState->coolTime = UTIL_getTime();
}
194 
195 /* Tells if nb of seconds set in timedFnState for all runs is spent.
196  * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
BMK_isCompleted_TimedFn(const BMK_timedFnState_t * timedFnState)197 int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
198 {
199     return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
200 }
201 
202 
/* MIN() : smaller of two values.
 * warning : the selected argument is evaluated twice, do not pass expressions with side effects */
#undef MIN
#define MIN(a,b)   (((a) < (b)) ? (a) : (b))

/* 0.5 seconds, in nanoseconds */
#define MINUSABLETIME  ((TIMELOOP_NANOSEC) / 2)
207 
BMK_benchTimedFn(BMK_timedFnState_t * cont,BMK_benchParams_t p)208 BMK_runOutcome_t BMK_benchTimedFn(BMK_timedFnState_t* cont,
209                                   BMK_benchParams_t p)
210 {
211     PTime const runBudget_ns = cont->runBudget_ns;
212     PTime const runTimeMin_ns = runBudget_ns / 2;
213     BMK_runTime_t bestRunTime = cont->fastestRun;
214 
215     for (;;) {
216         BMK_runOutcome_t const runResult = BMK_benchFunction(p, cont->nbLoops);
217 
218         if (!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
219             return runResult;
220         }
221 
222         {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
223             double const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
224 
225             cont->timeSpent_ns += (unsigned long long)loopDuration_ns;
226 
227             /* estimate nbLoops for next run to last approximately 1 second */
228             if (loopDuration_ns > (runBudget_ns / 50)) {
229                 double const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
230                 cont->nbLoops = (unsigned)(runBudget_ns / fastestRun_ns) + 1;
231             } else {
232                 /* previous run was too short : blindly increase workload by x multiplier */
233                 const unsigned multiplier = 10;
234                 assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
235                 cont->nbLoops *= multiplier;
236             }
237 
238             if (loopDuration_ns < runTimeMin_ns) {
239                 /* When benchmark run time is too small : don't report results.
240                  * increased risks of rounding errors */
241                 continue;
242             }
243 
244             if (newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
245                 bestRunTime = newRunTime;
246             }
247         }
248         break;
249     }   /* while (!completed) */
250 
251     return BMK_setValid_runTime(bestRunTime);
252 }
253