1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #pragma once
18
19 #include <folly/BenchmarkUtil.h>
20 #include <folly/Portability.h>
21 #include <folly/Preprocessor.h> // for FB_ANONYMOUS_VARIABLE
22 #include <folly/Range.h>
23 #include <folly/ScopeGuard.h>
24 #include <folly/Traits.h>
25 #include <folly/functional/Invoke.h>
26 #include <folly/portability/GFlags.h>
27
28 #include <cassert>
29 #include <chrono>
30 #include <functional>
31 #include <limits>
32 #include <type_traits>
33 #include <unordered_map>
34
35 #include <boost/function_types/function_arity.hpp>
36 #include <glog/logging.h>
37
38 DECLARE_bool(benchmark);
39
40 namespace folly {
41
42 /**
43 * Runs all benchmarks defined. Usually put in main().
44 */
45 void runBenchmarks();
46
47 /**
48 * Runs all benchmarks defined if and only if the --benchmark flag has
49 * been passed to the program. Usually put in main().
50 */
runBenchmarksOnFlag()51 inline bool runBenchmarksOnFlag() {
52 if (FLAGS_benchmark) {
53 runBenchmarks();
54 }
55 return FLAGS_benchmark;
56 }
57
58 class UserMetric {
59 public:
60 enum class Type { CUSTOM, TIME, METRIC };
61
62 int64_t value{};
63 Type type{Type::CUSTOM};
64
65 UserMetric() = default;
66 /* implicit */ UserMetric(int64_t val, Type typ = Type::CUSTOM)
value(val)67 : value(val), type(typ) {}
68 };
69
70 using UserCounters = std::unordered_map<std::string, UserMetric>;
71
72 namespace detail {
73 struct TimeIterData {
74 std::chrono::high_resolution_clock::duration duration;
75 unsigned int niter;
76 UserCounters userCounters;
77 };
78
79 using BenchmarkFun = std::function<TimeIterData(unsigned int)>;
80
81 struct BenchmarkRegistration {
82 std::string file;
83 std::string name;
84 BenchmarkFun func;
85 bool useCounter = false;
86 };
87
88 struct BenchmarkResult {
89 std::string file;
90 std::string name;
91 double timeInNs;
92 UserCounters counters;
93 };
94
95 /**
96 * Adds a benchmark wrapped in a std::function. Only used
97 * internally. Pass by value is intentional.
98 */
99 void addBenchmarkImpl(
100 const char* file, StringPiece name, BenchmarkFun, bool useCounter);
101
102 /**
103 * Runs all benchmarks defined in the program, doesn't print by default.
104 * Usually used when customized printing of results is desired.
105 */
106 std::vector<BenchmarkResult> runBenchmarksWithResults();
107
108 } // namespace detail
109
110 /**
111 * Supporting type for BENCHMARK_SUSPEND defined below.
112 */
113 struct BenchmarkSuspender {
114 using Clock = std::chrono::high_resolution_clock;
115 using TimePoint = Clock::time_point;
116 using Duration = Clock::duration;
117
118 struct DismissedTag {};
119 static inline constexpr DismissedTag Dismissed{};
120
BenchmarkSuspenderBenchmarkSuspender121 BenchmarkSuspender() : start(Clock::now()) {}
122
BenchmarkSuspenderBenchmarkSuspender123 explicit BenchmarkSuspender(DismissedTag) : start(TimePoint{}) {}
124
125 BenchmarkSuspender(const BenchmarkSuspender&) = delete;
BenchmarkSuspenderBenchmarkSuspender126 BenchmarkSuspender(BenchmarkSuspender&& rhs) noexcept {
127 start = rhs.start;
128 rhs.start = {};
129 }
130
131 BenchmarkSuspender& operator=(const BenchmarkSuspender&) = delete;
132 BenchmarkSuspender& operator=(BenchmarkSuspender&& rhs) noexcept {
133 if (start != TimePoint{}) {
134 tally();
135 }
136 start = rhs.start;
137 rhs.start = {};
138 return *this;
139 }
140
~BenchmarkSuspenderBenchmarkSuspender141 ~BenchmarkSuspender() {
142 if (start != TimePoint{}) {
143 tally();
144 }
145 }
146
dismissBenchmarkSuspender147 void dismiss() {
148 assert(start != TimePoint{});
149 tally();
150 start = {};
151 }
152
rehireBenchmarkSuspender153 void rehire() {
154 assert(start == TimePoint{});
155 start = Clock::now();
156 }
157
158 template <class F>
159 auto dismissing(F f) -> invoke_result_t<F> {
160 SCOPE_EXIT { rehire(); };
161 dismiss();
162 return f();
163 }
164
165 /**
166 * This is for use inside of if-conditions, used in BENCHMARK macros.
167 * If-conditions bypass the explicit on operator bool.
168 */
169 explicit operator bool() const { return false; }
170
171 /**
172 * Accumulates time spent outside benchmark.
173 */
174 static Duration timeSpent;
175
176 private:
tallyBenchmarkSuspender177 void tally() {
178 auto end = Clock::now();
179 timeSpent += end - start;
180 start = end;
181 }
182
183 TimePoint start;
184 };
185
186 /**
187 * Adds a benchmark. Usually not called directly but instead through
188 * the macro BENCHMARK defined below. The lambda function involved
189 * must take exactly one parameter of type unsigned, and the benchmark
190 * uses it with counter semantics (iteration occurs inside the
191 * function).
192 */
193 template <typename Lambda>
194 typename std::enable_if<folly::is_invocable_v<Lambda, unsigned>>::type
addBenchmark(const char * file,StringPiece name,Lambda && lambda)195 addBenchmark(const char* file, StringPiece name, Lambda&& lambda) {
196 auto execute = [=](unsigned int times) {
197 BenchmarkSuspender::timeSpent = {};
198 unsigned int niter;
199
200 // CORE MEASUREMENT STARTS
201 auto start = std::chrono::high_resolution_clock::now();
202 niter = lambda(times);
203 auto end = std::chrono::high_resolution_clock::now();
204 // CORE MEASUREMENT ENDS
205 return detail::TimeIterData{
206 (end - start) - BenchmarkSuspender::timeSpent, niter, UserCounters{}};
207 };
208
209 detail::addBenchmarkImpl(file, name, detail::BenchmarkFun(execute), false);
210 }
211
212 /**
213 * Adds a benchmark. Usually not called directly but instead through
214 * the macro BENCHMARK defined below. The lambda function involved
215 * must take zero parameters, and the benchmark calls it repeatedly
216 * (iteration occurs outside the function).
217 */
218 template <typename Lambda>
addBenchmark(const char * file,StringPiece name,Lambda && lambda)219 typename std::enable_if<folly::is_invocable_v<Lambda>>::type addBenchmark(
220 const char* file, StringPiece name, Lambda&& lambda) {
221 addBenchmark(file, name, [=](unsigned int times) {
222 unsigned int niter = 0;
223 while (times-- > 0) {
224 niter += lambda();
225 }
226 return niter;
227 });
228 }
229
230 /**
231 * similar as previous two template specialization, but lambda will also take
232 * customized counters in the following two cases
233 */
234 template <typename Lambda>
235 typename std::enable_if<
236 folly::is_invocable_v<Lambda, UserCounters&, unsigned>>::type
addBenchmark(const char * file,StringPiece name,Lambda && lambda)237 addBenchmark(const char* file, StringPiece name, Lambda&& lambda) {
238 auto execute = [=](unsigned int times) {
239 BenchmarkSuspender::timeSpent = {};
240 unsigned int niter;
241
242 // CORE MEASUREMENT STARTS
243 auto start = std::chrono::high_resolution_clock::now();
244 UserCounters counters;
245 niter = lambda(counters, times);
246 auto end = std::chrono::high_resolution_clock::now();
247 // CORE MEASUREMENT ENDS
248 return detail::TimeIterData{
249 (end - start) - BenchmarkSuspender::timeSpent, niter, counters};
250 };
251
252 detail::addBenchmarkImpl(
253 file,
254 name,
255 std::function<detail::TimeIterData(unsigned int)>(execute),
256 true);
257 }
258
259 template <typename Lambda>
260 typename std::enable_if<folly::is_invocable_v<Lambda, UserCounters&>>::type
addBenchmark(const char * file,StringPiece name,Lambda && lambda)261 addBenchmark(const char* file, StringPiece name, Lambda&& lambda) {
262 addBenchmark(file, name, [=](UserCounters& counters, unsigned int times) {
263 unsigned int niter = 0;
264 while (times-- > 0) {
265 niter += lambda(counters);
266 }
267 return niter;
268 });
269 }
270
271 struct dynamic;
272
273 void benchmarkResultsToDynamic(
274 const std::vector<detail::BenchmarkResult>& data, dynamic&);
275
276 void benchmarkResultsFromDynamic(
277 const dynamic&, std::vector<detail::BenchmarkResult>&);
278
279 void printResultComparison(
280 const std::vector<detail::BenchmarkResult>& base,
281 const std::vector<detail::BenchmarkResult>& test);
282
283 } // namespace folly
284
285 /**
286 * Introduces a benchmark function. Used internally, see BENCHMARK and
287 * friends below.
288 */
289
290 #define BENCHMARK_IMPL(funName, stringName, rv, paramType, paramName) \
291 static void funName(paramType); \
292 FOLLY_MAYBE_UNUSED static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
293 (::folly::addBenchmark( \
294 __FILE__, \
295 stringName, \
296 [](paramType paramName) -> unsigned { \
297 funName(paramName); \
298 return rv; \
299 }), \
300 true); \
301 static void funName(paramType paramName)
302
303 #define BENCHMARK_IMPL_COUNTERS( \
304 funName, stringName, counters, rv, paramType, paramName) \
305 static void funName( \
306 ::folly::UserCounters& FOLLY_PP_DETAIL_APPEND_VA_ARG(paramType)); \
307 FOLLY_MAYBE_UNUSED static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
308 (::folly::addBenchmark( \
309 __FILE__, \
310 stringName, \
311 [](::folly::UserCounters& counters FOLLY_PP_DETAIL_APPEND_VA_ARG( \
312 paramType paramName)) -> unsigned { \
313 funName(counters FOLLY_PP_DETAIL_APPEND_VA_ARG(paramName)); \
314 return rv; \
315 }), \
316 true); \
317 static void funName(::folly::UserCounters& counters \
318 FOLLY_PP_DETAIL_APPEND_VA_ARG(paramType paramName))
319
320 /**
321 * Introduces a benchmark function with support for returning the actual
322 * number of iterations. Used internally, see BENCHMARK_MULTI and friends
323 * below.
324 */
325 #define BENCHMARK_MULTI_IMPL(funName, stringName, paramType, paramName) \
326 static unsigned funName(paramType); \
327 FOLLY_MAYBE_UNUSED static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
328 (::folly::addBenchmark( \
329 __FILE__, \
330 stringName, \
331 [](paramType paramName) { return funName(paramName); }), \
332 true); \
333 static unsigned funName(paramType paramName)
334
335 /**
336 * Introduces a benchmark function. Use with either one or two arguments.
337 * The first is the name of the benchmark. Use something descriptive, such
338 * as insertVectorBegin. The second argument may be missing, or could be a
339 * symbolic counter. The counter dictates how many internal iteration the
340 * benchmark does. Example:
341 *
342 * BENCHMARK(vectorPushBack) {
343 * vector<int> v;
344 * v.push_back(42);
345 * }
346 *
347 * BENCHMARK(insertVectorBegin, iters) {
348 * vector<int> v;
349 * FOR_EACH_RANGE (i, 0, iters) {
350 * v.insert(v.begin(), 42);
351 * }
352 * }
353 */
354 #define BENCHMARK(name, ...) \
355 BENCHMARK_IMPL( \
356 name, \
357 FOLLY_PP_STRINGIZE(name), \
358 FB_ARG_2_OR_1(1, ##__VA_ARGS__), \
359 FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
360 __VA_ARGS__)
361
362 /**
363 * Allow users to record customized counter during benchmarking,
364 * there will be one extra column showing in the output result for each counter
365 *
366 * BENCHMARK_COUNTERS(insertVectorBegin, couters, iters) {
367 * vector<int> v;
368 * FOR_EACH_RANGE (i, 0, iters) {
369 * v.insert(v.begin(), 42);
370 * }
371 * BENCHMARK_SUSPEND {
372 * counters["foo"] = 10;
373 * }
374 * }
375 */
376 #define BENCHMARK_COUNTERS(name, counters, ...) \
377 BENCHMARK_IMPL_COUNTERS( \
378 name, \
379 FOLLY_PP_STRINGIZE(name), \
380 counters, \
381 FB_ARG_2_OR_1(1, ##__VA_ARGS__), \
382 FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
383 __VA_ARGS__)
384 /**
385 * Like BENCHMARK above, but allows the user to return the actual
386 * number of iterations executed in the function body. This can be
387 * useful if the benchmark function doesn't know upfront how many
388 * iterations it's going to run or if it runs through a certain
389 * number of test cases, e.g.:
390 *
391 * BENCHMARK_MULTI(benchmarkSomething) {
392 * std::vector<int> testCases { 0, 1, 1, 2, 3, 5 };
393 * for (int c : testCases) {
394 * doSomething(c);
395 * }
396 * return testCases.size();
397 * }
398 */
399 #define BENCHMARK_MULTI(name, ...) \
400 BENCHMARK_MULTI_IMPL( \
401 name, \
402 FOLLY_PP_STRINGIZE(name), \
403 FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
404 __VA_ARGS__)
405
406 /**
407 * Defines a benchmark that passes a parameter to another one. This is
408 * common for benchmarks that need a "problem size" in addition to
409 * "number of iterations". Consider:
410 *
411 * void pushBack(uint32_t n, size_t initialSize) {
412 * vector<int> v;
413 * BENCHMARK_SUSPEND {
414 * v.resize(initialSize);
415 * }
416 * FOR_EACH_RANGE (i, 0, n) {
417 * v.push_back(i);
418 * }
419 * }
420 * BENCHMARK_PARAM(pushBack, 0)
421 * BENCHMARK_PARAM(pushBack, 1000)
422 * BENCHMARK_PARAM(pushBack, 1000000)
423 *
424 * The benchmark above estimates the speed of push_back at different
425 * initial sizes of the vector. The framework will pass 0, 1000, and
426 * 1000000 for initialSize, and the iteration count for n.
427 */
428 #define BENCHMARK_PARAM(name, param) BENCHMARK_NAMED_PARAM(name, param, param)
429
430 /**
431 * Same as BENCHMARK_PARAM, but allows one to return the actual number of
432 * iterations that have been run.
433 */
434 #define BENCHMARK_PARAM_MULTI(name, param) \
435 BENCHMARK_NAMED_PARAM_MULTI(name, param, param)
436
437 /*
438 * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
439 * parameter, rather than using the parameter value.
440 *
441 * Useful when the parameter value is not a valid token for string pasting,
442 * of when you want to specify multiple parameter arguments.
443 *
444 * For example:
445 *
446 * void addValue(uint32_t n, int64_t bucketSize, int64_t min, int64_t max) {
447 * Histogram<int64_t> hist(bucketSize, min, max);
448 * int64_t num = min;
449 * FOR_EACH_RANGE (i, 0, n) {
450 * hist.addValue(num);
451 * ++num;
452 * if (num > max) { num = min; }
453 * }
454 * }
455 *
456 * BENCHMARK_NAMED_PARAM(addValue, 0_to_100, 1, 0, 100)
457 * BENCHMARK_NAMED_PARAM(addValue, 0_to_1000, 10, 0, 1000)
458 * BENCHMARK_NAMED_PARAM(addValue, 5k_to_20k, 250, 5000, 20000)
459 */
460 #define BENCHMARK_NAMED_PARAM(name, param_name, ...) \
461 BENCHMARK_IMPL( \
462 FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
463 FOLLY_PP_STRINGIZE(name) "(" FOLLY_PP_STRINGIZE(param_name) ")", \
464 iters, \
465 unsigned, \
466 iters) { \
467 name(iters, ##__VA_ARGS__); \
468 }
469
470 /**
471 * Same as BENCHMARK_NAMED_PARAM, but allows one to return the actual number
472 * of iterations that have been run.
473 */
474 #define BENCHMARK_NAMED_PARAM_MULTI(name, param_name, ...) \
475 BENCHMARK_MULTI_IMPL( \
476 FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
477 FOLLY_PP_STRINGIZE(name) "(" FOLLY_PP_STRINGIZE(param_name) ")", \
478 unsigned, \
479 iters) { \
480 return name(iters, ##__VA_ARGS__); \
481 }
482
483 /**
484 * Just like BENCHMARK, but prints the time relative to a
485 * baseline. The baseline is the most recent BENCHMARK() seen in
486 * the current scope. Example:
487 *
488 * // This is the baseline
489 * BENCHMARK(insertVectorBegin, n) {
490 * vector<int> v;
491 * FOR_EACH_RANGE (i, 0, n) {
492 * v.insert(v.begin(), 42);
493 * }
494 * }
495 *
496 * BENCHMARK_RELATIVE(insertListBegin, n) {
497 * list<int> s;
498 * FOR_EACH_RANGE (i, 0, n) {
499 * s.insert(s.begin(), 42);
500 * }
501 * }
502 *
503 * Any number of relative benchmark can be associated with a
504 * baseline. Another BENCHMARK() occurrence effectively establishes a
505 * new baseline.
506 */
507 #define BENCHMARK_RELATIVE(name, ...) \
508 BENCHMARK_IMPL( \
509 name, \
510 "%" FOLLY_PP_STRINGIZE(name), \
511 FB_ARG_2_OR_1(1, ##__VA_ARGS__), \
512 FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
513 __VA_ARGS__)
514
515 #define BENCHMARK_COUNTERS_RELATIVE(name, counters, ...) \
516 BENCHMARK_IMPL_COUNTERS( \
517 name, \
518 "%" FOLLY_PP_STRINGIZE(name), \
519 counters, \
520 FB_ARG_2_OR_1(1, ##__VA_ARGS__), \
521 FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
522 __VA_ARGS__)
523 /**
524 * Same as BENCHMARK_RELATIVE, but allows one to return the actual number
525 * of iterations that have been run.
526 */
527 #define BENCHMARK_RELATIVE_MULTI(name, ...) \
528 BENCHMARK_MULTI_IMPL( \
529 name, \
530 "%" FOLLY_PP_STRINGIZE(name), \
531 FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
532 __VA_ARGS__)
533
534 /**
535 * A combination of BENCHMARK_RELATIVE and BENCHMARK_PARAM.
536 */
537 #define BENCHMARK_RELATIVE_PARAM(name, param) \
538 BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
539
540 /**
541 * Same as BENCHMARK_RELATIVE_PARAM, but allows one to return the actual
542 * number of iterations that have been run.
543 */
544 #define BENCHMARK_RELATIVE_PARAM_MULTI(name, param) \
545 BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param, param)
546
547 /**
548 * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
549 */
550 #define BENCHMARK_RELATIVE_NAMED_PARAM(name, param_name, ...) \
551 BENCHMARK_IMPL( \
552 FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
553 "%" FOLLY_PP_STRINGIZE(name) "(" FOLLY_PP_STRINGIZE(param_name) ")", \
554 iters, \
555 unsigned, \
556 iters) { \
557 name(iters, ##__VA_ARGS__); \
558 }
559
560 /**
561 * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows one to return the
562 * actual number of iterations that have been run.
563 */
564 #define BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param_name, ...) \
565 BENCHMARK_MULTI_IMPL( \
566 FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
567 "%" FOLLY_PP_STRINGIZE(name) "(" FOLLY_PP_STRINGIZE(param_name) ")", \
568 unsigned, \
569 iters) { \
570 return name(iters, ##__VA_ARGS__); \
571 }
572
573 /**
574 * Draws a line of dashes.
575 */
576 #define BENCHMARK_DRAW_LINE() \
577 FOLLY_MAYBE_UNUSED static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
578 (::folly::addBenchmark(__FILE__, "-", []() -> unsigned { return 0; }), \
579 true)
580
581 /**
582 * Allows execution of code that doesn't count torward the benchmark's
583 * time budget. Example:
584 *
585 * BENCHMARK_START_GROUP(insertVectorBegin, n) {
586 * vector<int> v;
587 * BENCHMARK_SUSPEND {
588 * v.reserve(n);
589 * }
590 * FOR_EACH_RANGE (i, 0, n) {
591 * v.insert(v.begin(), 42);
592 * }
593 * }
594 */
595 #define BENCHMARK_SUSPEND \
596 if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) = \
597 ::folly::BenchmarkSuspender()) { \
598 } else
599