1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <folly/BenchmarkUtil.h>
20 #include <folly/Portability.h>
21 #include <folly/Preprocessor.h> // for FB_ANONYMOUS_VARIABLE
22 #include <folly/Range.h>
23 #include <folly/ScopeGuard.h>
24 #include <folly/Traits.h>
25 #include <folly/functional/Invoke.h>
26 #include <folly/portability/GFlags.h>
27 
28 #include <cassert>
29 #include <chrono>
30 #include <functional>
31 #include <limits>
32 #include <type_traits>
33 #include <unordered_map>
34 
35 #include <boost/function_types/function_arity.hpp>
36 #include <glog/logging.h>
37 
38 DECLARE_bool(benchmark);
39 
40 namespace folly {
41 
42 /**
43  * Runs all benchmarks defined. Usually put in main().
44  */
45 void runBenchmarks();
46 
47 /**
48  * Runs all benchmarks defined if and only if the --benchmark flag has
49  * been passed to the program. Usually put in main().
50  */
runBenchmarksOnFlag()51 inline bool runBenchmarksOnFlag() {
52   if (FLAGS_benchmark) {
53     runBenchmarks();
54   }
55   return FLAGS_benchmark;
56 }
57 
58 class UserMetric {
59  public:
60   enum class Type { CUSTOM, TIME, METRIC };
61 
62   int64_t value{};
63   Type type{Type::CUSTOM};
64 
65   UserMetric() = default;
66   /* implicit */ UserMetric(int64_t val, Type typ = Type::CUSTOM)
value(val)67       : value(val), type(typ) {}
68 };
69 
70 using UserCounters = std::unordered_map<std::string, UserMetric>;
71 
72 namespace detail {
73 struct TimeIterData {
74   std::chrono::high_resolution_clock::duration duration;
75   unsigned int niter;
76   UserCounters userCounters;
77 };
78 
79 using BenchmarkFun = std::function<TimeIterData(unsigned int)>;
80 
81 struct BenchmarkRegistration {
82   std::string file;
83   std::string name;
84   BenchmarkFun func;
85   bool useCounter = false;
86 };
87 
88 struct BenchmarkResult {
89   std::string file;
90   std::string name;
91   double timeInNs;
92   UserCounters counters;
93 };
94 
95 /**
96  * Adds a benchmark wrapped in a std::function. Only used
97  * internally. Pass by value is intentional.
98  */
99 void addBenchmarkImpl(
100     const char* file, StringPiece name, BenchmarkFun, bool useCounter);
101 
102 /**
103  * Runs all benchmarks defined in the program, doesn't print by default.
104  * Usually used when customized printing of results is desired.
105  */
106 std::vector<BenchmarkResult> runBenchmarksWithResults();
107 
108 } // namespace detail
109 
110 /**
111  * Supporting type for BENCHMARK_SUSPEND defined below.
112  */
113 struct BenchmarkSuspender {
114   using Clock = std::chrono::high_resolution_clock;
115   using TimePoint = Clock::time_point;
116   using Duration = Clock::duration;
117 
118   struct DismissedTag {};
119   static inline constexpr DismissedTag Dismissed{};
120 
BenchmarkSuspenderBenchmarkSuspender121   BenchmarkSuspender() : start(Clock::now()) {}
122 
BenchmarkSuspenderBenchmarkSuspender123   explicit BenchmarkSuspender(DismissedTag) : start(TimePoint{}) {}
124 
125   BenchmarkSuspender(const BenchmarkSuspender&) = delete;
BenchmarkSuspenderBenchmarkSuspender126   BenchmarkSuspender(BenchmarkSuspender&& rhs) noexcept {
127     start = rhs.start;
128     rhs.start = {};
129   }
130 
131   BenchmarkSuspender& operator=(const BenchmarkSuspender&) = delete;
132   BenchmarkSuspender& operator=(BenchmarkSuspender&& rhs) noexcept {
133     if (start != TimePoint{}) {
134       tally();
135     }
136     start = rhs.start;
137     rhs.start = {};
138     return *this;
139   }
140 
~BenchmarkSuspenderBenchmarkSuspender141   ~BenchmarkSuspender() {
142     if (start != TimePoint{}) {
143       tally();
144     }
145   }
146 
dismissBenchmarkSuspender147   void dismiss() {
148     assert(start != TimePoint{});
149     tally();
150     start = {};
151   }
152 
rehireBenchmarkSuspender153   void rehire() {
154     assert(start == TimePoint{});
155     start = Clock::now();
156   }
157 
158   template <class F>
159   auto dismissing(F f) -> invoke_result_t<F> {
160     SCOPE_EXIT { rehire(); };
161     dismiss();
162     return f();
163   }
164 
165   /**
166    * This is for use inside of if-conditions, used in BENCHMARK macros.
167    * If-conditions bypass the explicit on operator bool.
168    */
169   explicit operator bool() const { return false; }
170 
171   /**
172    * Accumulates time spent outside benchmark.
173    */
174   static Duration timeSpent;
175 
176  private:
tallyBenchmarkSuspender177   void tally() {
178     auto end = Clock::now();
179     timeSpent += end - start;
180     start = end;
181   }
182 
183   TimePoint start;
184 };
185 
186 /**
187  * Adds a benchmark. Usually not called directly but instead through
188  * the macro BENCHMARK defined below. The lambda function involved
189  * must take exactly one parameter of type unsigned, and the benchmark
190  * uses it with counter semantics (iteration occurs inside the
191  * function).
192  */
193 template <typename Lambda>
194 typename std::enable_if<folly::is_invocable_v<Lambda, unsigned>>::type
addBenchmark(const char * file,StringPiece name,Lambda && lambda)195 addBenchmark(const char* file, StringPiece name, Lambda&& lambda) {
196   auto execute = [=](unsigned int times) {
197     BenchmarkSuspender::timeSpent = {};
198     unsigned int niter;
199 
200     // CORE MEASUREMENT STARTS
201     auto start = std::chrono::high_resolution_clock::now();
202     niter = lambda(times);
203     auto end = std::chrono::high_resolution_clock::now();
204     // CORE MEASUREMENT ENDS
205     return detail::TimeIterData{
206         (end - start) - BenchmarkSuspender::timeSpent, niter, UserCounters{}};
207   };
208 
209   detail::addBenchmarkImpl(file, name, detail::BenchmarkFun(execute), false);
210 }
211 
212 /**
213  * Adds a benchmark. Usually not called directly but instead through
214  * the macro BENCHMARK defined below. The lambda function involved
215  * must take zero parameters, and the benchmark calls it repeatedly
216  * (iteration occurs outside the function).
217  */
218 template <typename Lambda>
addBenchmark(const char * file,StringPiece name,Lambda && lambda)219 typename std::enable_if<folly::is_invocable_v<Lambda>>::type addBenchmark(
220     const char* file, StringPiece name, Lambda&& lambda) {
221   addBenchmark(file, name, [=](unsigned int times) {
222     unsigned int niter = 0;
223     while (times-- > 0) {
224       niter += lambda();
225     }
226     return niter;
227   });
228 }
229 
230 /**
231  * similar as previous two template specialization, but lambda will also take
232  * customized counters in the following two cases
233  */
234 template <typename Lambda>
235 typename std::enable_if<
236     folly::is_invocable_v<Lambda, UserCounters&, unsigned>>::type
addBenchmark(const char * file,StringPiece name,Lambda && lambda)237 addBenchmark(const char* file, StringPiece name, Lambda&& lambda) {
238   auto execute = [=](unsigned int times) {
239     BenchmarkSuspender::timeSpent = {};
240     unsigned int niter;
241 
242     // CORE MEASUREMENT STARTS
243     auto start = std::chrono::high_resolution_clock::now();
244     UserCounters counters;
245     niter = lambda(counters, times);
246     auto end = std::chrono::high_resolution_clock::now();
247     // CORE MEASUREMENT ENDS
248     return detail::TimeIterData{
249         (end - start) - BenchmarkSuspender::timeSpent, niter, counters};
250   };
251 
252   detail::addBenchmarkImpl(
253       file,
254       name,
255       std::function<detail::TimeIterData(unsigned int)>(execute),
256       true);
257 }
258 
259 template <typename Lambda>
260 typename std::enable_if<folly::is_invocable_v<Lambda, UserCounters&>>::type
addBenchmark(const char * file,StringPiece name,Lambda && lambda)261 addBenchmark(const char* file, StringPiece name, Lambda&& lambda) {
262   addBenchmark(file, name, [=](UserCounters& counters, unsigned int times) {
263     unsigned int niter = 0;
264     while (times-- > 0) {
265       niter += lambda(counters);
266     }
267     return niter;
268   });
269 }
270 
271 struct dynamic;
272 
273 void benchmarkResultsToDynamic(
274     const std::vector<detail::BenchmarkResult>& data, dynamic&);
275 
276 void benchmarkResultsFromDynamic(
277     const dynamic&, std::vector<detail::BenchmarkResult>&);
278 
279 void printResultComparison(
280     const std::vector<detail::BenchmarkResult>& base,
281     const std::vector<detail::BenchmarkResult>& test);
282 
283 } // namespace folly
284 
285 /**
286  * Introduces a benchmark function. Used internally, see BENCHMARK and
287  * friends below.
288  */
289 
290 #define BENCHMARK_IMPL(funName, stringName, rv, paramType, paramName)          \
291   static void funName(paramType);                                              \
292   FOLLY_MAYBE_UNUSED static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
293       (::folly::addBenchmark(                                                  \
294            __FILE__,                                                           \
295            stringName,                                                         \
296            [](paramType paramName) -> unsigned {                               \
297              funName(paramName);                                               \
298              return rv;                                                        \
299            }),                                                                 \
300        true);                                                                  \
301   static void funName(paramType paramName)
302 
303 #define BENCHMARK_IMPL_COUNTERS(                                               \
304     funName, stringName, counters, rv, paramType, paramName)                   \
305   static void funName(                                                         \
306       ::folly::UserCounters& FOLLY_PP_DETAIL_APPEND_VA_ARG(paramType));        \
307   FOLLY_MAYBE_UNUSED static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
308       (::folly::addBenchmark(                                                  \
309            __FILE__,                                                           \
310            stringName,                                                         \
311            [](::folly::UserCounters& counters FOLLY_PP_DETAIL_APPEND_VA_ARG(   \
312                paramType paramName)) -> unsigned {                             \
313              funName(counters FOLLY_PP_DETAIL_APPEND_VA_ARG(paramName));       \
314              return rv;                                                        \
315            }),                                                                 \
316        true);                                                                  \
317   static void funName(::folly::UserCounters& counters                          \
318                           FOLLY_PP_DETAIL_APPEND_VA_ARG(paramType paramName))
319 
320 /**
321  * Introduces a benchmark function with support for returning the actual
322  * number of iterations. Used internally, see BENCHMARK_MULTI and friends
323  * below.
324  */
325 #define BENCHMARK_MULTI_IMPL(funName, stringName, paramType, paramName)        \
326   static unsigned funName(paramType);                                          \
327   FOLLY_MAYBE_UNUSED static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
328       (::folly::addBenchmark(                                                  \
329            __FILE__,                                                           \
330            stringName,                                                         \
331            [](paramType paramName) { return funName(paramName); }),            \
332        true);                                                                  \
333   static unsigned funName(paramType paramName)
334 
335 /**
336  * Introduces a benchmark function. Use with either one or two arguments.
337  * The first is the name of the benchmark. Use something descriptive, such
338  * as insertVectorBegin. The second argument may be missing, or could be a
339  * symbolic counter. The counter dictates how many internal iteration the
340  * benchmark does. Example:
341  *
342  * BENCHMARK(vectorPushBack) {
343  *   vector<int> v;
344  *   v.push_back(42);
345  * }
346  *
347  * BENCHMARK(insertVectorBegin, iters) {
348  *   vector<int> v;
349  *   FOR_EACH_RANGE (i, 0, iters) {
350  *     v.insert(v.begin(), 42);
351  *   }
352  * }
353  */
354 #define BENCHMARK(name, ...)                   \
355   BENCHMARK_IMPL(                              \
356       name,                                    \
357       FOLLY_PP_STRINGIZE(name),                \
358       FB_ARG_2_OR_1(1, ##__VA_ARGS__),         \
359       FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
360       __VA_ARGS__)
361 
362 /**
363  * Allow users to record customized counter during benchmarking,
364  * there will be one extra column showing in the output result for each counter
365  *
366  * BENCHMARK_COUNTERS(insertVectorBegin, couters, iters) {
367  *   vector<int> v;
368  *   FOR_EACH_RANGE (i, 0, iters) {
369  *     v.insert(v.begin(), 42);
370  *   }
371  *   BENCHMARK_SUSPEND {
372  *      counters["foo"] = 10;
373  *   }
374  * }
375  */
376 #define BENCHMARK_COUNTERS(name, counters, ...) \
377   BENCHMARK_IMPL_COUNTERS(                      \
378       name,                                     \
379       FOLLY_PP_STRINGIZE(name),                 \
380       counters,                                 \
381       FB_ARG_2_OR_1(1, ##__VA_ARGS__),          \
382       FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__),  \
383       __VA_ARGS__)
384 /**
385  * Like BENCHMARK above, but allows the user to return the actual
386  * number of iterations executed in the function body. This can be
387  * useful if the benchmark function doesn't know upfront how many
388  * iterations it's going to run or if it runs through a certain
389  * number of test cases, e.g.:
390  *
391  * BENCHMARK_MULTI(benchmarkSomething) {
392  *   std::vector<int> testCases { 0, 1, 1, 2, 3, 5 };
393  *   for (int c : testCases) {
394  *     doSomething(c);
395  *   }
396  *   return testCases.size();
397  * }
398  */
399 #define BENCHMARK_MULTI(name, ...)             \
400   BENCHMARK_MULTI_IMPL(                        \
401       name,                                    \
402       FOLLY_PP_STRINGIZE(name),                \
403       FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
404       __VA_ARGS__)
405 
406 /**
407  * Defines a benchmark that passes a parameter to another one. This is
408  * common for benchmarks that need a "problem size" in addition to
409  * "number of iterations". Consider:
410  *
411  * void pushBack(uint32_t n, size_t initialSize) {
412  *   vector<int> v;
413  *   BENCHMARK_SUSPEND {
414  *     v.resize(initialSize);
415  *   }
416  *   FOR_EACH_RANGE (i, 0, n) {
417  *    v.push_back(i);
418  *   }
419  * }
420  * BENCHMARK_PARAM(pushBack, 0)
421  * BENCHMARK_PARAM(pushBack, 1000)
422  * BENCHMARK_PARAM(pushBack, 1000000)
423  *
424  * The benchmark above estimates the speed of push_back at different
425  * initial sizes of the vector. The framework will pass 0, 1000, and
426  * 1000000 for initialSize, and the iteration count for n.
427  */
428 #define BENCHMARK_PARAM(name, param) BENCHMARK_NAMED_PARAM(name, param, param)
429 
430 /**
431  * Same as BENCHMARK_PARAM, but allows one to return the actual number of
432  * iterations that have been run.
433  */
434 #define BENCHMARK_PARAM_MULTI(name, param) \
435   BENCHMARK_NAMED_PARAM_MULTI(name, param, param)
436 
437 /*
438  * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
439  * parameter, rather than using the parameter value.
440  *
441  * Useful when the parameter value is not a valid token for string pasting,
442  * of when you want to specify multiple parameter arguments.
443  *
444  * For example:
445  *
446  * void addValue(uint32_t n, int64_t bucketSize, int64_t min, int64_t max) {
447  *   Histogram<int64_t> hist(bucketSize, min, max);
448  *   int64_t num = min;
449  *   FOR_EACH_RANGE (i, 0, n) {
450  *     hist.addValue(num);
451  *     ++num;
452  *     if (num > max) { num = min; }
453  *   }
454  * }
455  *
456  * BENCHMARK_NAMED_PARAM(addValue, 0_to_100, 1, 0, 100)
457  * BENCHMARK_NAMED_PARAM(addValue, 0_to_1000, 10, 0, 1000)
458  * BENCHMARK_NAMED_PARAM(addValue, 5k_to_20k, 250, 5000, 20000)
459  */
460 #define BENCHMARK_NAMED_PARAM(name, param_name, ...)                   \
461   BENCHMARK_IMPL(                                                      \
462       FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),             \
463       FOLLY_PP_STRINGIZE(name) "(" FOLLY_PP_STRINGIZE(param_name) ")", \
464       iters,                                                           \
465       unsigned,                                                        \
466       iters) {                                                         \
467     name(iters, ##__VA_ARGS__);                                        \
468   }
469 
470 /**
471  * Same as BENCHMARK_NAMED_PARAM, but allows one to return the actual number
472  * of iterations that have been run.
473  */
474 #define BENCHMARK_NAMED_PARAM_MULTI(name, param_name, ...)             \
475   BENCHMARK_MULTI_IMPL(                                                \
476       FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),             \
477       FOLLY_PP_STRINGIZE(name) "(" FOLLY_PP_STRINGIZE(param_name) ")", \
478       unsigned,                                                        \
479       iters) {                                                         \
480     return name(iters, ##__VA_ARGS__);                                 \
481   }
482 
483 /**
484  * Just like BENCHMARK, but prints the time relative to a
485  * baseline. The baseline is the most recent BENCHMARK() seen in
486  * the current scope. Example:
487  *
488  * // This is the baseline
489  * BENCHMARK(insertVectorBegin, n) {
490  *   vector<int> v;
491  *   FOR_EACH_RANGE (i, 0, n) {
492  *     v.insert(v.begin(), 42);
493  *   }
494  * }
495  *
496  * BENCHMARK_RELATIVE(insertListBegin, n) {
497  *   list<int> s;
498  *   FOR_EACH_RANGE (i, 0, n) {
499  *     s.insert(s.begin(), 42);
500  *   }
501  * }
502  *
503  * Any number of relative benchmark can be associated with a
504  * baseline. Another BENCHMARK() occurrence effectively establishes a
505  * new baseline.
506  */
507 #define BENCHMARK_RELATIVE(name, ...)          \
508   BENCHMARK_IMPL(                              \
509       name,                                    \
510       "%" FOLLY_PP_STRINGIZE(name),            \
511       FB_ARG_2_OR_1(1, ##__VA_ARGS__),         \
512       FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
513       __VA_ARGS__)
514 
515 #define BENCHMARK_COUNTERS_RELATIVE(name, counters, ...) \
516   BENCHMARK_IMPL_COUNTERS(                               \
517       name,                                              \
518       "%" FOLLY_PP_STRINGIZE(name),                      \
519       counters,                                          \
520       FB_ARG_2_OR_1(1, ##__VA_ARGS__),                   \
521       FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__),           \
522       __VA_ARGS__)
523 /**
524  * Same as BENCHMARK_RELATIVE, but allows one to return the actual number
525  * of iterations that have been run.
526  */
527 #define BENCHMARK_RELATIVE_MULTI(name, ...)    \
528   BENCHMARK_MULTI_IMPL(                        \
529       name,                                    \
530       "%" FOLLY_PP_STRINGIZE(name),            \
531       FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
532       __VA_ARGS__)
533 
534 /**
535  * A combination of BENCHMARK_RELATIVE and BENCHMARK_PARAM.
536  */
537 #define BENCHMARK_RELATIVE_PARAM(name, param) \
538   BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
539 
540 /**
541  * Same as BENCHMARK_RELATIVE_PARAM, but allows one to return the actual
542  * number of iterations that have been run.
543  */
544 #define BENCHMARK_RELATIVE_PARAM_MULTI(name, param) \
545   BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param, param)
546 
547 /**
548  * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
549  */
550 #define BENCHMARK_RELATIVE_NAMED_PARAM(name, param_name, ...)              \
551   BENCHMARK_IMPL(                                                          \
552       FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),                 \
553       "%" FOLLY_PP_STRINGIZE(name) "(" FOLLY_PP_STRINGIZE(param_name) ")", \
554       iters,                                                               \
555       unsigned,                                                            \
556       iters) {                                                             \
557     name(iters, ##__VA_ARGS__);                                            \
558   }
559 
560 /**
561  * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows one to return the
562  * actual number of iterations that have been run.
563  */
564 #define BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param_name, ...)        \
565   BENCHMARK_MULTI_IMPL(                                                    \
566       FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)),                 \
567       "%" FOLLY_PP_STRINGIZE(name) "(" FOLLY_PP_STRINGIZE(param_name) ")", \
568       unsigned,                                                            \
569       iters) {                                                             \
570     return name(iters, ##__VA_ARGS__);                                     \
571   }
572 
573 /**
574  * Draws a line of dashes.
575  */
576 #define BENCHMARK_DRAW_LINE()                                                  \
577   FOLLY_MAYBE_UNUSED static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
578       (::folly::addBenchmark(__FILE__, "-", []() -> unsigned { return 0; }),   \
579        true)
580 
581 /**
582  * Allows execution of code that doesn't count torward the benchmark's
583  * time budget. Example:
584  *
585  * BENCHMARK_START_GROUP(insertVectorBegin, n) {
586  *   vector<int> v;
587  *   BENCHMARK_SUSPEND {
588  *     v.reserve(n);
589  *   }
590  *   FOR_EACH_RANGE (i, 0, n) {
591  *     v.insert(v.begin(), 42);
592  *   }
593  * }
594  */
595 #define BENCHMARK_SUSPEND                             \
596   if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) = \
597           ::folly::BenchmarkSuspender()) {            \
598   } else
599