// Copyright 2015 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Support for registering benchmarks for functions.

/* Example usage:
// Define a function that executes the code to be measured a
// specified number of times:
static void BM_StringCreation(benchmark::State& state) {
  for (auto _ : state)
    std::string empty_string;
}

// Register the function as a benchmark
BENCHMARK(BM_StringCreation);

// Define another benchmark
static void BM_StringCopy(benchmark::State& state) {
  std::string x = "hello";
  for (auto _ : state)
    std::string copy(x);
}
BENCHMARK(BM_StringCopy);

// Augment the main() program to invoke benchmarks if specified
// via the --benchmark_filter command line flag.  E.g.,
//       my_unittest --benchmark_filter=all
//       my_unittest --benchmark_filter=BM_StringCreation
//       my_unittest --benchmark_filter=String
//       my_unittest --benchmark_filter='Copy|Creation'
int main(int argc, char** argv) {
  benchmark::Initialize(&argc, argv);
  benchmark::RunSpecifiedBenchmarks();
  benchmark::Shutdown();
  return 0;
}

// Sometimes a family of microbenchmarks can be implemented with
// just one routine that takes an extra argument to specify which
// one of the family of benchmarks to run.  For example, the following
// code defines a family of microbenchmarks for measuring the speed
// of memcpy() calls of different lengths:

static void BM_memcpy(benchmark::State& state) {
  char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
  memset(src, 'x', state.range(0));
  for (auto _ : state)
    memcpy(dst, src, state.range(0));
  state.SetBytesProcessed(state.iterations() * state.range(0));
  delete[] src; delete[] dst;
}
BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);

// The preceding code is quite repetitive, and can be replaced with the
// following short-hand.  The following invocation will pick a few
// appropriate arguments in the specified range and will generate a
// microbenchmark for each such argument.
BENCHMARK(BM_memcpy)->Range(8, 8<<10);

// You might have a microbenchmark that depends on two inputs.  For
// example, the following code defines a family of microbenchmarks for
// measuring the speed of set insertion.
static void BM_SetInsert(benchmark::State& state) {
  std::set<int> data;
  for (auto _ : state) {
    state.PauseTiming();
    data = ConstructRandomSet(state.range(0));
    state.ResumeTiming();
    for (int j = 0; j < state.range(1); ++j)
      data.insert(RandomNumber());
  }
}
BENCHMARK(BM_SetInsert)
   ->Args({1<<10, 128})
   ->Args({2<<10, 128})
   ->Args({4<<10, 128})
   ->Args({8<<10, 128})
   ->Args({1<<10, 512})
   ->Args({2<<10, 512})
   ->Args({4<<10, 512})
   ->Args({8<<10, 512});

// The preceding code is quite repetitive, and can be replaced with
// the following short-hand.  The following macro will pick a few
// appropriate arguments in the product of the two specified ranges
// and will generate a microbenchmark for each such pair.
BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});

// For more complex patterns of inputs, passing a custom function
// to Apply allows programmatic specification of an
// arbitrary set of arguments to run the microbenchmark on.
// The following example enumerates a dense range on
// one parameter, and a sparse range on the second.
static void CustomArguments(benchmark::internal::Benchmark* b) {
  for (int i = 0; i <= 10; ++i)
    for (int j = 32; j <= 1024*1024; j *= 8)
      b->Args({i, j});
}
BENCHMARK(BM_SetInsert)->Apply(CustomArguments);

// Templated microbenchmarks work the same way:
// Produce then consume 'size' messages 'iters' times
// Measures throughput in the absence of multiprogramming.
template <class Q> void BM_Sequential(benchmark::State& state) {
  Q q;
  typename Q::value_type v;
  for (auto _ : state) {
    for (int i = state.range(0); i--; )
      q.push(v);
    for (int e = state.range(0); e--; )
      q.Wait(&v);
  }
  // actually messages, not bytes:
  state.SetBytesProcessed(state.iterations() * state.range(0));
}
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);

Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
benchmark. This option overrides the `benchmark_min_time` flag.

void BM_test(benchmark::State& state) {
 ... body ...
}
BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.

In a multithreaded test, it is guaranteed that none of the threads will start
until all have reached the loop start, and all will have finished before any
thread exits the loop body. As such, any global setup or teardown you want to
do can be wrapped in a check against the thread index:

static void BM_MultiThreaded(benchmark::State& state) {
  if (state.thread_index() == 0) {
    // Setup code here.
  }
  for (auto _ : state) {
    // Run the test as normal.
  }
  if (state.thread_index() == 0) {
    // Teardown code here.
  }
}
BENCHMARK(BM_MultiThreaded)->Threads(4);


If a benchmark runs for only a few milliseconds it may be hard to visually
compare the measured times, since the output is reported in nanoseconds by
default. To set the time unit manually, specify it explicitly:

BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
*/

#ifndef BENCHMARK_BENCHMARK_H_
#define BENCHMARK_BENCHMARK_H_

// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
#define BENCHMARK_HAS_CXX11
#endif

// This _MSC_VER check should detect VS 2017 v15.3 and newer.
#if __cplusplus >= 201703L || \
    (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L)
#define BENCHMARK_HAS_CXX17
#endif

#include <stdint.h>

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iosfwd>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#if defined(BENCHMARK_HAS_CXX11)
#include <atomic>
#include <initializer_list>
#include <type_traits>
#include <utility>
#endif

#if defined(_MSC_VER)
#include <intrin.h>  // for _ReadWriteBarrier
#endif

#ifndef BENCHMARK_HAS_CXX11
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);                         \
  TypeName& operator=(const TypeName&)
#else
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;                \
  TypeName& operator=(const TypeName&) = delete
#endif

#ifdef BENCHMARK_HAS_CXX17
#define BENCHMARK_UNUSED [[maybe_unused]]
#elif defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_UNUSED __attribute__((unused))
#else
#define BENCHMARK_UNUSED
#endif

#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#elif defined(_MSC_VER) && !defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __forceinline
#if _MSC_VER >= 1900
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
#define __func__ __FUNCTION__
#else
#define BENCHMARK_ALWAYS_INLINE
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif

#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)

#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#else
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg)
#define BENCHMARK_WARNING_MSG(msg)                           \
  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
      __LINE__) ") : warning note: " msg))
#endif

#if defined(__GNUC__) && !defined(__clang__)
#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#endif

#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

#if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
#define BENCHMARK_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
#define BENCHMARK_UNREACHABLE() __assume(false)
#else
#define BENCHMARK_UNREACHABLE() ((void)0)
#endif

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_OVERRIDE override
#else
#define BENCHMARK_OVERRIDE
#endif

namespace benchmark {
class BenchmarkReporter;
class MemoryManager;

void Initialize(int* argc, char** argv);
void Shutdown();

// Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
bool ReportUnrecognizedArguments(int argc, char** argv);

// Generate a list of benchmarks matching the specified --benchmark_filter flag
// and if --benchmark_list_tests is specified return after printing the name
// of each matching benchmark. Otherwise run each matching benchmark and
// report the results.
//
// The second and third overloads use the specified 'display_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file
// specified by '--benchmark_output'. If '--benchmark_output' is not given
// the 'file_reporter' is ignored.
//
// RETURNS: The number of matching benchmarks.
size_t RunSpecifiedBenchmarks();
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
                              BenchmarkReporter* file_reporter);

// Register a MemoryManager instance that will be used to collect and report
// allocation measurements for benchmark runs.
void RegisterMemoryManager(MemoryManager* memory_manager);

// Add a key-value pair to output as part of the context stanza in the report.
void AddCustomContext(const std::string& key, const std::string& value);

namespace internal {
class Benchmark;
class BenchmarkImp;
class BenchmarkFamilies;

void UseCharPointer(char const volatile*);

// Take ownership of the pointer and register the benchmark. Return the
// registered benchmark.
Benchmark* RegisterBenchmarkInternal(Benchmark*);

// Ensure that the standard streams are properly initialized in every TU.
int InitializeStreams();
BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();

}  // namespace internal

#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
    defined(__EMSCRIPTEN__)
#define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
#endif

// Force the compiler to flush pending writes to global memory. Acts as an
// effective read/write barrier
#ifdef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  std::atomic_signal_fence(std::memory_order_acq_rel);
}
#endif

// The DoNotOptimize(...) function can be used to prevent a value or
// expression from being optimized away by the compiler. This function is
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
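//
// A minimal usage sketch (illustrative only; BM_VectorFill and its body are
// hypothetical, not part of this header):
//
//   static void BM_VectorFill(benchmark::State& state) {
//     for (auto _ : state) {
//       std::vector<int> v;
//       v.push_back(42);
//       benchmark::DoNotOptimize(v.data());  // keep the store observable
//       benchmark::ClobberMemory();          // force pending writes to memory
//     }
//   }
//   BENCHMARK(BM_VectorFill);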
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  asm volatile("" : : "r,m"(value) : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
#if defined(__clang__)
  asm volatile("" : "+r,m"(value) : : "memory");
#else
  asm volatile("" : "+m,r"(value) : : "memory");
#endif
}

#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  asm volatile("" : : : "memory");
}
#endif
#elif defined(_MSC_VER)
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
  _ReadWriteBarrier();
}

#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
#endif
#else
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
#endif

// This class is used for user-defined counters.
class Counter {
 public:
  enum Flags {
    kDefaults = 0,
    // Mark the counter as a rate. It will be presented divided
    // by the duration of the benchmark.
    kIsRate = 1U << 0U,
    // Mark the counter as a thread-average quantity. It will be
    // presented divided by the number of threads.
    kAvgThreads = 1U << 1U,
    // Mark the counter as a thread-average rate. See above.
    kAvgThreadsRate = kIsRate | kAvgThreads,
    // Mark the counter as a constant value, valid/same for *every* iteration.
    // When reporting, it will be *multiplied* by the iteration count.
    kIsIterationInvariant = 1U << 2U,
    // Mark the counter as a constant rate.
    // When reporting, it will be *multiplied* by the iteration count
    // and then divided by the duration of the benchmark.
    kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
    // Mark the counter as an iteration-average quantity.
    // It will be presented divided by the number of iterations.
    kAvgIterations = 1U << 3U,
    // Mark the counter as an iteration-average rate. See above.
    kAvgIterationsRate = kIsRate | kAvgIterations,

    // In the end, invert the result. This is always done last!
    kInvert = 1U << 31U
  };

  enum OneK {
    // 1'000 items per 1k
    kIs1000 = 1000,
    // 1'024 items per 1k
    kIs1024 = 1024
  };

  double value;
  Flags flags;
  OneK oneK;

  BENCHMARK_ALWAYS_INLINE
  Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
      : value(v), flags(f), oneK(k) {}

  BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
  BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
};
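
// A usage sketch (illustrative; the counter names and values are arbitrary):
//
//   static void BM_Compress(benchmark::State& state) {
//     int64_t bytes_out = 0;
//     for (auto _ : state) {
//       // ... do the work, accumulating bytes_out ...
//     }
//     // Reported as a rate (divided by the benchmark duration):
//     state.counters["BytesOut"] = Counter(static_cast<double>(bytes_out),
//                                          Counter::kIsRate, Counter::kIs1024);
//     // Reported divided by the number of iterations:
//     state.counters["PerIter"] = Counter(42.0, Counter::kAvgIterations);
//   }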

// A helper for user code to create unforeseen combinations of Flags, without
// having to do this cast manually each time, or providing this operator.
Counter::Flags inline operator|(const Counter::Flags& LHS,
                                const Counter::Flags& RHS) {
  return static_cast<Counter::Flags>(static_cast<int>(LHS) |
                                     static_cast<int>(RHS));
}

// This is the container for the user-defined counters.
typedef std::map<std::string, Counter> UserCounters;

// TimeUnit is passed to a benchmark in order to specify the order of magnitude
// for the measured time.
enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };

// BigO is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark. In case oAuto is selected,
// complexity will be calculated automatically to the best fit.
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };

typedef uint64_t IterationCount;

enum StatisticUnit { kTime, kPercentage };

// BigOFunc is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark.
typedef double(BigOFunc)(IterationCount);

// StatisticsFunc is passed to a benchmark in order to compute some descriptive
// statistics over all the measurements of some type
typedef double(StatisticsFunc)(const std::vector<double>&);

namespace internal {
struct Statistics {
  std::string name_;
  StatisticsFunc* compute_;
  StatisticUnit unit_;

  Statistics(const std::string& name, StatisticsFunc* compute,
             StatisticUnit unit = kTime)
      : name_(name), compute_(compute), unit_(unit) {}
};

class BenchmarkInstance;
class ThreadTimer;
class ThreadManager;
class PerfCountersMeasurement;

enum AggregationReportMode
#if defined(BENCHMARK_HAS_CXX11)
    : unsigned
#else
#endif
{
  // The mode has not been manually specified
  ARM_Unspecified = 0,
  // The mode is user-specified.
  // This may or may not be set when the following bit-flags are set.
  ARM_Default = 1U << 0U,
  // File reporter should only output aggregates.
  ARM_FileReportAggregatesOnly = 1U << 1U,
  // Display reporter should only output aggregates
  ARM_DisplayReportAggregatesOnly = 1U << 2U,
  // Both reporters should only display aggregates.
  ARM_ReportAggregatesOnly =
      ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};

}  // namespace internal

// State is passed to a running Benchmark and contains state for the
// benchmark to use.
class State {
 public:
  struct StateIterator;
  friend struct StateIterator;

  // Returns iterators used to run each iteration of a benchmark using a
  // C++11 range-based for loop. These functions should not be called directly.
  //
  // REQUIRES: The benchmark has not started running yet. Neither begin nor end
  // have been called previously.
  //
  // NOTE: KeepRunning may not be used after calling either of these functions.
  BENCHMARK_ALWAYS_INLINE StateIterator begin();
  BENCHMARK_ALWAYS_INLINE StateIterator end();

  // Returns true if the benchmark should continue through another iteration.
  // NOTE: A benchmark may not return from the test until KeepRunning() has
  // returned false.
  bool KeepRunning();

  // Returns true iff the benchmark should run n more iterations.
  // REQUIRES: 'n' > 0.
  // NOTE: A benchmark must not return from the test until KeepRunningBatch()
  // has returned false.
  // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
  //
  // Intended usage:
  //   while (state.KeepRunningBatch(1000)) {
  //     // process 1000 elements
  //   }
  bool KeepRunningBatch(IterationCount n);

  // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
  //           by the current thread.
  // Stop the benchmark timer.  If not called, the timer will be
  // automatically stopped after the last iteration of the benchmark loop.
  //
  // For threaded benchmarks the PauseTiming() function only pauses the timing
  // for the current thread.
  //
  // NOTE: The "real time" measurement is per-thread. If different threads
  // report different measurements the largest one is reported.
  //
  // NOTE: PauseTiming()/ResumeTiming() are relatively
  // heavyweight, and so their use should generally be avoided
  // within each benchmark iteration, if possible.
  void PauseTiming();

  // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
  //           by the current thread.
  // Start the benchmark timer.  The timer is NOT running on entrance to the
  // benchmark function. It begins running after control flow enters the
  // benchmark loop.
  //
  // NOTE: PauseTiming()/ResumeTiming() are relatively
  // heavyweight, and so their use should generally be avoided
  // within each benchmark iteration, if possible.
  void ResumeTiming();

  // REQUIRES: 'SkipWithError(...)' has not been called previously by the
  //            current thread.
  // Report the benchmark as resulting in an error with the specified 'msg'.
  // After this call the user may explicitly 'return' from the benchmark.
  //
  // If the ranged-for style of benchmark loop is used, the user must explicitly
  // break from the loop, otherwise all future iterations will be run.
  // If the 'KeepRunning()' loop is used the current thread will automatically
  // exit the loop at the end of the current iteration.
  //
  // For threaded benchmarks only the current thread stops executing and future
  // calls to `KeepRunning()` will block until all threads have completed
  // the `KeepRunning()` loop. If multiple threads report an error only the
  // first error message is used.
  //
  // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
  // the current scope immediately. If the function is called from within
  // the 'KeepRunning()' loop the current iteration will finish. It is the
  // user's responsibility to exit the scope as needed.
  void SkipWithError(const char* msg);

  // Returns true if an error has been reported with 'SkipWithError(...)'.
  bool error_occurred() const { return error_occurred_; }

  // REQUIRES: called exactly once per iteration of the benchmarking loop.
  // Set the manually measured time for this benchmark iteration, which
  // is used instead of automatically measured time if UseManualTime() was
  // specified.
  //
  // For threaded benchmarks the final value will be set to the largest
  // reported value.
  void SetIterationTime(double seconds);

  // Set the number of bytes processed by the current benchmark
  // execution.  This routine is typically called once at the end of a
  // throughput oriented benchmark.
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  BENCHMARK_ALWAYS_INLINE
  void SetBytesProcessed(int64_t bytes) {
    counters["bytes_per_second"] =
        Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
  }

  BENCHMARK_ALWAYS_INLINE
  int64_t bytes_processed() const {
    if (counters.find("bytes_per_second") != counters.end())
      return static_cast<int64_t>(counters.at("bytes_per_second"));
    return 0;
  }

  // If this routine is called with complexity_n > 0 and a complexity report is
  // requested for the benchmark family, then the current benchmark will be part
  // of the computation and complexity_n will represent the length of N.
  BENCHMARK_ALWAYS_INLINE
  void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }

  BENCHMARK_ALWAYS_INLINE
  int64_t complexity_length_n() const { return complexity_n_; }

  // If this routine is called with items > 0, then an items/s
  // label is printed on the benchmark report line for the currently
  // executing benchmark. It is typically called at the end of a processing
  // benchmark where a processing items/second output is desired.
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  BENCHMARK_ALWAYS_INLINE
  void SetItemsProcessed(int64_t items) {
    counters["items_per_second"] =
        Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
  }

  BENCHMARK_ALWAYS_INLINE
  int64_t items_processed() const {
    if (counters.find("items_per_second") != counters.end())
      return static_cast<int64_t>(counters.at("items_per_second"));
    return 0;
  }

  // If this routine is called, the specified label is printed at the
  // end of the benchmark report line for the currently executing
  // benchmark.  Example:
  //  static void BM_Compress(benchmark::State& state) {
  //    ...
  //    double compress = input_size / output_size;
  //    state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression));
  //  }
  // Produces output that looks like:
  //  BM_Compress   50         50   14115038  compress:27.3%
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  void SetLabel(const char* label);

  void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
    this->SetLabel(str.c_str());
  }

  // Range arguments for this run. CHECKs if the argument has been set.
  BENCHMARK_ALWAYS_INLINE
  int64_t range(std::size_t pos = 0) const {
    assert(range_.size() > pos);
    return range_[pos];
  }

  BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
  int64_t range_x() const { return range(0); }

  BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
  int64_t range_y() const { return range(1); }

  // Number of threads concurrently executing the benchmark.
  BENCHMARK_ALWAYS_INLINE
  int threads() const { return threads_; }

  // Index of the executing thread. Values from [0, threads).
  BENCHMARK_ALWAYS_INLINE
  int thread_index() const { return thread_index_; }

  BENCHMARK_ALWAYS_INLINE
  IterationCount iterations() const {
    if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
      return 0;
    }
    return max_iterations - total_iterations_ + batch_leftover_;
  }

 private:
  // items we expect on the first cache line (ie 64 bytes of the struct)
  // When total_iterations_ is 0, KeepRunning() and friends will return false.
  // May be larger than max_iterations.
  IterationCount total_iterations_;

  // When using KeepRunningBatch(), batch_leftover_ holds the number of
  // iterations beyond max_iters that were run. Used to track
  // completed_iterations_ accurately.
  IterationCount batch_leftover_;

 public:
  const IterationCount max_iterations;

 private:
  bool started_;
  bool finished_;
  bool error_occurred_;

 private:  // items we don't need on the first cache line
  std::vector<int64_t> range_;

  int64_t complexity_n_;

 public:
  // Container for user-defined counters.
  UserCounters counters;

 private:
  State(IterationCount max_iters, const std::vector<int64_t>& ranges,
        int thread_i, int n_threads, internal::ThreadTimer* timer,
        internal::ThreadManager* manager,
        internal::PerfCountersMeasurement* perf_counters_measurement);

  void StartKeepRunning();
  // Implementation of KeepRunning() and KeepRunningBatch().
  // is_batch must be true unless n is 1.
  bool KeepRunningInternal(IterationCount n, bool is_batch);
  void FinishKeepRunning();

  const int thread_index_;
  const int threads_;

  internal::ThreadTimer* const timer_;
  internal::ThreadManager* const manager_;
  internal::PerfCountersMeasurement* const perf_counters_measurement_;

  friend class internal::BenchmarkInstance;
};

inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
  return KeepRunningInternal(1, /*is_batch=*/false);
}

inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
  return KeepRunningInternal(n, /*is_batch=*/true);
}

inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
                                                               bool is_batch) {
  // total_iterations_ is set to 0 by the constructor, and always set to a
  // nonzero value by StartKeepRunning().
  assert(n > 0);
  // n must be 1 unless is_batch is true.
  assert(is_batch || n == 1);
  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
    total_iterations_ -= n;
    return true;
  }
  if (!started_) {
    StartKeepRunning();
    if (!error_occurred_ && total_iterations_ >= n) {
      total_iterations_ -= n;
      return true;
    }
  }
  // For non-batch runs, total_iterations_ must be 0 by now.
  if (is_batch && total_iterations_ != 0) {
    batch_leftover_ = n - total_iterations_;
    total_iterations_ = 0;
    return true;
  }
  FinishKeepRunning();
  return false;
}

struct State::StateIterator {
  struct BENCHMARK_UNUSED Value {};
  typedef std::forward_iterator_tag iterator_category;
  typedef Value value_type;
  typedef Value reference;
  typedef Value pointer;
  typedef std::ptrdiff_t difference_type;

 private:
  friend class State;
  BENCHMARK_ALWAYS_INLINE
  StateIterator() : cached_(0), parent_() {}

  BENCHMARK_ALWAYS_INLINE
  explicit StateIterator(State* st)
      : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}

 public:
  BENCHMARK_ALWAYS_INLINE
  Value operator*() const { return Value(); }

  BENCHMARK_ALWAYS_INLINE
  StateIterator& operator++() {
    assert(cached_ > 0);
    --cached_;
    return *this;
  }

  BENCHMARK_ALWAYS_INLINE
  bool operator!=(StateIterator const&) const {
    if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
    parent_->FinishKeepRunning();
    return false;
  }

 private:
  IterationCount cached_;
  State* const parent_;
};

inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
  return StateIterator(this);
}
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
  StartKeepRunning();
  return StateIterator();
}

namespace internal {

typedef void(Function)(State&);

// ------------------------------------------------------
// Benchmark registration object.  The BENCHMARK() macro expands
// into an internal::Benchmark* object.  Various methods can
// be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can be
// chained into one expression.
class Benchmark {
 public:
  virtual ~Benchmark();

  // Note: the following methods all return "this" so that multiple
  // method calls can be chained together in one expression.

  // Specify the name of the benchmark
  Benchmark* Name(const std::string& name);

  // Run this benchmark once with "x" as the extra argument passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Arg(int64_t x);

  // Run this benchmark with the given time unit for the generated output report
  Benchmark* Unit(TimeUnit unit);

  // Run this benchmark once for a number of values picked from the
  // range [start..limit].  (start and limit are always picked.)
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Range(int64_t start, int64_t limit);

  // Run this benchmark once for all values in the range [start..limit] with
  // a specific step.
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);

  // Run this benchmark once with "args" as the extra arguments passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* Args(const std::vector<int64_t>& args);

  // Equivalent to Args({x, y})
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  //   New code should use 'Args'.
  Benchmark* ArgPair(int64_t x, int64_t y) {
    std::vector<int64_t> args;
    args.push_back(x);
    args.push_back(y);
    return Args(args);
  }

  // Run this benchmark once for a number of values picked from the
  // ranges [start..limit].  (starts and limits are always picked.)
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);

  // Run this benchmark once for each combination of values in the (cartesian)
  // product of the supplied argument lists.
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* ArgsProduct(const std::vector<std::vector<int64_t> >& arglists);
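  // For example (a sketch; the values are illustrative):
  //   BENCHMARK(BM_SetInsert)->ArgsProduct({{1<<10, 8<<10}, {128, 512}});
  // registers the four combinations
  //   BM_SetInsert/1024/128, BM_SetInsert/1024/512,
  //   BM_SetInsert/8192/128, BM_SetInsert/8192/512.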

  // Equivalent to ArgNames({name})
  Benchmark* ArgName(const std::string& name);

  // Set the argument names to display in the benchmark name. If not called,
  // only argument values will be shown.
  Benchmark* ArgNames(const std::vector<std::string>& names);

  // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  //   New code should use 'Ranges'.
  Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
    std::vector<std::pair<int64_t, int64_t> > ranges;
    ranges.push_back(std::make_pair(lo1, hi1));
    ranges.push_back(std::make_pair(lo2, hi2));
    return Ranges(ranges);
  }

  // Pass this benchmark object to *func, which can customize
  // the benchmark by calling various methods like Arg, Args,
  // Threads, etc.
  Benchmark* Apply(void (*func)(Benchmark* benchmark));

  // Set the range multiplier for non-dense range. If not called, the range
  // multiplier kRangeMultiplier will be used.
  Benchmark* RangeMultiplier(int multiplier);

  // Set the minimum amount of time to use when running this benchmark. This
  // option overrides the `benchmark_min_time` flag.
  // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
  Benchmark* MinTime(double t);

  // Specify the number of iterations that should be run by this benchmark.
  // REQUIRES: `n > 0` and `MinTime` has not been called on this benchmark.
  //
  // NOTE: This function should only be used when *exact* iteration control is
  //   needed and never to control or limit how long a benchmark runs, where
  //   `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
  Benchmark* Iterations(IterationCount n);

  // Specify the number of times to repeat this benchmark. This option
  // overrides the `benchmark_repetitions` flag.
  // REQUIRES: `n > 0`
  Benchmark* Repetitions(int n);
  // Specify if each repetition of the benchmark should be reported separately
  // or if only the final statistics should be reported. If the benchmark
  // is not repeated then the single result is always reported.
  // Applies to *ALL* reporters (display and file).
  Benchmark* ReportAggregatesOnly(bool value = true);

  // Same as ReportAggregatesOnly(), but applies to display reporter only.
  Benchmark* DisplayAggregatesOnly(bool value = true);

  // By default, the CPU time is measured only for the main thread, which may
  // be unrepresentative if the benchmark uses threads internally. If called,
  // the total CPU time spent by all the threads will be measured instead.
  // By default, only the main thread CPU time will be measured.
  Benchmark* MeasureProcessCPUTime();

  // If a particular benchmark should use the Wall clock instead of the CPU time
  // (be it either the CPU time of the main thread only (default), or the
  // total CPU usage of the benchmark), call this method. If called, the elapsed
  // (wall) time will be used to control how many iterations are run, and in the
  // printing of items/second or MB/seconds values.
  // If not called, the CPU time used by the benchmark will be used.
  Benchmark* UseRealTime();

  // If a benchmark must measure time manually (e.g. if GPU execution time is
  // being measured), call this method. If called, each benchmark iteration
  // should call SetIterationTime(seconds) to report the measured time, which
  // will be used to control how many iterations are run, and in the printing
  // of items/second or MB/second values.
  Benchmark* UseManualTime();

  // Set the asymptotic computational complexity for the benchmark. If called
  // the asymptotic computational complexity will be shown on the output.
  Benchmark* Complexity(BigO complexity = benchmark::oAuto);

  // Set the asymptotic computational complexity for the benchmark. If called
  // the asymptotic computational complexity will be shown on the output.
  Benchmark* Complexity(BigOFunc* complexity);
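  // For example (a sketch; BM_StringCompare is hypothetical):
  //   BENCHMARK(BM_StringCompare)
  //       ->RangeMultiplier(2)->Range(1<<10, 1<<18)
  //       ->Complexity(benchmark::oNLogN);
  // or, with a user-supplied fitting function (requires C++11):
  //   BENCHMARK(BM_StringCompare)->Range(1<<10, 1<<18)
  //       ->Complexity([](benchmark::IterationCount n) -> double {
  //         return static_cast<double>(n);
  //       });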

  // Add this statistic to be computed over all the values of the benchmark run
  Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics,
                               StatisticUnit unit = kTime);

  // Support for running multiple copies of the same benchmark concurrently
  // in multiple threads.  This may be useful when measuring the scaling
  // of some piece of code.

  // Run one instance of this benchmark concurrently in t threads.
  Benchmark* Threads(int t);

  // Pick a set of values T from [min_threads,max_threads].
  // min_threads and max_threads are always included in T.  Run this
  // benchmark once for each value in T.  The benchmark run for a
  // particular value t consists of t threads running the benchmark
  // function concurrently.  For example, consider:
  //    BENCHMARK(Foo)->ThreadRange(1,16);
  // This will run the following benchmarks:
  //    Foo in 1 thread
  //    Foo in 2 threads
  //    Foo in 4 threads
  //    Foo in 8 threads
  //    Foo in 16 threads
  Benchmark* ThreadRange(int min_threads, int max_threads);

  // For each value n in the range, run this benchmark once using n threads.
  // min_threads and max_threads are always included in the range.
  // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
  // a benchmark with 1, 4, 7 and 8 threads.
  Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);

  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
  Benchmark* ThreadPerCpu();

  virtual void Run(State& state) = 0;

 protected:
  explicit Benchmark(const char* name);
  Benchmark(Benchmark const&);
  void SetName(const char* name);

  int ArgsCnt() const;

 private:
  friend class BenchmarkFamilies;
  friend class BenchmarkInstance;

  std::string name_;
  AggregationReportMode aggregation_report_mode_;
  std::vector<std::string> arg_names_;       // Args for all benchmark runs
  std::vector<std::vector<int64_t> > args_;  // Args for all benchmark runs
  TimeUnit time_unit_;
  int range_multiplier_;
  double min_time_;
  IterationCount iterations_;
  int repetitions_;
  bool measure_process_cpu_time_;
  bool use_real_time_;
  bool use_manual_time_;
  BigO complexity_;
  BigOFunc* complexity_lambda_;
  std::vector<Statistics> statistics_;
  std::vector<int> thread_counts_;

  Benchmark& operator=(Benchmark const&);
};

}  // namespace internal

// Create and register a benchmark with the specified 'name' that invokes
// the specified functor 'fn'.
//
// RETURNS: A pointer to the registered benchmark.
internal::Benchmark* RegisterBenchmark(const char* name,
                                       internal::Function* fn);

#if defined(BENCHMARK_HAS_CXX11)
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
#endif
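
// A usage sketch for runtime registration (illustrative; assumes C++11 and a
// hypothetical BM_lambda name):
//
//   int main(int argc, char** argv) {
//     auto BM_lambda = [](benchmark::State& st, int arg) {
//       for (auto _ : st) benchmark::DoNotOptimize(arg * arg);
//     };
//     for (int arg : {8, 64, 512})
//       benchmark::RegisterBenchmark(
//           ("BM_lambda/" + std::to_string(arg)).c_str(), BM_lambda, arg);
//     benchmark::Initialize(&argc, argv);
//     benchmark::RunSpecifiedBenchmarks();
//     benchmark::Shutdown();
//   }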

// Remove all registered benchmarks. All pointers to previously registered
// benchmarks are invalidated.
void ClearRegisteredBenchmarks();

namespace internal {
// The class used to hold all benchmarks created from static functions
// (i.e. those created using the BENCHMARK(...) macros).
class FunctionBenchmark : public Benchmark {
 public:
  FunctionBenchmark(const char* name, Function* func)
      : Benchmark(name), func_(func) {}

  virtual void Run(State& st) BENCHMARK_OVERRIDE;

 private:
  Function* func_;
};

#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
class LambdaBenchmark : public Benchmark {
 public:
  virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }

 private:
  template <class OLambda>
  LambdaBenchmark(const char* name, OLambda&& lam)
      : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}

  LambdaBenchmark(LambdaBenchmark const&) = delete;

 private:
  template <class Lam>
  friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);

  Lambda lambda_;
};
#endif

}  // namespace internal

inline internal::Benchmark* RegisterBenchmark(const char* name,
                                              internal::Function* fn) {
  return internal::RegisterBenchmarkInternal(
      ::new internal::FunctionBenchmark(name, fn));
}

#ifdef BENCHMARK_HAS_CXX11
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
  using BenchType =
      internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
  return internal::RegisterBenchmarkInternal(
      ::new BenchType(name, std::forward<Lambda>(fn)));
}
#endif

#if defined(BENCHMARK_HAS_CXX11) && \
    (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
template <class Lambda, class... Args>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
                                       Args&&... args) {
  return benchmark::RegisterBenchmark(
      name, [=](benchmark::State& st) { fn(st, args...); });
}
#else
#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
#endif

// The base class for all fixture tests.
class Fixture : public internal::Benchmark {
 public:
  Fixture() : internal::Benchmark("") {}

  virtual void Run(State& st) BENCHMARK_OVERRIDE {
    this->SetUp(st);
    this->BenchmarkCase(st);
    this->TearDown(st);
  }

  // These will be deprecated ...
  virtual void SetUp(const State&) {}
  virtual void TearDown(const State&) {}
  // ... In favor of these.
  virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
  virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }

 protected:
  virtual void BenchmarkCase(State&) = 0;
};

}  // namespace benchmark

// ------------------------------------------------------
// Macro to register benchmarks

// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
// empty. If X is empty the expression becomes (+1 == +0).
#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
#else
#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
#endif

// Helpers for generating unique variable names
#define BENCHMARK_PRIVATE_NAME(n) \
  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
// Helper for concatenation with macro name expansion
#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
    BaseClass##_##Method##_Benchmark

#define BENCHMARK_PRIVATE_DECLARE(n)                                 \
  static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
      BENCHMARK_UNUSED

#define BENCHMARK(n)                                     \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n, n)))

// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
  BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})

#ifdef BENCHMARK_HAS_CXX11

// Register a benchmark which invokes the function specified by `func`
// with the additional arguments specified by `...`.
//
// For example:
//
// template <class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//   [...]
// }
// /* Registers a benchmark named "BM_takes_args/int_string_test" */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
#define BENCHMARK_CAPTURE(func, test_case_name, ...)     \
  BENCHMARK_PRIVATE_DECLARE(func) =                      \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #func "/" #test_case_name,                 \
              [](::benchmark::State& st) { func(st, __VA_ARGS__); })))

#endif  // BENCHMARK_HAS_CXX11

// This will register a benchmark for a templatized function.  For example:
//
// template<int arg>
// void BM_Foo(benchmark::State& state);
//
// BENCHMARK_TEMPLATE(BM_Foo, 1);
//
// will register BM_Foo<1> as a benchmark.
#define BENCHMARK_TEMPLATE1(n, a)                        \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))

#define BENCHMARK_TEMPLATE2(n, a, b)                                         \
  BENCHMARK_PRIVATE_DECLARE(n) =                                             \
      (::benchmark::internal::RegisterBenchmarkInternal(                     \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
                                                       n<a, b>)))

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE(n, ...)                       \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
#else
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif

#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method)                  \
  class BaseClass##_##Method##_Benchmark : public BaseClass {           \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() : BaseClass() {                  \
      this->SetName(#BaseClass "/" #Method);                            \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a)     \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a> {        \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() : BaseClass<a>() {               \
      this->SetName(#BaseClass "<" #a ">/" #Method);                    \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b)  \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> {     \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() {            \
      this->SetName(#BaseClass "<" #a "," #b ">/" #Method);             \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...)       \
  class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
   public:                                                                 \
    BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() {        \
      this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method);             \
    }                                                                      \
                                                                           \
   protected:                                                              \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE;    \
  };
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
#endif

#define BENCHMARK_DEFINE_F(BaseClass, Method)    \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)    \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b)    \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...)            \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
#endif

#define BENCHMARK_REGISTER_F(BaseClass, Method) \
  BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))

#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
  BENCHMARK_PRIVATE_DECLARE(TestName) =        \
      (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))

// This macro will define and register a benchmark within a fixture class.
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
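
// A usage sketch (illustrative; MyFixture and its members are hypothetical):
//
//   class MyFixture : public benchmark::Fixture {
//    public:
//     void SetUp(::benchmark::State& state) BENCHMARK_OVERRIDE { /* ... */ }
//     void TearDown(::benchmark::State& state) BENCHMARK_OVERRIDE { /* ... */ }
//   };
//
//   BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) {
//     for (auto _ : st) {
//       // ... measure something that uses the fixture ...
//     }
//   }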

#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                    \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)                   \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                             \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
#endif

// Helper macro to create a main routine in a test that runs the benchmarks
#define BENCHMARK_MAIN()                                                \
  int main(int argc, char** argv) {                                     \
    ::benchmark::Initialize(&argc, argv);                               \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks();                              \
    ::benchmark::Shutdown();                                            \
    return 0;                                                           \
  }                                                                     \
  int main(int, char**)
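
// Usage sketch: expand BENCHMARK_MAIN(); at file scope in exactly one
// translation unit of the benchmark binary, e.g. (BM_Noop is hypothetical):
//
//   #include <benchmark/benchmark.h>
//   static void BM_Noop(benchmark::State& state) {
//     for (auto _ : state) {}
//   }
//   BENCHMARK(BM_Noop);
//   BENCHMARK_MAIN();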

// ------------------------------------------------------
// Benchmark Reporters

namespace benchmark {

struct CPUInfo {
  struct CacheInfo {
    std::string type;
    int level;
    int size;
    int num_sharing;
  };

  enum Scaling {
    UNKNOWN,
    ENABLED,
    DISABLED
  };

  int num_cpus;
  Scaling scaling;
  double cycles_per_second;
  std::vector<CacheInfo> caches;
  std::vector<double> load_avg;

  static const CPUInfo& Get();

 private:
  CPUInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
};

// Information about the system the benchmarks are running on.
struct SystemInfo {
  std::string name;
  static const SystemInfo& Get();

 private:
  SystemInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
};
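
// Both structs expose a single shared instance through Get(); for example, a
// custom reporter can query them directly:
//
//   const benchmark::CPUInfo& cpu = benchmark::CPUInfo::Get();
//   const benchmark::SystemInfo& sys = benchmark::SystemInfo::Get();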

// BenchmarkName contains the components of the Benchmark's name,
// allowing individual fields to be modified or cleared before
// building the final name using 'str()'.
struct BenchmarkName {
  std::string function_name;
  std::string args;
  std::string min_time;
  std::string iterations;
  std::string repetitions;
  std::string time_type;
  std::string threads;

  // Return the full name of the benchmark with each non-empty
  // field separated by a '/'.
  std::string str() const;
};
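
// As an illustration (the field values below are hypothetical): if
// function_name is "BM_memcpy", args is "8" and threads is "threads:2",
// with every other field empty, str() produces "BM_memcpy/8/threads:2".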

// Interface for custom benchmark result printers.
// By default, benchmark reports are printed to stdout. However, an
// application can control the destination of the reports by calling
// RunSpecifiedBenchmarks and passing it a custom reporter object.
// The reporter object must implement the following interface.
class BenchmarkReporter {
 public:
  struct Context {
    CPUInfo const& cpu_info;
    SystemInfo const& sys_info;
    // The number of chars in the longest benchmark name.
    size_t name_field_width;
    static const char* executable_name;
    Context();
  };

  struct Run {
    static const int64_t no_repetition_index = -1;
    enum RunType { RT_Iteration, RT_Aggregate };

    Run()
        : run_type(RT_Iteration),
          aggregate_unit(kTime),
          error_occurred(false),
          iterations(1),
          threads(1),
          time_unit(kNanosecond),
          real_accumulated_time(0),
          cpu_accumulated_time(0),
          max_heapbytes_used(0),
          complexity(oNone),
          complexity_lambda(),
          complexity_n(0),
          report_big_o(false),
          report_rms(false),
          counters(),
          has_memory_result(false),
          allocs_per_iter(0.0),
          max_bytes_used(0) {}

    std::string benchmark_name() const;
    BenchmarkName run_name;
    int64_t family_index;
    int64_t per_family_instance_index;
    RunType run_type;
    std::string aggregate_name;
    StatisticUnit aggregate_unit;
    std::string report_label;  // Empty if not set by benchmark.
    bool error_occurred;
    std::string error_message;

    IterationCount iterations;
    int64_t threads;
    int64_t repetition_index;
    int64_t repetitions;
    TimeUnit time_unit;
    double real_accumulated_time;
    double cpu_accumulated_time;

    // Return a value representing the real time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedRealTime() const;

    // Return a value representing the cpu time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedCPUTime() const;

    // This is set to 0.0 if memory tracing is not enabled.
    double max_heapbytes_used;

    // Keep track of arguments to compute asymptotic complexity
    BigO complexity;
    BigOFunc* complexity_lambda;
    int64_t complexity_n;

    // what statistics to compute from the measurements
    const std::vector<internal::Statistics>* statistics;

    // Inform print function whether the current run is a complexity report
    bool report_big_o;
    bool report_rms;

    UserCounters counters;

    // Memory metrics.
    bool has_memory_result;
    double allocs_per_iter;
    int64_t max_bytes_used;
  };

  struct PerFamilyRunReports {
    PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}

    // How many runs will all instances of this benchmark perform?
    int num_runs_total;

    // How many runs have happened already?
    int num_runs_done;

    // The reports about (non-erroneous!) runs of this family.
    std::vector<BenchmarkReporter::Run> Runs;
  };

  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
  // and the error stream set to 'std::cerr'
  BenchmarkReporter();

  // Called once for every suite of benchmarks run.
  // The parameter "context" contains information that the
  // reporter may wish to use when generating its report, for example the
  // platform under which the benchmarks are running. The benchmark run is
  // never started if this function returns false, allowing the reporter
  // to skip runs based on the context information.
  virtual bool ReportContext(const Context& context) = 0;

  // Called once for each group of benchmark runs; gives information about
  // the cpu-time and heap memory usage during the benchmark run. If the
  // group of runs contained more than two entries then 'report' contains
  // additional elements representing the mean and standard deviation of
  // those runs. Additionally, if this group of runs was the last in a
  // family of benchmarks, 'report' contains additional entries representing
  // the asymptotic complexity and RMS of that benchmark family.
  virtual void ReportRuns(const std::vector<Run>& report) = 0;

  // Called once and only once after every group of benchmarks is run and
  // reported.
  virtual void Finalize() {}

  // REQUIRES: The object referenced by 'out' is valid for the lifetime
  // of the reporter.
  void SetOutputStream(std::ostream* out) {
    assert(out);
    output_stream_ = out;
  }

  // REQUIRES: The object referenced by 'err' is valid for the lifetime
  // of the reporter.
  void SetErrorStream(std::ostream* err) {
    assert(err);
    error_stream_ = err;
  }

  std::ostream& GetOutputStream() const { return *output_stream_; }

  std::ostream& GetErrorStream() const { return *error_stream_; }

  virtual ~BenchmarkReporter();

  // Write a human readable string to 'out' representing the specified
  // 'context'.
  // REQUIRES: 'out' is non-null.
  static void PrintBasicContext(std::ostream* out, Context const& context);

 private:
  std::ostream* output_stream_;
  std::ostream* error_stream_;
};
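
// A minimal custom-reporter sketch (the class name and output format below
// are illustrative, not part of the library):
//
//   class LineReporter : public benchmark::BenchmarkReporter {
//    public:
//     bool ReportContext(const Context& context) override {
//       PrintBasicContext(&GetErrorStream(), context);
//       return true;
//     }
//     void ReportRuns(const std::vector<Run>& reports) override {
//       for (const Run& run : reports)
//         GetOutputStream() << run.benchmark_name() << "\n";
//     }
//   };
//
// An instance of such a reporter can be passed to RunSpecifiedBenchmarks()
// to replace the default console output.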

// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
class ConsoleReporter : public BenchmarkReporter {
 public:
  enum OutputOptions {
    OO_None = 0,
    OO_Color = 1,
    OO_Tabular = 2,
    OO_ColorTabular = OO_Color | OO_Tabular,
    OO_Defaults = OO_ColorTabular
  };
  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
      : output_options_(opts_),
        name_field_width_(0),
        prev_counters_(),
        printed_header_(false) {}

  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 protected:
  virtual void PrintRunData(const Run& report);
  virtual void PrintHeader(const Run& report);

  OutputOptions output_options_;
  size_t name_field_width_;
  UserCounters prev_counters_;
  bool printed_header_;
};

class JSONReporter : public BenchmarkReporter {
 public:
  JSONReporter() : first_report_(true) {}
  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
  virtual void Finalize() BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  bool first_report_;
};

class BENCHMARK_DEPRECATED_MSG(
    "The CSV Reporter will be removed in a future release") CSVReporter
    : public BenchmarkReporter {
 public:
  CSVReporter() : printed_header_(false) {}
  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  bool printed_header_;
  std::set<std::string> user_counter_names_;
};

// If a MemoryManager is registered, it can be used to collect and report
// allocation metrics for a run of the benchmark.
class MemoryManager {
 public:
  struct Result {
    Result() : num_allocs(0), max_bytes_used(0) {}

    // The number of allocations made in total between Start and Stop.
    int64_t num_allocs;

    // The peak memory use between Start and Stop.
    int64_t max_bytes_used;
  };

  virtual ~MemoryManager() {}

  // Implement this to start recording allocation information.
  virtual void Start() = 0;

  // Implement this to stop recording and fill out the given Result structure.
  virtual void Stop(Result* result) = 0;
};
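
// A minimal sketch of a custom manager (the class name and the counting
// mechanism below are illustrative; how allocations are actually tracked is
// up to the application):
//
//   class MyMemoryManager : public benchmark::MemoryManager {
//    public:
//     void Start() override { /* reset the application's allocation hooks */ }
//     void Stop(Result* result) override {
//       result->num_allocs = 0;      // fill in from the allocation hooks
//       result->max_bytes_used = 0;  // fill in from the allocation hooks
//     }
//   };
//
// An instance is then installed (e.g. via benchmark::RegisterMemoryManager)
// before the benchmarks run, and the collected values are attached to each
// run's memory metrics.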

inline const char* GetTimeUnitString(TimeUnit unit) {
  switch (unit) {
    case kSecond:
      return "s";
    case kMillisecond:
      return "ms";
    case kMicrosecond:
      return "us";
    case kNanosecond:
      return "ns";
  }
  BENCHMARK_UNREACHABLE();
}

inline double GetTimeUnitMultiplier(TimeUnit unit) {
  switch (unit) {
    case kSecond:
      return 1;
    case kMillisecond:
      return 1e3;
    case kMicrosecond:
      return 1e6;
    case kNanosecond:
      return 1e9;
  }
  BENCHMARK_UNREACHABLE();
}
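
// For example, a measurement of 0.00025 seconds expressed in kMicrosecond is
// 0.00025 * GetTimeUnitMultiplier(kMicrosecond) = 250 us.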

// Creates a list of integer values for the given range and multiplier.
// This can be used together with ArgsProduct() to allow multiple ranges
// with different multipliers.
// Example:
// ArgsProduct({
//   CreateRange(0, 1024, /*multi=*/32),
//   CreateRange(0, 100, /*multi=*/4),
//   CreateDenseRange(0, 4, /*step=*/1),
// });
std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);

// Creates a list of integer values for the given range and step.
std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step);
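// Example: CreateDenseRange(0, 4, /*step=*/1) yields {0, 1, 2, 3, 4}.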

}  // namespace benchmark

#endif  // BENCHMARK_BENCHMARK_H_