1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Support for registering benchmarks for functions.
16 
17 /* Example usage:
18 // Define a function that executes the code to be measured a
19 // specified number of times:
20 static void BM_StringCreation(benchmark::State& state) {
21   for (auto _ : state)
22     std::string empty_string;
23 }
24 
25 // Register the function as a benchmark
26 BENCHMARK(BM_StringCreation);
27 
28 // Define another benchmark
29 static void BM_StringCopy(benchmark::State& state) {
30   std::string x = "hello";
31   for (auto _ : state)
32     std::string copy(x);
33 }
34 BENCHMARK(BM_StringCopy);
35 
36 // Augment the main() program to invoke benchmarks if specified
37 // via the --benchmark_filter command line flag.  E.g.,
38 //       my_unittest --benchmark_filter=all
39 //       my_unittest --benchmark_filter=BM_StringCreation
40 //       my_unittest --benchmark_filter=String
41 //       my_unittest --benchmark_filter='Copy|Creation'
42 int main(int argc, char** argv) {
43   benchmark::Initialize(&argc, argv);
44   benchmark::RunSpecifiedBenchmarks();
45   return 0;
46 }
47 
48 // Sometimes a family of microbenchmarks can be implemented with
49 // just one routine that takes an extra argument to specify which
50 // one of the family of benchmarks to run.  For example, the following
51 // code defines a family of microbenchmarks for measuring the speed
52 // of memcpy() calls of different lengths:
53 
54 static void BM_memcpy(benchmark::State& state) {
55   char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
56   memset(src, 'x', state.range(0));
57   for (auto _ : state)
58     memcpy(dst, src, state.range(0));
59   state.SetBytesProcessed(state.iterations() * state.range(0));
60   delete[] src; delete[] dst;
61 }
62 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
63 
64 // The preceding code is quite repetitive, and can be replaced with the
65 // following short-hand.  The following invocation will pick a few
66 // appropriate arguments in the specified range and will generate a
67 // microbenchmark for each such argument.
68 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
69 
70 // You might have a microbenchmark that depends on two inputs.  For
71 // example, the following code defines a family of microbenchmarks for
72 // measuring the speed of set insertion.
73 static void BM_SetInsert(benchmark::State& state) {
74   std::set<int> data;
75   for (auto _ : state) {
76     state.PauseTiming();
77     data = ConstructRandomSet(state.range(0));
78     state.ResumeTiming();
79     for (int j = 0; j < state.range(1); ++j)
80       data.insert(RandomNumber());
81   }
82 }
83 BENCHMARK(BM_SetInsert)
84    ->Args({1<<10, 128})
85    ->Args({2<<10, 128})
86    ->Args({4<<10, 128})
87    ->Args({8<<10, 128})
88    ->Args({1<<10, 512})
89    ->Args({2<<10, 512})
90    ->Args({4<<10, 512})
91    ->Args({8<<10, 512});
92 
93 // The preceding code is quite repetitive, and can be replaced with
94 // the following short-hand.  The following invocation will pick a few
95 // appropriate arguments in the product of the two specified ranges
96 // and will generate a microbenchmark for each such pair.
97 BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
98 
99 // For more complex patterns of inputs, passing a custom function
100 // to Apply allows programmatic specification of an
101 // arbitrary set of arguments to run the microbenchmark on.
102 // The following example enumerates a dense range on
103 // one parameter, and a sparse range on the second.
104 static void CustomArguments(benchmark::internal::Benchmark* b) {
105   for (int i = 0; i <= 10; ++i)
106     for (int j = 32; j <= 1024*1024; j *= 8)
107       b->Args({i, j});
108 }
109 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
110 
111 // Templated microbenchmarks work the same way:
112 // Produce then consume 'size' messages 'iters' times
113 // Measures throughput in the absence of multiprogramming.
114 template <class Q> void BM_Sequential(benchmark::State& state) {
115   Q q;
116   typename Q::value_type v;
117   for (auto _ : state) {
118     for (int i = state.range(0); i--; )
119       q.push(v);
120     for (int e = state.range(0); e--; )
121       q.Wait(&v);
122   }
123   // actually messages, not bytes:
124   state.SetBytesProcessed(state.iterations() * state.range(0));
125 }
126 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
127 
128 Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
129 benchmark. This option overrides the `benchmark_min_time` flag.
130 
131 void BM_test(benchmark::State& state) {
132  ... body ...
133 }
134 BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
135 
136 In a multithreaded test, it is guaranteed that none of the threads will start
137 until all have reached the loop start, and all will have finished before any
138 thread exits the loop body. As such, any global setup or teardown you want to
139 do can be wrapped in a check against the thread index:
140 
141 static void BM_MultiThreaded(benchmark::State& state) {
142   if (state.thread_index == 0) {
143     // Setup code here.
144   }
145   for (auto _ : state) {
146     // Run the test as normal.
147   }
148   if (state.thread_index == 0) {
149     // Teardown code here.
150   }
151 }
152 BENCHMARK(BM_MultiThreaded)->Threads(4);
153 
154 
155 If a benchmark runs for only a few milliseconds it may be hard to visually
156 compare the measured times, since the output data is given in nanoseconds by
157 default. To set the time unit manually, specify it like this:
158 
159 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
160 */
161 
162 #ifndef BENCHMARK_BENCHMARK_H_
163 #define BENCHMARK_BENCHMARK_H_
164 
165 // The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
166 #if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
167 #define BENCHMARK_HAS_CXX11
168 #endif
169 
170 #include <stdint.h>
171 
172 #include <algorithm>
173 #include <cassert>
174 #include <cstddef>
175 #include <iosfwd>
176 #include <map>
177 #include <set>
178 #include <string>
179 #include <vector>
180 
181 #if defined(BENCHMARK_HAS_CXX11)
182 #include <initializer_list>
183 #include <type_traits>
184 #include <utility>
185 #endif
186 
187 #if defined(_MSC_VER)
188 #include <intrin.h>  // for _ReadWriteBarrier
189 #endif
190 
191 #ifndef BENCHMARK_HAS_CXX11
192 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
193   TypeName(const TypeName&);                         \
194   TypeName& operator=(const TypeName&)
195 #else
196 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
197   TypeName(const TypeName&) = delete;                \
198   TypeName& operator=(const TypeName&) = delete
199 #endif
200 
201 #if defined(__GNUC__)
202 #define BENCHMARK_UNUSED __attribute__((unused))
203 #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
204 #define BENCHMARK_NOEXCEPT noexcept
205 #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
206 #elif defined(_MSC_VER) && !defined(__clang__)
207 #define BENCHMARK_UNUSED
208 #define BENCHMARK_ALWAYS_INLINE __forceinline
209 #if _MSC_VER >= 1900
210 #define BENCHMARK_NOEXCEPT noexcept
211 #define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
212 #else
213 #define BENCHMARK_NOEXCEPT
214 #define BENCHMARK_NOEXCEPT_OP(x)
215 #endif
216 #define __func__ __FUNCTION__
217 #else
218 #define BENCHMARK_UNUSED
219 #define BENCHMARK_ALWAYS_INLINE
220 #define BENCHMARK_NOEXCEPT
221 #define BENCHMARK_NOEXCEPT_OP(x)
222 #endif
223 
224 #define BENCHMARK_INTERNAL_TOSTRING2(x) #x
225 #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
226 
227 #if defined(__GNUC__) || defined(__clang__)
228 #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
229 #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
230 #else
231 #define BENCHMARK_BUILTIN_EXPECT(x, y) x
232 #define BENCHMARK_DEPRECATED_MSG(msg)
233 #define BENCHMARK_WARNING_MSG(msg)                           \
234   __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
235       __LINE__) ") : warning note: " msg))
236 #endif
237 
238 #if defined(__GNUC__) && !defined(__clang__)
239 #define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
240 #endif
241 
242 #ifndef __has_builtin
243 #define __has_builtin(x) 0
244 #endif
245 
246 #if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
247 #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
248 #elif defined(_MSC_VER)
249 #define BENCHMARK_UNREACHABLE() __assume(false)
250 #else
251 #define BENCHMARK_UNREACHABLE() ((void)0)
252 #endif
253 
254 namespace benchmark {
255 class BenchmarkReporter;
256 class MemoryManager;
257 
258 void Initialize(int* argc, char** argv);
259 
260 // Report to stdout all arguments in 'argv' as unrecognized except the first.
261 // Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
262 bool ReportUnrecognizedArguments(int argc, char** argv);
263 
264 // Generate a list of benchmarks matching the specified --benchmark_filter flag
265 // and if --benchmark_list_tests is specified return after printing the name
266 // of each matching benchmark. Otherwise run each matching benchmark and
267 // report the results.
268 //
269 // The second and third overloads use the specified 'display_reporter' and
270 // 'file_reporter' respectively. 'file_reporter' will write to the file
271 // specified by '--benchmark_output'. If '--benchmark_output' is not given,
272 // the 'file_reporter' is ignored.
274 //
275 // RETURNS: The number of matching benchmarks.
276 size_t RunSpecifiedBenchmarks();
277 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
278 size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
279                               BenchmarkReporter* file_reporter);
280 
281 // Register a MemoryManager instance that will be used to collect and report
282 // allocation measurements for benchmark runs.
283 void RegisterMemoryManager(MemoryManager* memory_manager);
284 
285 namespace internal {
286 class Benchmark;
287 class BenchmarkImp;
288 class BenchmarkFamilies;
289 
290 void UseCharPointer(char const volatile*);
291 
292 // Take ownership of the pointer and register the benchmark. Return the
293 // registered benchmark.
294 Benchmark* RegisterBenchmarkInternal(Benchmark*);
295 
296 // Ensure that the standard streams are properly initialized in every TU.
297 int InitializeStreams();
298 BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
299 
300 }  // namespace internal
301 
302 #if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
303     defined(__EMSCRIPTEN__)
304 #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
305 #endif
306 
307 // The DoNotOptimize(...) function can be used to prevent a value or
308 // expression from being optimized away by the compiler. This function is
309 // intended to add little to no overhead.
310 // See: https://youtu.be/nXaxk27zwlk?t=2441
311 #ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
312 template <class Tp>
313 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
314   asm volatile("" : : "r,m"(value) : "memory");
315 }
316 
317 template <class Tp>
318 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
319 #if defined(__clang__)
320   asm volatile("" : "+r,m"(value) : : "memory");
321 #else
322   asm volatile("" : "+m,r"(value) : : "memory");
323 #endif
324 }
325 
326 // Force the compiler to flush pending writes to global memory. Acts as an
327 // effective read/write barrier
328 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
329   asm volatile("" : : : "memory");
330 }
331 #elif defined(_MSC_VER)
332 template <class Tp>
333 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
334   internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
335   _ReadWriteBarrier();
336 }
337 
338 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
339 #else
340 template <class Tp>
341 inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
342   internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
343 }
344 // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
345 #endif
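
// Illustrative sketch (not part of the API surface) of how DoNotOptimize()
// and ClobberMemory() are typically used inside a benchmark loop;
// 'BM_VectorPush' is a hypothetical benchmark name:
//
//   static void BM_VectorPush(benchmark::State& state) {
//     for (auto _ : state) {
//       std::vector<int> v;
//       v.reserve(1);
//       // Keep the compiler from optimizing away the vector's storage.
//       benchmark::DoNotOptimize(v.data());
//       v.push_back(42);
//       // Force the write to v to be treated as observable.
//       benchmark::ClobberMemory();
//     }
//   }
//   BENCHMARK(BM_VectorPush);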
346 
347 // This class is used for user-defined counters.
348 class Counter {
349  public:
350   enum Flags {
351     kDefaults = 0,
352     // Mark the counter as a rate. It will be presented divided
353     // by the duration of the benchmark.
354     kIsRate = 1U << 0U,
355     // Mark the counter as a thread-average quantity. It will be
356     // presented divided by the number of threads.
357     kAvgThreads = 1U << 1U,
358     // Mark the counter as a thread-average rate. See above.
359     kAvgThreadsRate = kIsRate | kAvgThreads,
360     // Mark the counter as a constant value, valid/same for *every* iteration.
361     // When reporting, it will be *multiplied* by the iteration count.
362     kIsIterationInvariant = 1U << 2U,
363     // Mark the counter as a constant rate.
364     // When reporting, it will be *multiplied* by the iteration count
365     // and then divided by the duration of the benchmark.
366     kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
367     // Mark the counter as an iteration-average quantity.
368     // It will be presented divided by the number of iterations.
369     kAvgIterations = 1U << 3U,
370     // Mark the counter as an iteration-average rate. See above.
371     kAvgIterationsRate = kIsRate | kAvgIterations
372   };
373 
374   enum OneK {
375     // 1'000 items per 1k
376     kIs1000 = 1000,
377     // 1'024 items per 1k
378     kIs1024 = 1024
379   };
380 
381   double value;
382   Flags flags;
383   OneK oneK;
384 
385   BENCHMARK_ALWAYS_INLINE
386   Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
387       : value(v), flags(f), oneK(k) {}
388 
389   BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
390   BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
391 };
392 
393 // A helper for user code to create unforeseen combinations of Flags, without
394 // having to do this cast manually each time, or providing this operator.
395 Counter::Flags inline operator|(const Counter::Flags& LHS,
396                                 const Counter::Flags& RHS) {
397   return static_cast<Counter::Flags>(static_cast<int>(LHS) |
398                                      static_cast<int>(RHS));
399 }
400 
401 // This is the container for the user-defined counters.
402 typedef std::map<std::string, Counter> UserCounters;
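
// Illustrative sketch of setting user-defined counters through
// 'State::counters' (declared further below); 'BM_Counted' and the counter
// names are hypothetical:
//
//   static void BM_Counted(benchmark::State& state) {
//     int64_t items = 0;
//     for (auto _ : state) {
//       // ... do work ...
//       ++items;
//     }
//     // Reported as-is.
//     state.counters["Items"] = benchmark::Counter(static_cast<double>(items));
//     // Reported divided by the benchmark duration (a rate).
//     state.counters["ItemsPerSec"] =
//         benchmark::Counter(static_cast<double>(items),
//                            benchmark::Counter::kIsRate);
//   }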
403 
404 // TimeUnit is passed to a benchmark in order to specify the order of magnitude
405 // for the measured time.
406 enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond };
407 
408 // BigO is passed to a benchmark in order to specify the asymptotic
409 // computational complexity for the benchmark. In case oAuto is selected,
410 // complexity will be calculated automatically to the best fit.
412 enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
413 
414 typedef uint64_t IterationCount;
415 
416 // BigOFunc is passed to a benchmark in order to specify the asymptotic
417 // computational complexity for the benchmark.
418 typedef double(BigOFunc)(IterationCount);
419 
420 // StatisticsFunc is passed to a benchmark in order to compute some descriptive
421 // statistics over all the measurements of some type
422 typedef double(StatisticsFunc)(const std::vector<double>&);
423 
424 namespace internal {
425 struct Statistics {
426   std::string name_;
427   StatisticsFunc* compute_;
428 
429   Statistics(const std::string& name, StatisticsFunc* compute)
430       : name_(name), compute_(compute) {}
431 };
432 
433 struct BenchmarkInstance;
434 class ThreadTimer;
435 class ThreadManager;
436 
437 enum AggregationReportMode
438 #if defined(BENCHMARK_HAS_CXX11)
439     : unsigned
440 #else
441 #endif
442 {
443   // The mode has not been manually specified
444   ARM_Unspecified = 0,
445   // The mode is user-specified.
446   // This may or may not be set when the following bit-flags are set.
447   ARM_Default = 1U << 0U,
448   // File reporter should only output aggregates.
449   ARM_FileReportAggregatesOnly = 1U << 1U,
450   // Display reporter should only output aggregates
451   ARM_DisplayReportAggregatesOnly = 1U << 2U,
452   // Both reporters should only display aggregates.
453   ARM_ReportAggregatesOnly =
454       ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
455 };
456 
457 }  // namespace internal
458 
459 // State is passed to a running Benchmark and contains state for the
460 // benchmark to use.
461 class State {
462  public:
463   struct StateIterator;
464   friend struct StateIterator;
465 
466   // Returns iterators used to run each iteration of a benchmark using a
467   // C++11 range-based for loop. These functions should not be called directly.
468   //
469   // REQUIRES: The benchmark has not started running yet. Neither begin nor end
470   // have been called previously.
471   //
472   // NOTE: KeepRunning may not be used after calling either of these functions.
473   BENCHMARK_ALWAYS_INLINE StateIterator begin();
474   BENCHMARK_ALWAYS_INLINE StateIterator end();
475 
476   // Returns true if the benchmark should continue through another iteration.
477   // NOTE: A benchmark may not return from the test until KeepRunning() has
478   // returned false.
479   bool KeepRunning();
480 
481   // Returns true iff the benchmark should run n more iterations.
482   // REQUIRES: 'n' > 0.
483   // NOTE: A benchmark must not return from the test until KeepRunningBatch()
484   // has returned false.
485   // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
486   //
487   // Intended usage:
488   //   while (state.KeepRunningBatch(1000)) {
489   //     // process 1000 elements
490   //   }
491   bool KeepRunningBatch(IterationCount n);
492 
493   // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
494   //           by the current thread.
495   // Stop the benchmark timer.  If not called, the timer will be
496   // automatically stopped after the last iteration of the benchmark loop.
497   //
498   // For threaded benchmarks the PauseTiming() function only pauses the timing
499   // for the current thread.
500   //
501   // NOTE: The "real time" measurement is per-thread. If different threads
502   // report different measurements the largest one is reported.
503   //
504   // NOTE: PauseTiming()/ResumeTiming() are relatively
505   // heavyweight, and so their use should generally be avoided
506   // within each benchmark iteration, if possible.
507   void PauseTiming();
508 
509   // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
510   //           by the current thread.
511   // Start the benchmark timer.  The timer is NOT running on entrance to the
512   // benchmark function. It begins running after control flow enters the
513   // benchmark loop.
514   //
515   // NOTE: PauseTiming()/ResumeTiming() are relatively
516   // heavyweight, and so their use should generally be avoided
517   // within each benchmark iteration, if possible.
518   void ResumeTiming();
519 
520   // REQUIRES: 'SkipWithError(...)' has not been called previously by the
521   //            current thread.
522   // Report the benchmark as resulting in an error with the specified 'msg'.
523   // After this call the user may explicitly 'return' from the benchmark.
524   //
525   // If the ranged-for style of benchmark loop is used, the user must explicitly
526   // break from the loop, otherwise all future iterations will be run.
527   // If the 'KeepRunning()' loop is used the current thread will automatically
528   // exit the loop at the end of the current iteration.
529   //
530   // For threaded benchmarks only the current thread stops executing and future
531   // calls to `KeepRunning()` will block until all threads have completed
532   // the `KeepRunning()` loop. If multiple threads report an error only the
533   // first error message is used.
534   //
535   // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
536   // the current scope immediately. If the function is called from within
537   // the 'KeepRunning()' loop the current iteration will finish. It is the user's
538   // responsibility to exit the scope as needed.
539   void SkipWithError(const char* msg);
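
  // Illustrative sketch of the intended SkipWithError() pattern with the
  // ranged-for loop; 'BM_WithSkip', 'ResourceAvailable()' and
  // 'DoOneIteration()' are hypothetical:
  //
  //   static void BM_WithSkip(benchmark::State& state) {
  //     if (!ResourceAvailable()) {
  //       state.SkipWithError("resource unavailable");
  //       return;  // early return is allowed after SkipWithError()
  //     }
  //     for (auto _ : state) {
  //       if (!DoOneIteration()) {
  //         state.SkipWithError("iteration failed");
  //         break;  // with the ranged-for loop, break explicitly
  //       }
  //     }
  //   }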
540 
541   // REQUIRES: called exactly once per iteration of the benchmarking loop.
542   // Set the manually measured time for this benchmark iteration, which
543   // is used instead of automatically measured time if UseManualTime() was
544   // specified.
545   //
546   // For threaded benchmarks the final value will be set to the largest
547   // reported value.
548   void SetIterationTime(double seconds);
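
  // Illustrative sketch of manual timing ('BM_ManualTiming' is hypothetical);
  // it pairs with Benchmark::UseManualTime(), declared further below:
  //
  //   static void BM_ManualTiming(benchmark::State& state) {
  //     for (auto _ : state) {
  //       auto start = std::chrono::high_resolution_clock::now();
  //       // ... the work being measured ...
  //       auto end = std::chrono::high_resolution_clock::now();
  //       std::chrono::duration<double> elapsed = end - start;
  //       state.SetIterationTime(elapsed.count());
  //     }
  //   }
  //   BENCHMARK(BM_ManualTiming)->UseManualTime();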
549 
550   // Set the number of bytes processed by the current benchmark
551   // execution.  This routine is typically called once at the end of a
552   // throughput oriented benchmark.
553   //
554   // REQUIRES: a benchmark has exited its benchmarking loop.
555   BENCHMARK_ALWAYS_INLINE
556   void SetBytesProcessed(int64_t bytes) {
557     counters["bytes_per_second"] =
558         Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
559   }
560 
561   BENCHMARK_ALWAYS_INLINE
562   int64_t bytes_processed() const {
563     if (counters.find("bytes_per_second") != counters.end())
564       return static_cast<int64_t>(counters.at("bytes_per_second"));
565     return 0;
566   }
567 
568   // If this routine is called with complexity_n > 0 and a complexity report is
569   // requested for the benchmark family, then the current benchmark will be part
570   // of the computation and complexity_n will represent the length of N.
573   BENCHMARK_ALWAYS_INLINE
574   void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }
575 
576   BENCHMARK_ALWAYS_INLINE
577   int64_t complexity_length_n() { return complexity_n_; }
578 
579   // If this routine is called with items > 0, then an items/s
580   // label is printed on the benchmark report line for the currently
581   // executing benchmark. It is typically called at the end of a processing
582   // benchmark where a processing items/second output is desired.
583   //
584   // REQUIRES: a benchmark has exited its benchmarking loop.
585   BENCHMARK_ALWAYS_INLINE
586   void SetItemsProcessed(int64_t items) {
587     counters["items_per_second"] =
588         Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
589   }
590 
591   BENCHMARK_ALWAYS_INLINE
592   int64_t items_processed() const {
593     if (counters.find("items_per_second") != counters.end())
594       return static_cast<int64_t>(counters.at("items_per_second"));
595     return 0;
596   }
597 
598   // If this routine is called, the specified label is printed at the
599   // end of the benchmark report line for the currently executing
600   // benchmark.  Example:
601   //  static void BM_Compress(benchmark::State& state) {
602   //    ...
603   //    double compression = input_size / output_size;
604   //    state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression));
605   //  }
606   // Produces output that looks like:
607   //  BM_Compress   50         50   14115038  compress:27.3%
608   //
609   // REQUIRES: a benchmark has exited its benchmarking loop.
610   void SetLabel(const char* label);
611 
612   void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
613     this->SetLabel(str.c_str());
614   }
615 
616   // Range arguments for this run. CHECKs if the argument has been set.
617   BENCHMARK_ALWAYS_INLINE
618   int64_t range(std::size_t pos = 0) const {
619     assert(range_.size() > pos);
620     return range_[pos];
621   }
622 
623   BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
624   int64_t range_x() const { return range(0); }
625 
626   BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
627   int64_t range_y() const { return range(1); }
628 
629   BENCHMARK_ALWAYS_INLINE
630   IterationCount iterations() const {
631     if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
632       return 0;
633     }
634     return max_iterations - total_iterations_ + batch_leftover_;
635   }
636 
637  private:  // items we expect on the first cache line (i.e. 64 bytes of the struct)
639   // When total_iterations_ is 0, KeepRunning() and friends will return false.
640   // May be larger than max_iterations.
641   IterationCount total_iterations_;
642 
643   // When using KeepRunningBatch(), batch_leftover_ holds the number of
644   // iterations beyond max_iters that were run. Used to track
645   // completed_iterations_ accurately.
646   IterationCount batch_leftover_;
647 
648  public:
649   const IterationCount max_iterations;
650 
651  private:
652   bool started_;
653   bool finished_;
654   bool error_occurred_;
655 
656  private:  // items we don't need on the first cache line
657   std::vector<int64_t> range_;
658 
659   int64_t complexity_n_;
660 
661  public:
662   // Container for user-defined counters.
663   UserCounters counters;
664   // Index of the executing thread. Values from [0, threads).
665   const int thread_index;
666   // Number of threads concurrently executing the benchmark.
667   const int threads;
668 
669  private:
670   State(IterationCount max_iters, const std::vector<int64_t>& ranges,
671         int thread_i, int n_threads, internal::ThreadTimer* timer,
672         internal::ThreadManager* manager);
673 
674   void StartKeepRunning();
675   // Implementation of KeepRunning() and KeepRunningBatch().
676   // is_batch must be true unless n is 1.
677   bool KeepRunningInternal(IterationCount n, bool is_batch);
678   void FinishKeepRunning();
679   internal::ThreadTimer* timer_;
680   internal::ThreadManager* manager_;
681 
682   friend struct internal::BenchmarkInstance;
683 };
684 
685 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
686   return KeepRunningInternal(1, /*is_batch=*/false);
687 }
688 
689 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
690   return KeepRunningInternal(n, /*is_batch=*/true);
691 }
692 
693 inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
694                                                                bool is_batch) {
695   // total_iterations_ is set to 0 by the constructor, and always set to a
696   // nonzero value by StartKeepRunning().
697   assert(n > 0);
698   // n must be 1 unless is_batch is true.
699   assert(is_batch || n == 1);
700   if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
701     total_iterations_ -= n;
702     return true;
703   }
704   if (!started_) {
705     StartKeepRunning();
706     if (!error_occurred_ && total_iterations_ >= n) {
707       total_iterations_ -= n;
708       return true;
709     }
710   }
711   // For non-batch runs, total_iterations_ must be 0 by now.
712   if (is_batch && total_iterations_ != 0) {
713     batch_leftover_ = n - total_iterations_;
714     total_iterations_ = 0;
715     return true;
716   }
717   FinishKeepRunning();
718   return false;
719 }
720 
721 struct State::StateIterator {
722   struct BENCHMARK_UNUSED Value {};
723   typedef std::forward_iterator_tag iterator_category;
724   typedef Value value_type;
725   typedef Value reference;
726   typedef Value pointer;
727   typedef std::ptrdiff_t difference_type;
728 
729  private:
730   friend class State;
731   BENCHMARK_ALWAYS_INLINE
732   StateIterator() : cached_(0), parent_() {}
733 
734   BENCHMARK_ALWAYS_INLINE
735   explicit StateIterator(State* st)
736       : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
737 
738  public:
739   BENCHMARK_ALWAYS_INLINE
740   Value operator*() const { return Value(); }
741 
742   BENCHMARK_ALWAYS_INLINE
743   StateIterator& operator++() {
744     assert(cached_ > 0);
745     --cached_;
746     return *this;
747   }
748 
749   BENCHMARK_ALWAYS_INLINE
750   bool operator!=(StateIterator const&) const {
751     if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
752     parent_->FinishKeepRunning();
753     return false;
754   }
755 
756  private:
757   IterationCount cached_;
758   State* const parent_;
759 };
760 
761 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
762   return StateIterator(this);
763 }
764 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
765   StartKeepRunning();
766   return StateIterator();
767 }
768 
769 namespace internal {
770 
771 typedef void(Function)(State&);
772 
773 // ------------------------------------------------------
774 // Benchmark registration object.  The BENCHMARK() macro expands
775 // into an internal::Benchmark* object.  Various methods can
776 // be called on this object to change the properties of the benchmark.
777 // Each method returns "this" so that multiple method calls can be
778 // chained into one expression.
779 class Benchmark {
780  public:
781   virtual ~Benchmark();
782 
783   // Note: the following methods all return "this" so that multiple
784   // method calls can be chained together in one expression.
785 
786   // Run this benchmark once with "x" as the extra argument passed
787   // to the function.
788   // REQUIRES: The function passed to the constructor must accept an arg1.
789   Benchmark* Arg(int64_t x);
790 
791   // Run this benchmark with the given time unit for the generated output report
792   Benchmark* Unit(TimeUnit unit);
793 
794   // Run this benchmark once for a number of values picked from the
795   // range [start..limit].  (start and limit are always picked.)
796   // REQUIRES: The function passed to the constructor must accept an arg1.
797   Benchmark* Range(int64_t start, int64_t limit);
798 
799   // Run this benchmark once for all values in the range [start..limit] with a
800   // specific step.
801   // REQUIRES: The function passed to the constructor must accept an arg1.
802   Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
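
  // For example (a hypothetical benchmark 'BM_Foo'):
  //   BENCHMARK(BM_Foo)->DenseRange(0, 1024, 128);
  // would run BM_Foo with arguments 0, 128, 256, ..., 1024.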
803 
804   // Run this benchmark once with "args" as the extra arguments passed
805   // to the function.
806   // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
807   Benchmark* Args(const std::vector<int64_t>& args);
808 
809   // Equivalent to Args({x, y})
810   // NOTE: This is a legacy C++03 interface provided for compatibility only.
811   //   New code should use 'Args'.
812   Benchmark* ArgPair(int64_t x, int64_t y) {
813     std::vector<int64_t> args;
814     args.push_back(x);
815     args.push_back(y);
816     return Args(args);
817   }
818 
819   // Run this benchmark once for a number of values picked from the
820   // ranges [start..limit].  (starts and limits are always picked.)
821   // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
822   Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
823 
824   // Equivalent to ArgNames({name})
825   Benchmark* ArgName(const std::string& name);
826 
827   // Set the argument names to display in the benchmark name. If not called,
828   // only argument values will be shown.
829   Benchmark* ArgNames(const std::vector<std::string>& names);
830 
831   // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
832   // NOTE: This is a legacy C++03 interface provided for compatibility only.
833   //   New code should use 'Ranges'.
834   Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
835     std::vector<std::pair<int64_t, int64_t> > ranges;
836     ranges.push_back(std::make_pair(lo1, hi1));
837     ranges.push_back(std::make_pair(lo2, hi2));
838     return Ranges(ranges);
839   }
840 
841   // Pass this benchmark object to *func, which can customize
842   // the benchmark by calling various methods like Arg, Args,
843   // Threads, etc.
844   Benchmark* Apply(void (*func)(Benchmark* benchmark));
845 
846   // Set the range multiplier for non-dense range. If not called, the range
847   // multiplier kRangeMultiplier will be used.
848   Benchmark* RangeMultiplier(int multiplier);
849 
850   // Set the minimum amount of time to use when running this benchmark. This
851   // option overrides the `benchmark_min_time` flag.
852   // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
853   Benchmark* MinTime(double t);
854 
855   // Specify the number of iterations that should be run by this benchmark.
856   // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
857   //
858   // NOTE: This function should only be used when *exact* iteration control is
859   //   needed and never to control or limit how long a benchmark runs, where
860   // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
861   Benchmark* Iterations(IterationCount n);
862 
863   // Specify the number of times to repeat this benchmark. This option overrides
864   // the `benchmark_repetitions` flag.
865   // REQUIRES: `n > 0`
866   Benchmark* Repetitions(int n);
867 
868   // Specify if each repetition of the benchmark should be reported separately
869   // or if only the final statistics should be reported. If the benchmark
870   // is not repeated then the single result is always reported.
871   // Applies to *ALL* reporters (display and file).
872   Benchmark* ReportAggregatesOnly(bool value = true);
873 
874   // Same as ReportAggregatesOnly(), but applies to display reporter only.
875   Benchmark* DisplayAggregatesOnly(bool value = true);
876 
877   // By default, the CPU time is measured only for the main thread, which may
878   // be unrepresentative if the benchmark uses threads internally. If called,
879   // the total CPU time spent by all the threads will be measured instead.
881   Benchmark* MeasureProcessCPUTime();
882 
883   // If a particular benchmark should use the Wall clock instead of the CPU time
884   // (be it either the CPU time of the main thread only (default), or the
885   // total CPU usage of the benchmark), call this method. If called, the elapsed
886   // (wall) time will be used to control how many iterations are run, and in the
887   // printing of items/second or MB/second values.
888   // If not called, the CPU time used by the benchmark will be used.
889   Benchmark* UseRealTime();
890 
891   // If a benchmark must measure time manually (e.g. if GPU execution time is
892   // being measured), call this method. If called, each benchmark iteration
893   // should call SetIterationTime(seconds) to report the measured time, which
894   // will be used to control how many iterations are run, and in the printing
895   // of items/second or MB/second values.
898   Benchmark* UseManualTime();
899 
900   // Set the asymptotic computational complexity for the benchmark. If called
901   // the asymptotic computational complexity will be shown on the output.
902   Benchmark* Complexity(BigO complexity = benchmark::oAuto);
903 
904   // Set the asymptotic computational complexity for the benchmark. If called
905   // the asymptotic computational complexity will be shown on the output.
906   Benchmark* Complexity(BigOFunc* complexity);
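
  // Illustrative sketch of complexity reporting ('BM_StringCompare' is a
  // hypothetical benchmark): the benchmark records N via
  // State::SetComplexityN() and the family requests a fit, either automatic
  // or via a user-supplied function:
  //
  //   static void BM_StringCompare(benchmark::State& state) {
  //     std::string s1(state.range(0), '-');
  //     std::string s2(state.range(0), '-');
  //     for (auto _ : state)
  //       benchmark::DoNotOptimize(s1.compare(s2));
  //     state.SetComplexityN(state.range(0));
  //   }
  //   BENCHMARK(BM_StringCompare)
  //       ->RangeMultiplier(2)->Range(1 << 10, 1 << 18)
  //       ->Complexity(benchmark::oN);
  //   // Or with an explicit fit function:
  //   BENCHMARK(BM_StringCompare)
  //       ->Range(1 << 10, 1 << 18)
  //       ->Complexity([](benchmark::IterationCount n) { return double(n); });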
907 
908   // Add this statistic to be computed over all the values of the benchmark run.
909   Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics);
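
  // Illustrative sketch of computing an extra "max" statistic over the
  // measurements of repeated runs ('BM_Spiky' is hypothetical). A captureless
  // lambda converts to the required StatisticsFunc*:
  //
  //   BENCHMARK(BM_Spiky)->Repetitions(10)->ComputeStatistics(
  //       "max", [](const std::vector<double>& v) -> double {
  //         return *std::max_element(v.begin(), v.end());
  //       });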
910 
911   // Support for running multiple copies of the same benchmark concurrently
912   // in multiple threads.  This may be useful when measuring the scaling
913   // of some piece of code.
914 
915   // Run one instance of this benchmark concurrently in t threads.
916   Benchmark* Threads(int t);
917 
918   // Pick a set of values T from [min_threads,max_threads].
919   // min_threads and max_threads are always included in T.  Run this
920   // benchmark once for each value in T.  The benchmark run for a
921   // particular value t consists of t threads running the benchmark
922   // function concurrently.  For example, consider:
923   //    BENCHMARK(Foo)->ThreadRange(1,16);
924   // This will run the following benchmarks:
925   //    Foo in 1 thread
926   //    Foo in 2 threads
927   //    Foo in 4 threads
928   //    Foo in 8 threads
929   //    Foo in 16 threads
930   Benchmark* ThreadRange(int min_threads, int max_threads);
931 
932   // For each value n in the range, run this benchmark once using n threads.
933   // min_threads and max_threads are always included in the range.
934   // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
935   // a benchmark with 1, 4, 7 and 8 threads.
936   Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
937 
938   // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
939   Benchmark* ThreadPerCpu();
940 
941   virtual void Run(State& state) = 0;
942 
943  protected:
944   explicit Benchmark(const char* name);
945   Benchmark(Benchmark const&);
946   void SetName(const char* name);
947 
948   int ArgsCnt() const;
949 
950  private:
951   friend class BenchmarkFamilies;
952 
953   std::string name_;
954   AggregationReportMode aggregation_report_mode_;
955   std::vector<std::string> arg_names_;       // Args for all benchmark runs
956   std::vector<std::vector<int64_t> > args_;  // Args for all benchmark runs
957   TimeUnit time_unit_;
958   int range_multiplier_;
959   double min_time_;
960   IterationCount iterations_;
961   int repetitions_;
962   bool measure_process_cpu_time_;
963   bool use_real_time_;
964   bool use_manual_time_;
965   BigO complexity_;
966   BigOFunc* complexity_lambda_;
967   std::vector<Statistics> statistics_;
968   std::vector<int> thread_counts_;
969 
970   Benchmark& operator=(Benchmark const&);
971 };
972 
973 }  // namespace internal
974 
975 // Create and register a benchmark with the specified 'name' that invokes
976 // the specified functor 'fn'.
977 //
978 // RETURNS: A pointer to the registered benchmark.
979 internal::Benchmark* RegisterBenchmark(const char* name,
980                                        internal::Function* fn);
981 
982 #if defined(BENCHMARK_HAS_CXX11)
983 template <class Lambda>
984 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
985 #endif
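
// Illustrative sketch of programmatic registration, useful when the set of
// benchmarks is only known at runtime; the benchmark names and
// 'DoSomethingWith()' are hypothetical:
//
//   int main(int argc, char** argv) {
//     for (int len : {1, 2, 4, 8})
//       benchmark::RegisterBenchmark(
//           ("BM_Work/" + std::to_string(len)).c_str(),
//           [len](benchmark::State& state) {
//             for (auto _ : state)
//               DoSomethingWith(len);
//           });
//     benchmark::Initialize(&argc, argv);
//     benchmark::RunSpecifiedBenchmarks();
//   }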
986 
987 // Remove all registered benchmarks. All pointers to previously registered
988 // benchmarks are invalidated.
989 void ClearRegisteredBenchmarks();
990 
991 namespace internal {
992 // The class used to hold all Benchmarks created from static functions
993 // (i.e. those created using the BENCHMARK(...) macros).
994 class FunctionBenchmark : public Benchmark {
995  public:
996   FunctionBenchmark(const char* name, Function* func)
997       : Benchmark(name), func_(func) {}
998 
999   virtual void Run(State& st);
1000 
1001  private:
1002   Function* func_;
1003 };
1004 
1005 #ifdef BENCHMARK_HAS_CXX11
1006 template <class Lambda>
1007 class LambdaBenchmark : public Benchmark {
1008  public:
1009   virtual void Run(State& st) { lambda_(st); }
1010 
1011  private:
1012   template <class OLambda>
1013   LambdaBenchmark(const char* name, OLambda&& lam)
1014       : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
1015 
1016   LambdaBenchmark(LambdaBenchmark const&) = delete;
1017 
1018  private:
1019   template <class Lam>
1020   friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
1021 
1022   Lambda lambda_;
1023 };
1024 #endif
1025 
1026 }  // namespace internal
1027 
1028 inline internal::Benchmark* RegisterBenchmark(const char* name,
1029                                               internal::Function* fn) {
1030   return internal::RegisterBenchmarkInternal(
1031       ::new internal::FunctionBenchmark(name, fn));
1032 }
1033 
1034 #ifdef BENCHMARK_HAS_CXX11
1035 template <class Lambda>
1036 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
1037   using BenchType =
1038       internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
1039   return internal::RegisterBenchmarkInternal(
1040       ::new BenchType(name, std::forward<Lambda>(fn)));
1041 }
1042 #endif
1043 
1044 #if defined(BENCHMARK_HAS_CXX11) && \
1045     (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
1046 template <class Lambda, class... Args>
1047 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
1048                                        Args&&... args) {
1049   return benchmark::RegisterBenchmark(
1050       name, [=](benchmark::State& st) { fn(st, args...); });
1051 }
1052 #else
1053 #define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
1054 #endif
1055 
1056 // The base class for all fixture tests.
1057 class Fixture : public internal::Benchmark {
1058  public:
1059   Fixture() : internal::Benchmark("") {}
1060 
1061   virtual void Run(State& st) {
1062     this->SetUp(st);
1063     this->BenchmarkCase(st);
1064     this->TearDown(st);
1065   }
1066 
1067   // These will be deprecated ...
1068   virtual void SetUp(const State&) {}
1069   virtual void TearDown(const State&) {}
1070   // ... In favor of these.
1071   virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
1072   virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
1073 
1074  protected:
1075   virtual void BenchmarkCase(State&) = 0;
1076 };
1077 
1078 }  // namespace benchmark
1079 
1080 // ------------------------------------------------------
1081 // Macro to register benchmarks
1082 
1083 // Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
1084 // every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
1085 // empty. If X is empty the expression becomes (+1 == +0).
1086 #if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
1087 #define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
1088 #else
1089 #define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
1090 #endif
1091 
1092 // Helpers for generating unique variable names
1093 #define BENCHMARK_PRIVATE_NAME(n) \
1094   BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
1095 #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
1096 #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
1097 
1098 #define BENCHMARK_PRIVATE_DECLARE(n)                                 \
1099   static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
1100       BENCHMARK_UNUSED
1101 
1102 #define BENCHMARK(n)                                     \
1103   BENCHMARK_PRIVATE_DECLARE(n) =                         \
1104       (::benchmark::internal::RegisterBenchmarkInternal( \
1105           new ::benchmark::internal::FunctionBenchmark(#n, n)))
1106 
1107 // Old-style macros
1108 #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
1109 #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
1110 #define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
1111 #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
1112 #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
1113   BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
1114 
1115 #ifdef BENCHMARK_HAS_CXX11
1116 
1117 // Register a benchmark which invokes the function specified by `func`
1118 // with the additional arguments specified by `...`.
1119 //
1120 // For example:
1121 //
1122 // template <class ...ExtraArgs>
1123 // void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
1124 //   [...]
1125 // }
1126 // /* Registers a benchmark named "BM_takes_args/int_string_test" */
1127 // BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
1128 #define BENCHMARK_CAPTURE(func, test_case_name, ...)     \
1129   BENCHMARK_PRIVATE_DECLARE(func) =                      \
1130       (::benchmark::internal::RegisterBenchmarkInternal( \
1131           new ::benchmark::internal::FunctionBenchmark(  \
1132               #func "/" #test_case_name,                 \
1133               [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1134 
1135 #endif  // BENCHMARK_HAS_CXX11
1136 
1137 // This will register a benchmark for a templatized function.  For example:
1138 //
1139 // template<int arg>
1140 // void BM_Foo(benchmark::State& state);
1141 //
1142 // BENCHMARK_TEMPLATE(BM_Foo, 1);
1143 //
1144 // will register BM_Foo<1> as a benchmark.
1145 #define BENCHMARK_TEMPLATE1(n, a)                        \
1146   BENCHMARK_PRIVATE_DECLARE(n) =                         \
1147       (::benchmark::internal::RegisterBenchmarkInternal( \
1148           new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))
1149 
1150 #define BENCHMARK_TEMPLATE2(n, a, b)                                         \
1151   BENCHMARK_PRIVATE_DECLARE(n) =                                             \
1152       (::benchmark::internal::RegisterBenchmarkInternal(                     \
1153           new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
1154                                                        n<a, b>)))
1155 
1156 #ifdef BENCHMARK_HAS_CXX11
1157 #define BENCHMARK_TEMPLATE(n, ...)                       \
1158   BENCHMARK_PRIVATE_DECLARE(n) =                         \
1159       (::benchmark::internal::RegisterBenchmarkInternal( \
1160           new ::benchmark::internal::FunctionBenchmark(  \
1161               #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
1162 #else
1163 #define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
1164 #endif
1165 
1166 #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method)        \
1167   class BaseClass##_##Method##_Benchmark : public BaseClass { \
1168    public:                                                    \
1169     BaseClass##_##Method##_Benchmark() : BaseClass() {        \
1170       this->SetName(#BaseClass "/" #Method);                  \
1171     }                                                         \
1172                                                               \
1173    protected:                                                 \
1174     virtual void BenchmarkCase(::benchmark::State&);          \
1175   };
1176 
1177 #define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1178   class BaseClass##_##Method##_Benchmark : public BaseClass<a> {    \
1179    public:                                                          \
1180     BaseClass##_##Method##_Benchmark() : BaseClass<a>() {           \
1181       this->SetName(#BaseClass "<" #a ">/" #Method);                \
1182     }                                                               \
1183                                                                     \
1184    protected:                                                       \
1185     virtual void BenchmarkCase(::benchmark::State&);                \
1186   };
1187 
1188 #define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1189   class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> {    \
1190    public:                                                             \
1191     BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() {           \
1192       this->SetName(#BaseClass "<" #a "," #b ">/" #Method);            \
1193     }                                                                  \
1194                                                                        \
1195    protected:                                                          \
1196     virtual void BenchmarkCase(::benchmark::State&);                   \
1197   };
1198 
1199 #ifdef BENCHMARK_HAS_CXX11
1200 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...)       \
1201   class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
1202    public:                                                                 \
1203     BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() {        \
1204       this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method);             \
1205     }                                                                      \
1206                                                                            \
1207    protected:                                                              \
1208     virtual void BenchmarkCase(::benchmark::State&);                       \
1209   };
1210 #else
1211 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
1212   BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
1213 #endif
1214 
1215 #define BENCHMARK_DEFINE_F(BaseClass, Method)    \
1216   BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1217   void BaseClass##_##Method##_Benchmark::BenchmarkCase
1218 
1219 #define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)    \
1220   BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1221   void BaseClass##_##Method##_Benchmark::BenchmarkCase
1222 
1223 #define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b)    \
1224   BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1225   void BaseClass##_##Method##_Benchmark::BenchmarkCase
1226 
1227 #ifdef BENCHMARK_HAS_CXX11
1228 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...)            \
1229   BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1230   void BaseClass##_##Method##_Benchmark::BenchmarkCase
1231 #else
1232 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \
1233   BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
1234 #endif
1235 
1236 #define BENCHMARK_REGISTER_F(BaseClass, Method) \
1237   BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark)
1238 
1239 #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
1240   BENCHMARK_PRIVATE_DECLARE(TestName) =        \
1241       (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
1242 
1243 // This macro will define and register a benchmark within a fixture class.
1244 #define BENCHMARK_F(BaseClass, Method)           \
1245   BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1246   BENCHMARK_REGISTER_F(BaseClass, Method);       \
1247   void BaseClass##_##Method##_Benchmark::BenchmarkCase
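
// Illustrative sketch of the fixture macros ('MyFixture', 'BarTest' and
// 'ThreadedTest' are hypothetical). BENCHMARK_F defines and registers in one
// step; the DEFINE/REGISTER pair can be used instead when registration needs
// to attach extra options such as Threads():
//
//   class MyFixture : public benchmark::Fixture {
//    public:
//     void SetUp(const ::benchmark::State& state) { /* per-run setup */ }
//     void TearDown(const ::benchmark::State& state) { /* per-run teardown */ }
//   };
//
//   BENCHMARK_F(MyFixture, BarTest)(benchmark::State& st) {
//     for (auto _ : st) {
//       // ... measured code ...
//     }
//   }
//
//   BENCHMARK_DEFINE_F(MyFixture, ThreadedTest)(benchmark::State& st) {
//     for (auto _ : st) {
//       // ... measured code ...
//     }
//   }
//   BENCHMARK_REGISTER_F(MyFixture, ThreadedTest)->Threads(2);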
1248 
1249 #define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
1250   BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1251   BENCHMARK_REGISTER_F(BaseClass, Method);                    \
1252   void BaseClass##_##Method##_Benchmark::BenchmarkCase
1253 
1254 #define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
1255   BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1256   BENCHMARK_REGISTER_F(BaseClass, Method);                       \
1257   void BaseClass##_##Method##_Benchmark::BenchmarkCase
1258 
1259 #ifdef BENCHMARK_HAS_CXX11
1260 #define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)                   \
1261   BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1262   BENCHMARK_REGISTER_F(BaseClass, Method);                             \
1263   void BaseClass##_##Method##_Benchmark::BenchmarkCase
1264 #else
1265 #define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
1266   BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
1267 #endif
1268 
1269 // Helper macro to create a main routine in a test that runs the benchmarks
1270 #define BENCHMARK_MAIN()                                                \
1271   int main(int argc, char** argv) {                                     \
1272     ::benchmark::Initialize(&argc, argv);                               \
1273     if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
1274     ::benchmark::RunSpecifiedBenchmarks();                              \
1275   }                                                                     \
1276   int main(int, char**)
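
// Illustrative usage in a stand-alone benchmark binary ('BM_Foo' is a
// hypothetical benchmark):
//
//   static void BM_Foo(benchmark::State& state) {
//     for (auto _ : state) {
//       // ... measured code ...
//     }
//   }
//   BENCHMARK(BM_Foo);
//   BENCHMARK_MAIN();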
1277 
1278 // ------------------------------------------------------
1279 // Benchmark Reporters
1280 
1281 namespace benchmark {
1282 
1283 struct CPUInfo {
1284   struct CacheInfo {
1285     std::string type;
1286     int level;
1287     int size;
1288     int num_sharing;
1289   };
1290 
1291   int num_cpus;
1292   double cycles_per_second;
1293   std::vector<CacheInfo> caches;
1294   bool scaling_enabled;
1295   std::vector<double> load_avg;
1296 
1297   static const CPUInfo& Get();
1298 
1299  private:
1300   CPUInfo();
1301   BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
1302 };
1303 
1304 // Struct for system information.
1305 struct SystemInfo {
1306   std::string name;
1307   static const SystemInfo& Get();
1308 
1309  private:
1310   SystemInfo();
1311   BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
1312 };
1313 
1314 // BenchmarkName contains the components of the Benchmark's name
1315 // which allows individual fields to be modified or cleared before
1316 // building the final name using 'str()'.
1317 struct BenchmarkName {
1318   std::string function_name;
1319   std::string args;
1320   std::string min_time;
1321   std::string iterations;
1322   std::string repetitions;
1323   std::string time_type;
1324   std::string threads;
1325 
1326   // Return the full name of the benchmark with each non-empty
1327   // field separated by a '/'
1328   std::string str() const;
1329 };
1330 
1331 // Interface for custom benchmark result printers.
1332 // By default, benchmark reports are printed to stdout. However an application
1333 // can control the destination of the reports by calling
1334 // RunSpecifiedBenchmarks and passing it a custom reporter object.
1335 // The reporter object must implement the following interface.
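// An illustrative sketch of such a reporter ('MinimalReporter' is a
// hypothetical name); it prints one line per run using the accessors declared
// below:
//
//   class MinimalReporter : public benchmark::BenchmarkReporter {
//    public:
//     bool ReportContext(const Context& context) {
//       PrintBasicContext(&GetErrorStream(), context);
//       return true;
//     }
//     void ReportRuns(const std::vector<Run>& report) {
//       for (const Run& run : report)
//         GetOutputStream() << run.benchmark_name() << " "
//                           << run.GetAdjustedCPUTime() << "\n";
//     }
//   };
//   // e.g.: MinimalReporter rep; benchmark::RunSpecifiedBenchmarks(&rep);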
1336 class BenchmarkReporter {
1337  public:
1338   struct Context {
1339     CPUInfo const& cpu_info;
1340     SystemInfo const& sys_info;
1341     // The number of chars in the longest benchmark name.
1342     size_t name_field_width;
1343     static const char* executable_name;
1344     Context();
1345   };
1346 
1347   struct Run {
1348     static const int64_t no_repetition_index = -1;
1349     enum RunType { RT_Iteration, RT_Aggregate };
1350 
1351     Run()
1352         : run_type(RT_Iteration),
1353           error_occurred(false),
1354           iterations(1),
1355           threads(1),
1356           time_unit(kNanosecond),
1357           real_accumulated_time(0),
1358           cpu_accumulated_time(0),
1359           max_heapbytes_used(0),
1360           complexity(oNone),
1361           complexity_lambda(),
1362           complexity_n(0),
1363           report_big_o(false),
1364           report_rms(false),
1365           counters(),
1366           has_memory_result(false),
1367           allocs_per_iter(0.0),
1368           max_bytes_used(0) {}
1369 
1370     std::string benchmark_name() const;
1371     BenchmarkName run_name;
1372     RunType run_type;
1373     std::string aggregate_name;
1374     std::string report_label;  // Empty if not set by benchmark.
1375     bool error_occurred;
1376     std::string error_message;
1377 
1378     IterationCount iterations;
1379     int64_t threads;
1380     int64_t repetition_index;
1381     int64_t repetitions;
1382     TimeUnit time_unit;
1383     double real_accumulated_time;
1384     double cpu_accumulated_time;
1385 
1386     // Return a value representing the real time per iteration in the unit
1387     // specified by 'time_unit'.
1388     // NOTE: If 'iterations' is zero the returned value represents the
1389     // accumulated time.
1390     double GetAdjustedRealTime() const;
1391 
1392     // Return a value representing the cpu time per iteration in the unit
1393     // specified by 'time_unit'.
1394     // NOTE: If 'iterations' is zero the returned value represents the
1395     // accumulated time.
1396     double GetAdjustedCPUTime() const;
1397 
1398     // This is set to 0.0 if memory tracing is not enabled.
1399     double max_heapbytes_used;
1400 
1401     // Keep track of arguments to compute asymptotic complexity
1402     BigO complexity;
1403     BigOFunc* complexity_lambda;
1404     int64_t complexity_n;
1405 
    // What statistics to compute from the measurements.
    const std::vector<internal::Statistics>* statistics;

    // Inform the print function whether the current run is a complexity
    // report.
    bool report_big_o;
    bool report_rms;

    UserCounters counters;

    // Memory metrics.
    bool has_memory_result;
    double allocs_per_iter;
    int64_t max_bytes_used;
  };

  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
  // and the error stream set to 'std::cerr'.
  BenchmarkReporter();

  // Called once for every suite of benchmarks run.
  // The parameter "context" contains information that the
  // reporter may wish to use when generating its report, for example the
  // platform under which the benchmarks are running. The benchmark run is
  // never started if this function returns false, allowing the reporter
  // to skip runs based on the context information.
  virtual bool ReportContext(const Context& context) = 0;

  // Called once for each group of benchmark runs; gives information about
  // CPU time and heap memory usage during the benchmark run. If the group
  // of runs contained more than two entries, then 'report' contains
  // additional elements representing the mean and standard deviation of
  // those runs. Additionally, if this group of runs was the last in a family
  // of benchmarks, 'report' contains additional entries representing the
  // asymptotic complexity and RMS of that benchmark family.
  virtual void ReportRuns(const std::vector<Run>& report) = 0;

  // Called once and only once after every group of benchmarks is run and
  // reported.
  virtual void Finalize() {}

  // REQUIRES: The object referenced by 'out' is valid for the lifetime
  // of the reporter.
  void SetOutputStream(std::ostream* out) {
    assert(out);
    output_stream_ = out;
  }

  // REQUIRES: The object referenced by 'err' is valid for the lifetime
  // of the reporter.
  void SetErrorStream(std::ostream* err) {
    assert(err);
    error_stream_ = err;
  }

  std::ostream& GetOutputStream() const { return *output_stream_; }

  std::ostream& GetErrorStream() const { return *error_stream_; }

  virtual ~BenchmarkReporter();

  // Write a human-readable string to 'out' representing the specified
  // 'context'.
  // REQUIRES: 'out' is non-null.
  static void PrintBasicContext(std::ostream* out, Context const& context);

 private:
  std::ostream* output_stream_;
  std::ostream* error_stream_;
};

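// A minimal custom reporter sketch (illustrative only; 'MyReporter' is a
// hypothetical name and error handling is omitted). It prints one line per
// run and is installed by passing it to RunSpecifiedBenchmarks() as
// described above:
//
//   class MyReporter : public benchmark::BenchmarkReporter {
//    public:
//     bool ReportContext(const Context& context) override {
//       PrintBasicContext(&GetErrorStream(), context);
//       return true;
//     }
//     void ReportRuns(const std::vector<Run>& report) override {
//       for (const Run& run : report) {
//         GetOutputStream() << run.benchmark_name() << " "
//                           << run.GetAdjustedCPUTime() << " "
//                           << benchmark::GetTimeUnitString(run.time_unit)
//                           << "\n";
//       }
//     }
//   };
//
//   int main(int argc, char** argv) {
//     benchmark::Initialize(&argc, argv);
//     MyReporter reporter;
//     benchmark::RunSpecifiedBenchmarks(&reporter);
//     return 0;
//   }
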
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
class ConsoleReporter : public BenchmarkReporter {
 public:
  enum OutputOptions {
    OO_None = 0,
    OO_Color = 1,
    OO_Tabular = 2,
    OO_ColorTabular = OO_Color | OO_Tabular,
    OO_Defaults = OO_ColorTabular
  };
  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
      : output_options_(opts_),
        name_field_width_(0),
        prev_counters_(),
        printed_header_(false) {}

  virtual bool ReportContext(const Context& context);
  virtual void ReportRuns(const std::vector<Run>& reports);

 protected:
  virtual void PrintRunData(const Run& report);
  virtual void PrintHeader(const Run& report);

  OutputOptions output_options_;
  size_t name_field_width_;
  UserCounters prev_counters_;
  bool printed_header_;
};

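// For example, to emit plain (non-color, non-tabular) console output into a
// file instead of std::cout (an illustrative sketch; it assumes <fstream>
// is included and uses the RunSpecifiedBenchmarks(BenchmarkReporter*)
// overload mentioned above):
//
//   std::ofstream out_file("bench.log");
//   benchmark::ConsoleReporter reporter(benchmark::ConsoleReporter::OO_None);
//   reporter.SetOutputStream(&out_file);
//   benchmark::RunSpecifiedBenchmarks(&reporter);
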
class JSONReporter : public BenchmarkReporter {
 public:
  JSONReporter() : first_report_(true) {}
  virtual bool ReportContext(const Context& context);
  virtual void ReportRuns(const std::vector<Run>& reports);
  virtual void Finalize();

 private:
  void PrintRunData(const Run& report);

  bool first_report_;
};

class BENCHMARK_DEPRECATED_MSG(
    "The CSV Reporter will be removed in a future release") CSVReporter
    : public BenchmarkReporter {
 public:
  CSVReporter() : printed_header_(false) {}
  virtual bool ReportContext(const Context& context);
  virtual void ReportRuns(const std::vector<Run>& reports);

 private:
  void PrintRunData(const Run& report);

  bool printed_header_;
  std::set<std::string> user_counter_names_;
};

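// In practice these reporters are usually selected on the command line
// rather than constructed directly, e.g. (flag names as commonly documented
// for this library; shown here only for illustration):
//
//   ./my_benchmark --benchmark_format=json
//   ./my_benchmark --benchmark_out=results.csv --benchmark_out_format=csv
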
// If a MemoryManager is registered, it can be used to collect and report
// allocation metrics for a run of the benchmark.
class MemoryManager {
 public:
  struct Result {
    Result() : num_allocs(0), max_bytes_used(0) {}

    // The number of allocations made in total between Start and Stop.
    int64_t num_allocs;

    // The peak memory use between Start and Stop.
    int64_t max_bytes_used;
  };

  virtual ~MemoryManager() {}

  // Implement this to start recording allocation information.
  virtual void Start() = 0;

  // Implement this to stop recording and fill out the given Result structure.
  virtual void Stop(Result* result) = 0;
};

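// A sketch of a MemoryManager implementation (illustrative only:
// 'CountingManager', 'CurrentAllocCount()' and 'CurrentPeakBytes()' are
// hypothetical hooks into the application's own allocator; registration
// assumes the benchmark::RegisterMemoryManager() entry point provided by
// the library):
//
//   class CountingManager : public benchmark::MemoryManager {
//    public:
//     void Start() override { allocs_at_start_ = CurrentAllocCount(); }
//     void Stop(Result* result) override {
//       result->num_allocs = CurrentAllocCount() - allocs_at_start_;
//       result->max_bytes_used = CurrentPeakBytes();
//     }
//
//    private:
//     int64_t allocs_at_start_ = 0;
//   };
//
//   static CountingManager manager;
//   // ... in main(), before RunSpecifiedBenchmarks():
//   benchmark::RegisterMemoryManager(&manager);
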
inline const char* GetTimeUnitString(TimeUnit unit) {
  switch (unit) {
    case kMillisecond:
      return "ms";
    case kMicrosecond:
      return "us";
    case kNanosecond:
      return "ns";
  }
  BENCHMARK_UNREACHABLE();
}

inline double GetTimeUnitMultiplier(TimeUnit unit) {
  switch (unit) {
    case kMillisecond:
      return 1e3;
    case kMicrosecond:
      return 1e6;
    case kNanosecond:
      return 1e9;
  }
  BENCHMARK_UNREACHABLE();
}
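
// GetTimeUnitMultiplier() is the scale factor used when expressing a Run's
// accumulated times in 'time_unit'. Conceptually (a sketch only; it assumes
// the accumulated times are stored in seconds and ignores the
// iterations == 0 special case noted in Run::GetAdjustedRealTime()):
//
//   // 'run' is some BenchmarkReporter::Run
//   double per_iter_real_time =
//       (run.real_accumulated_time / run.iterations) *
//       benchmark::GetTimeUnitMultiplier(run.time_unit);
//   // e.g. 0.000002 s over 1 iteration with kMicrosecond -> 2.0 ("us")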

}  // namespace benchmark

#endif  // BENCHMARK_BENCHMARK_H_