1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Support for registering benchmarks for functions.
16 
17 /* Example usage:
18 // Define a function that executes the code to be measured a
19 // specified number of times:
20 static void BM_StringCreation(benchmark::State& state) {
21   for (auto _ : state)
22     std::string empty_string;
23 }
24 
25 // Register the function as a benchmark
26 BENCHMARK(BM_StringCreation);
27 
28 // Define another benchmark
29 static void BM_StringCopy(benchmark::State& state) {
30   std::string x = "hello";
31   for (auto _ : state)
32     std::string copy(x);
33 }
34 BENCHMARK(BM_StringCopy);
35 
// Augment the main() program to invoke benchmarks if specified
// via the --benchmark_filter command line flag.  E.g.,
38 //       my_unittest --benchmark_filter=all
39 //       my_unittest --benchmark_filter=BM_StringCreation
40 //       my_unittest --benchmark_filter=String
41 //       my_unittest --benchmark_filter='Copy|Creation'
42 int main(int argc, char** argv) {
43   benchmark::Initialize(&argc, argv);
44   benchmark::RunSpecifiedBenchmarks();
45   return 0;
46 }
47 
48 // Sometimes a family of microbenchmarks can be implemented with
49 // just one routine that takes an extra argument to specify which
50 // one of the family of benchmarks to run.  For example, the following
51 // code defines a family of microbenchmarks for measuring the speed
52 // of memcpy() calls of different lengths:
53 
54 static void BM_memcpy(benchmark::State& state) {
55   char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
56   memset(src, 'x', state.range(0));
57   for (auto _ : state)
58     memcpy(dst, src, state.range(0));
59   state.SetBytesProcessed(int64_t(state.iterations()) *
60                           int64_t(state.range(0)));
61   delete[] src; delete[] dst;
62 }
63 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
64 
65 // The preceding code is quite repetitive, and can be replaced with the
66 // following short-hand.  The following invocation will pick a few
67 // appropriate arguments in the specified range and will generate a
68 // microbenchmark for each such argument.
69 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
70 
71 // You might have a microbenchmark that depends on two inputs.  For
72 // example, the following code defines a family of microbenchmarks for
73 // measuring the speed of set insertion.
74 static void BM_SetInsert(benchmark::State& state) {
75   set<int> data;
76   for (auto _ : state) {
77     state.PauseTiming();
78     data = ConstructRandomSet(state.range(0));
79     state.ResumeTiming();
80     for (int j = 0; j < state.range(1); ++j)
81       data.insert(RandomNumber());
82   }
83 }
84 BENCHMARK(BM_SetInsert)
85    ->Args({1<<10, 128})
86    ->Args({2<<10, 128})
87    ->Args({4<<10, 128})
88    ->Args({8<<10, 128})
89    ->Args({1<<10, 512})
90    ->Args({2<<10, 512})
91    ->Args({4<<10, 512})
92    ->Args({8<<10, 512});
93 
94 // The preceding code is quite repetitive, and can be replaced with
95 // the following short-hand.  The following macro will pick a few
96 // appropriate arguments in the product of the two specified ranges
97 // and will generate a microbenchmark for each such pair.
98 BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
99 
100 // For more complex patterns of inputs, passing a custom function
101 // to Apply allows programmatic specification of an
102 // arbitrary set of arguments to run the microbenchmark on.
103 // The following example enumerates a dense range on
104 // one parameter, and a sparse range on the second.
105 static void CustomArguments(benchmark::internal::Benchmark* b) {
106   for (int i = 0; i <= 10; ++i)
107     for (int j = 32; j <= 1024*1024; j *= 8)
108       b->Args({i, j});
109 }
110 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
111 
112 // Templated microbenchmarks work the same way:
113 // Produce then consume 'size' messages 'iters' times
114 // Measures throughput in the absence of multiprogramming.
template <class Q> void BM_Sequential(benchmark::State& state) {
116   Q q;
117   typename Q::value_type v;
118   for (auto _ : state) {
119     for (int i = state.range(0); i--; )
120       q.push(v);
121     for (int e = state.range(0); e--; )
122       q.Wait(&v);
123   }
124   // actually messages, not bytes:
125   state.SetBytesProcessed(
126       static_cast<int64_t>(state.iterations())*state.range(0));
127 }
128 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
129 
130 Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
131 benchmark. This option overrides the `benchmark_min_time` flag.
132 
133 void BM_test(benchmark::State& state) {
134  ... body ...
135 }
136 BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
137 
138 In a multithreaded test, it is guaranteed that none of the threads will start
139 until all have reached the loop start, and all will have finished before any
140 thread exits the loop body. As such, any global setup or teardown you want to
141 do can be wrapped in a check against the thread index:
142 
143 static void BM_MultiThreaded(benchmark::State& state) {
144   if (state.thread_index == 0) {
145     // Setup code here.
146   }
147   for (auto _ : state) {
148     // Run the test as normal.
149   }
150   if (state.thread_index == 0) {
151     // Teardown code here.
152   }
153 }
154 BENCHMARK(BM_MultiThreaded)->Threads(4);
155 
156 
If a benchmark runs a few milliseconds it may be hard to visually compare the
measured times, since the output data is given in nanoseconds by default. To
change the time unit, specify it explicitly:
160 
161 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
162 */
163 
164 #ifndef BENCHMARK_BENCHMARK_H_
165 #define BENCHMARK_BENCHMARK_H_
166 
167 
// Defines BENCHMARK_HAS_CXX11 when the compiler reports C++11 or newer, either
// through __cplusplus or through MSVC's _MSVC_LANG. Later sections of this
// header test the macro to choose between C++03 and C++11 constructs.
// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
#define BENCHMARK_HAS_CXX11
#endif
172 
173 #include <stdint.h>
174 
175 #include <algorithm>
176 #include <cassert>
177 #include <cstddef>
178 #include <iosfwd>
179 #include <string>
180 #include <vector>
181 #include <map>
182 #include <set>
183 
184 #if defined(BENCHMARK_HAS_CXX11)
185 #include <type_traits>
186 #include <initializer_list>
187 #include <utility>
188 #endif
189 
190 #if defined(_MSC_VER)
191 #include <intrin.h> // for _ReadWriteBarrier
192 #endif
193 
// BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) declares the copy constructor
// and copy assignment operator of TypeName so that the type cannot be copied.
//  - Without C++11 the two members are only declared; the macro does not
//    change the access level, so place it in a private section.
//  - With C++11 the two members are explicitly deleted.
// The expansion has no trailing semicolon; the user supplies it.
#ifndef BENCHMARK_HAS_CXX11
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);                         \
  TypeName& operator=(const TypeName&)
#else
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;                \
  TypeName& operator=(const TypeName&) = delete
#endif
203 
// Compiler-specific attribute and noexcept shims:
//   BENCHMARK_UNUSED          marks an entity as intentionally unused.
//   BENCHMARK_ALWAYS_INLINE   requests forced inlining.
//   BENCHMARK_NOEXCEPT        expands to `noexcept` where supported.
//   BENCHMARK_NOEXCEPT_OP(x)  expands to `noexcept(x)` where supported.
// Unknown compilers get empty definitions for all four.
//
// NOTE(review): the __GNUC__ branch expands BENCHMARK_NOEXCEPT to `noexcept`
// without consulting BENCHMARK_HAS_CXX11 -- confirm this header is never
// compiled in pre-C++11 mode with GCC-compatible compilers.
#if defined(__GNUC__)
#define BENCHMARK_UNUSED __attribute__((unused))
#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#elif defined(_MSC_VER) && !defined(__clang__)
#define BENCHMARK_UNUSED
#define BENCHMARK_ALWAYS_INLINE __forceinline
// noexcept is only enabled for _MSC_VER >= 1900.
#if _MSC_VER >= 1900
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
// NOTE(review): this redefines the reserved identifier __func__ for every
// file that includes this header under MSVC -- confirm it is still required.
#define __func__ __FUNCTION__
#else
#define BENCHMARK_UNUSED
#define BENCHMARK_ALWAYS_INLINE
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
226 
// Two-level stringification: the outer macro lets its argument (for example
// __LINE__) be macro-expanded before the inner macro applies the # operator.
#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)

// BENCHMARK_BUILTIN_EXPECT(x, y): evaluates to x while hinting to the compiler
//   that x is expected to equal y.
// BENCHMARK_DEPRECATED_MSG(msg): marks a declaration as deprecated with `msg`.
// BENCHMARK_WARNING_MSG(msg): emits `msg` as a compile-time message; it is
//   only defined in the non-GNU branch, as in the original header.
#if defined(__GNUC__)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#else
// The fallback is parenthesized so that an argument containing operators
// (e.g. `a || b`) keeps its grouping when the macro is used inside a larger
// expression.
#define BENCHMARK_BUILTIN_EXPECT(x, y) (x)
#define BENCHMARK_DEPRECATED_MSG(msg)
#define BENCHMARK_WARNING_MSG(msg)                           \
  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
      __LINE__) ") : warning note: " msg))
#endif
238 
// BENCHMARK_GCC_VERSION encodes the GCC version as major * 100 + minor.
// It is defined only for real GCC: clang also defines __GNUC__ and is
// excluded explicitly.
#if defined(__GNUC__) && !defined(__clang__)
#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#endif
242 
// Compilers without __has_builtin get a stub that reports every builtin as
// unavailable, so the feature test below is always well-formed.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// BENCHMARK_UNREACHABLE() marks a code path as unreachable:
//  - __builtin_unreachable() on GNU-compatible compilers or wherever the
//    builtin is reported as available,
//  - __assume(false) on MSVC,
//  - a no-op expression on any other compiler.
#if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
  #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
  #define BENCHMARK_UNREACHABLE() __assume(false)
#else
  #define BENCHMARK_UNREACHABLE() ((void)0)
#endif
254 
255 namespace benchmark {
256 class BenchmarkReporter;
257 
// Library initialization entry point. The usage example at the top of this
// header calls it as benchmark::Initialize(&argc, argv) before
// RunSpecifiedBenchmarks().
// NOTE(review): argc is taken by pointer, so this presumably consumes the
// --benchmark_* flags from argv -- confirm against the implementation.
void Initialize(int* argc, char** argv);
259 
// Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true if there is at least one unrecognized argument
// (i.e. 'argc' > 1).
bool ReportUnrecognizedArguments(int argc, char** argv);
263 
// Generate a list of benchmarks matching the specified --benchmark_filter flag
// and if --benchmark_list_tests is specified return after printing the name
// of each matching benchmark. Otherwise run each matching benchmark and
// report the results.
//
// The second and third overloads use the specified 'console_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file
// specified by '--benchmark_output'. If '--benchmark_output' is not given the
// 'file_reporter' is ignored.
//
// RETURNS: The number of matching benchmarks.
size_t RunSpecifiedBenchmarks();
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter);
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
                              BenchmarkReporter* file_reporter);
280 
281 // If this routine is called, peak memory allocation past this point in the
282 // benchmark is reported at the end of the benchmark report line. (It is
283 // computed by running the benchmark once with a single iteration and a memory
284 // tracer.)
285 // TODO(dominic)
286 // void MemoryUsage();
287 
// Implementation details shared between this header and the library sources.
namespace internal {
// Forward declarations; none of these types is defined in this part of the
// header.
class Benchmark;
class BenchmarkImp;
class BenchmarkFamilies;

// Declared here and defined elsewhere. The DoNotOptimize() implementations
// that cannot use inline assembly pass it the address of the value they
// are protecting.
void UseCharPointer(char const volatile*);

// Take ownership of the pointer and register the benchmark. Return the
// registered benchmark.
Benchmark* RegisterBenchmarkInternal(Benchmark*);

// Ensure that the standard streams are properly initialized in every TU.
// The static below has internal linkage, so each translation unit that
// includes this header gets its own copy, and InitializeStreams() is called
// while that copy is initialized.
int InitializeStreams();
BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();

}  // namespace internal
304 
305 
// BENCHMARK_HAS_NO_INLINE_ASSEMBLY is defined when GNU-style inline assembly
// must not be used: the compiler is neither GCC-compatible nor clang, or the
// target is PNaCl or Emscripten. It selects the DoNotOptimize()/ClobberMemory()
// implementation further down.
#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
    defined(__EMSCRIPTEN__)
# define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
#endif
310 
311 
// The DoNotOptimize(...) function can be used to prevent a value or
// expression from being optimized away by the compiler. This function is
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
//
// One of three implementations is selected at preprocessing time:
//   1. inline assembly is available: empty asm statements that name the value
//      as an operand and declare a "memory" clobber;
//   2. MSVC: internal::UseCharPointer() followed by _ReadWriteBarrier();
//   3. anything else: internal::UseCharPointer() only.
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
// Const-reference overload: `value` is an input operand that may live in a
// register or in memory ("r,m").
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
void DoNotOptimize(Tp const& value) {
    asm volatile("" : : "r,m"(value) : "memory");
}

// Mutable-reference overload: `value` is a read-write operand ("+"). The two
// branches differ only in the order of the constraint alternatives listed
// for clang and for other compilers.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
#if defined(__clang__)
  asm volatile("" : "+r,m"(value) : : "memory");
#else
  asm volatile("" : "+m,r"(value) : : "memory");
#endif
}

// Force the compiler to flush pending writes to global memory. Acts as an
// effective read/write barrier
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  asm volatile("" : : : "memory");
}
#elif defined(_MSC_VER)
// MSVC: hand the address of `value` to an out-of-line function, then issue a
// compiler barrier. Only the const-reference overload exists in this branch.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
  _ReadWriteBarrier();
}

// MSVC counterpart of the asm-based ClobberMemory() above.
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  _ReadWriteBarrier();
}
#else
// Generic fallback: hand the address of `value` to an out-of-line function.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers
// (ClobberMemory() is not declared at all in this branch, so code calling it
// does not compile on such compilers.)
#endif
354 
355 
356 
357 // This class is used for user-defined counters.
358 class Counter {
359 public:
360 
361   enum Flags {
362     kDefaults   = 0,
363     // Mark the counter as a rate. It will be presented divided
364     // by the duration of the benchmark.
365     kIsRate     = 1,
366     // Mark the counter as a thread-average quantity. It will be
367     // presented divided by the number of threads.
368     kAvgThreads = 2,
369     // Mark the counter as a thread-average rate. See above.
370     kAvgThreadsRate = kIsRate|kAvgThreads
371   };
372 
373   double value;
374   Flags  flags;
375 
376   BENCHMARK_ALWAYS_INLINE
value(v)377   Counter(double v = 0., Flags f = kDefaults) : value(v), flags(f) {}
378 
379   BENCHMARK_ALWAYS_INLINE operator double const& () const { return value; }
380   BENCHMARK_ALWAYS_INLINE operator double      & ()       { return value; }
381 
382 };
383 
// This is the container for the user-defined counters: it maps a counter name
// to its Counter. State exposes one instance as its public `counters` member.
typedef std::map<std::string, Counter> UserCounters;
386 
387 
// TimeUnit is passed to a benchmark in order to specify the order of magnitude
// for the measured time, e.g. BENCHMARK(BM_test)->Unit(benchmark::kMillisecond).
enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond };
391 
// BigO is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark. In case oAuto is selected,
// complexity will be calculated automatically to the best fit.
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
397 
// BigOFunc is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark. It is a function type taking an
// int64_t and returning a double; the API takes it by pointer (BigOFunc*).
typedef double(BigOFunc)(int64_t);
401 
// StatisticsFunc is passed to a benchmark in order to compute some descriptive
// statistics over all the measurements of some type. It is a function type
// taking the measurements as a vector of doubles and returning one double.
typedef double(StatisticsFunc)(const std::vector<double>&);

// A named statistic: `name_` labels the result and `compute_` points to the
// function that produces it.
struct Statistics {
  std::string name_;
  StatisticsFunc* compute_;

  // `name` is taken by const reference so that it is copied exactly once,
  // into `name_`; `compute` is stored as given.
  Statistics(const std::string& name, StatisticsFunc* compute)
    : name_(name), compute_(compute) {}
};
413 
namespace internal {
// Forward declarations for the two helper types that State refers to by
// pointer.
class ThreadTimer;
class ThreadManager;

// How the results of a benchmark are reported. With C++11 available the
// underlying type is fixed to `unsigned`; otherwise the compiler chooses it.
enum ReportMode
#if defined(BENCHMARK_HAS_CXX11)
    : unsigned
#endif
{
  // The mode has not been manually specified
  RM_Unspecified = 0,
  // The mode is user-specified as default.
  RM_Default = 1,
  RM_ReportAggregatesOnly = 2
};
}  // namespace internal
429 
430 // State is passed to a running Benchmark and contains state for the
431 // benchmark to use.
432 class State {
433  public:
434   struct StateIterator;
435   friend struct StateIterator;
436 
437   // Returns iterators used to run each iteration of a benchmark using a
438   // C++11 ranged-based for loop. These functions should not be called directly.
439   //
440   // REQUIRES: The benchmark has not started running yet. Neither begin nor end
441   // have been called previously.
442   //
443   // NOTE: KeepRunning may not be used after calling either of these functions.
444   BENCHMARK_ALWAYS_INLINE StateIterator begin();
445   BENCHMARK_ALWAYS_INLINE StateIterator end();
446 
447   // Returns true if the benchmark should continue through another iteration.
448   // NOTE: A benchmark may not return from the test until KeepRunning() has
449   // returned false.
450   bool KeepRunning();
451 
452   // Returns true iff the benchmark should run n more iterations.
453   // REQUIRES: 'n' > 0.
454   // NOTE: A benchmark must not return from the test until KeepRunningBatch()
455   // has returned false.
456   // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
457   //
458   // Intended usage:
459   //   while (state.KeepRunningBatch(1000)) {
460   //     // process 1000 elements
461   //   }
462   bool KeepRunningBatch(size_t n);
463 
464   // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
465   //           by the current thread.
466   // Stop the benchmark timer.  If not called, the timer will be
467   // automatically stopped after the last iteration of the benchmark loop.
468   //
469   // For threaded benchmarks the PauseTiming() function only pauses the timing
470   // for the current thread.
471   //
472   // NOTE: The "real time" measurement is per-thread. If different threads
473   // report different measurements the largest one is reported.
474   //
475   // NOTE: PauseTiming()/ResumeTiming() are relatively
476   // heavyweight, and so their use should generally be avoided
477   // within each benchmark iteration, if possible.
478   void PauseTiming();
479 
480   // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
481   //           by the current thread.
482   // Start the benchmark timer.  The timer is NOT running on entrance to the
483   // benchmark function. It begins running after control flow enters the
484   // benchmark loop.
485   //
486   // NOTE: PauseTiming()/ResumeTiming() are relatively
487   // heavyweight, and so their use should generally be avoided
488   // within each benchmark iteration, if possible.
489   void ResumeTiming();
490 
491   // REQUIRES: 'SkipWithError(...)' has not been called previously by the
492   //            current thread.
493   // Report the benchmark as resulting in an error with the specified 'msg'.
494   // After this call the user may explicitly 'return' from the benchmark.
495   //
496   // If the ranged-for style of benchmark loop is used, the user must explicitly
497   // break from the loop, otherwise all future iterations will be run.
498   // If the 'KeepRunning()' loop is used the current thread will automatically
499   // exit the loop at the end of the current iteration.
500   //
501   // For threaded benchmarks only the current thread stops executing and future
502   // calls to `KeepRunning()` will block until all threads have completed
503   // the `KeepRunning()` loop. If multiple threads report an error only the
504   // first error message is used.
505   //
506   // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
507   // the current scope immediately. If the function is called from within
508   // the 'KeepRunning()' loop the current iteration will finish. It is the users
509   // responsibility to exit the scope as needed.
510   void SkipWithError(const char* msg);
511 
512   // REQUIRES: called exactly once per iteration of the benchmarking loop.
513   // Set the manually measured time for this benchmark iteration, which
514   // is used instead of automatically measured time if UseManualTime() was
515   // specified.
516   //
517   // For threaded benchmarks the final value will be set to the largest
518   // reported values.
519   void SetIterationTime(double seconds);
520 
521   // Set the number of bytes processed by the current benchmark
522   // execution.  This routine is typically called once at the end of a
523   // throughput oriented benchmark.  If this routine is called with a
524   // value > 0, the report is printed in MB/sec instead of nanoseconds
525   // per iteration.
526   //
527   // REQUIRES: a benchmark has exited its benchmarking loop.
528   BENCHMARK_ALWAYS_INLINE
SetBytesProcessed(int64_t bytes)529   void SetBytesProcessed(int64_t bytes) { bytes_processed_ = bytes; }
530 
531   BENCHMARK_ALWAYS_INLINE
bytes_processed()532   int64_t bytes_processed() const { return bytes_processed_; }
533 
534   // If this routine is called with complexity_n > 0 and complexity report is
535   // requested for the
536   // family benchmark, then current benchmark will be part of the computation
537   // and complexity_n will
538   // represent the length of N.
539   BENCHMARK_ALWAYS_INLINE
SetComplexityN(int64_t complexity_n)540   void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }
541 
542   BENCHMARK_ALWAYS_INLINE
complexity_length_n()543   int64_t complexity_length_n() { return complexity_n_; }
544 
545   // If this routine is called with items > 0, then an items/s
546   // label is printed on the benchmark report line for the currently
547   // executing benchmark. It is typically called at the end of a processing
548   // benchmark where a processing items/second output is desired.
549   //
550   // REQUIRES: a benchmark has exited its benchmarking loop.
551   BENCHMARK_ALWAYS_INLINE
SetItemsProcessed(int64_t items)552   void SetItemsProcessed(int64_t items) { items_processed_ = items; }
553 
554   BENCHMARK_ALWAYS_INLINE
items_processed()555   int64_t items_processed() const { return items_processed_; }
556 
557   // If this routine is called, the specified label is printed at the
558   // end of the benchmark report line for the currently executing
559   // benchmark.  Example:
560   //  static void BM_Compress(benchmark::State& state) {
561   //    ...
562   //    double compress = input_size / output_size;
563   //    state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression));
564   //  }
565   // Produces output that looks like:
566   //  BM_Compress   50         50   14115038  compress:27.3%
567   //
568   // REQUIRES: a benchmark has exited its benchmarking loop.
569   void SetLabel(const char* label);
570 
SetLabel(const std::string & str)571   void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
572     this->SetLabel(str.c_str());
573   }
574 
575   // Range arguments for this run. CHECKs if the argument has been set.
576   BENCHMARK_ALWAYS_INLINE
577   int64_t range(std::size_t pos = 0) const {
578     assert(range_.size() > pos);
579     return range_[pos];
580   }
581 
582   BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
range_x()583   int64_t range_x() const { return range(0); }
584 
585   BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
range_y()586   int64_t range_y() const { return range(1); }
587 
588   BENCHMARK_ALWAYS_INLINE
iterations()589   size_t iterations() const {
590     if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
591       return 0;
592     }
593     return max_iterations - total_iterations_ + batch_leftover_;
594   }
595 
596 private: // items we expect on the first cache line (ie 64 bytes of the struct)
597 
598   // When total_iterations_ is 0, KeepRunning() and friends will return false.
599   // May be larger than max_iterations.
600   size_t total_iterations_;
601 
602   // When using KeepRunningBatch(), batch_leftover_ holds the number of
603   // iterations beyond max_iters that were run. Used to track
604   // completed_iterations_ accurately.
605   size_t batch_leftover_;
606 
607 public:
608   const size_t max_iterations;
609 
610 private:
611   bool started_;
612   bool finished_;
613   bool error_occurred_;
614 
615 private: // items we don't need on the first cache line
616   std::vector<int64_t> range_;
617 
618   int64_t bytes_processed_;
619   int64_t items_processed_;
620 
621   int64_t complexity_n_;
622 
623  public:
624   // Container for user-defined counters.
625   UserCounters counters;
626   // Index of the executing thread. Values from [0, threads).
627   const int thread_index;
628   // Number of threads concurrently executing the benchmark.
629   const int threads;
630 
631 
632   // TODO(EricWF) make me private
633   State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
634         int n_threads, internal::ThreadTimer* timer,
635         internal::ThreadManager* manager);
636 
637  private:
638   void StartKeepRunning();
639   // Implementation of KeepRunning() and KeepRunningBatch().
640   // is_batch must be true unless n is 1.
641   bool KeepRunningInternal(size_t n, bool is_batch);
642   void FinishKeepRunning();
643   internal::ThreadTimer* timer_;
644   internal::ThreadManager* manager_;
645   BENCHMARK_DISALLOW_COPY_AND_ASSIGN(State);
646 };
647 
648 inline BENCHMARK_ALWAYS_INLINE
KeepRunning()649 bool State::KeepRunning() {
650   return KeepRunningInternal(1, /*is_batch=*/ false);
651 }
652 
653 inline BENCHMARK_ALWAYS_INLINE
KeepRunningBatch(size_t n)654 bool State::KeepRunningBatch(size_t n) {
655   return KeepRunningInternal(n, /*is_batch=*/ true);
656 }
657 
// Shared implementation of KeepRunning() and KeepRunningBatch().
//
// Consumes `n` iterations from the remaining budget (total_iterations_) and
// returns true while the benchmark loop should keep going. The first call
// starts the run via StartKeepRunning(); the call that finds the budget
// exhausted calls FinishKeepRunning() and returns false.
//
//   n         number of iterations requested; must be > 0.
//   is_batch  true when called from KeepRunningBatch(); must be true unless
//             n is 1.
inline BENCHMARK_ALWAYS_INLINE
bool State::KeepRunningInternal(size_t n, bool is_batch) {
  // total_iterations_ is set to 0 by the constructor, and always set to a
  // nonzero value by StartKeepRunning().
  assert(n > 0);
  // n must be 1 unless is_batch is true.
  assert(is_batch || n == 1);
  // Expected path: enough budget is left, so just consume it.
  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
    total_iterations_ -= n;
    return true;
  }
  // First call of the run: start it, then retry the check above unless an
  // error has been recorded.
  if (!started_) {
    StartKeepRunning();
    if (!error_occurred_ && total_iterations_ >= n) {
      total_iterations_-= n;
      return true;
    }
  }
  // For non-batch runs, total_iterations_ must be 0 by now.
  // A batch run may still have fewer than n iterations left: grant one final
  // batch and record how far it overshoots the budget.
  if (is_batch && total_iterations_ != 0) {
    batch_leftover_  = n - total_iterations_;
    total_iterations_ = 0;
    return true;
  }
  FinishKeepRunning();
  return false;
}
685 
686 struct State::StateIterator {
687   struct BENCHMARK_UNUSED Value {};
688   typedef std::forward_iterator_tag iterator_category;
689   typedef Value value_type;
690   typedef Value reference;
691   typedef Value pointer;
692   typedef std::ptrdiff_t difference_type;
693 
694  private:
695   friend class State;
696   BENCHMARK_ALWAYS_INLINE
StateIteratorStateIterator697   StateIterator() : cached_(0), parent_() {}
698 
699   BENCHMARK_ALWAYS_INLINE
StateIteratorStateIterator700   explicit StateIterator(State* st)
701       : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}
702 
703  public:
704   BENCHMARK_ALWAYS_INLINE
705   Value operator*() const { return Value(); }
706 
707   BENCHMARK_ALWAYS_INLINE
708   StateIterator& operator++() {
709     assert(cached_ > 0);
710     --cached_;
711     return *this;
712   }
713 
714   BENCHMARK_ALWAYS_INLINE
715   bool operator!=(StateIterator const&) const {
716     if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
717     parent_->FinishKeepRunning();
718     return false;
719   }
720 
721  private:
722   size_t cached_;
723   State* const parent_;
724 };
725 
begin()726 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
727   return StateIterator(this);
728 }
end()729 inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
730   StartKeepRunning();
731   return StateIterator();
732 }
733 
734 namespace internal {
735 
// Signature of a benchmark function: it receives the State by reference and
// returns nothing.
typedef void(Function)(State&);
737 
738 // ------------------------------------------------------
739 // Benchmark registration object.  The BENCHMARK() macro expands
740 // into an internal::Benchmark* object.  Various methods can
741 // be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can be
// chained into one expression.
744 class Benchmark {
745  public:
746   virtual ~Benchmark();
747 
748   // Note: the following methods all return "this" so that multiple
749   // method calls can be chained together in one expression.
750 
751   // Run this benchmark once with "x" as the extra argument passed
752   // to the function.
753   // REQUIRES: The function passed to the constructor must accept an arg1.
754   Benchmark* Arg(int64_t x);
755 
756   // Run this benchmark with the given time unit for the generated output report
757   Benchmark* Unit(TimeUnit unit);
758 
759   // Run this benchmark once for a number of values picked from the
760   // range [start..limit].  (start and limit are always picked.)
761   // REQUIRES: The function passed to the constructor must accept an arg1.
762   Benchmark* Range(int64_t start, int64_t limit);
763 
764   // Run this benchmark once for all values in the range [start..limit] with
765   // specific step
766   // REQUIRES: The function passed to the constructor must accept an arg1.
767   Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
768 
769   // Run this benchmark once with "args" as the extra arguments passed
770   // to the function.
771   // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
772   Benchmark* Args(const std::vector<int64_t>& args);
773 
774   // Equivalent to Args({x, y})
775   // NOTE: This is a legacy C++03 interface provided for compatibility only.
776   //   New code should use 'Args'.
ArgPair(int64_t x,int64_t y)777   Benchmark* ArgPair(int64_t x, int64_t y) {
778     std::vector<int64_t> args;
779     args.push_back(x);
780     args.push_back(y);
781     return Args(args);
782   }
783 
784   // Run this benchmark once for a number of values picked from the
785   // ranges [start..limit].  (starts and limits are always picked.)
786   // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
787   Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
788 
789   // Equivalent to ArgNames({name})
790   Benchmark* ArgName(const std::string& name);
791 
792   // Set the argument names to display in the benchmark name. If not called,
793   // only argument values will be shown.
794   Benchmark* ArgNames(const std::vector<std::string>& names);
795 
796   // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
797   // NOTE: This is a legacy C++03 interface provided for compatibility only.
798   //   New code should use 'Ranges'.
RangePair(int64_t lo1,int64_t hi1,int64_t lo2,int64_t hi2)799   Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
800     std::vector<std::pair<int64_t, int64_t> > ranges;
801     ranges.push_back(std::make_pair(lo1, hi1));
802     ranges.push_back(std::make_pair(lo2, hi2));
803     return Ranges(ranges);
804   }
805 
806   // Pass this benchmark object to *func, which can customize
807   // the benchmark by calling various methods like Arg, Args,
808   // Threads, etc.
809   Benchmark* Apply(void (*func)(Benchmark* benchmark));
810 
811   // Set the range multiplier for non-dense range. If not called, the range
812   // multiplier kRangeMultiplier will be used.
813   Benchmark* RangeMultiplier(int multiplier);
814 
815   // Set the minimum amount of time to use when running this benchmark. This
816   // option overrides the `benchmark_min_time` flag.
817   // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
818   Benchmark* MinTime(double t);
819 
  // Specify the number of iterations that should be run by this benchmark.
821   // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
822   //
823   // NOTE: This function should only be used when *exact* iteration control is
824   //   needed and never to control or limit how long a benchmark runs, where
825   // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
826   Benchmark* Iterations(size_t n);
827 
  // Specify the number of times to repeat this benchmark. This option overrides
  // the `benchmark_repetitions` flag.
830   // REQUIRES: `n > 0`
831   Benchmark* Repetitions(int n);
832 
833   // Specify if each repetition of the benchmark should be reported separately
834   // or if only the final statistics should be reported. If the benchmark
835   // is not repeated then the single result is always reported.
836   Benchmark* ReportAggregatesOnly(bool value = true);
837 
838   // If a particular benchmark is I/O bound, runs multiple threads internally or
839   // if for some reason CPU timings are not representative, call this method. If
840   // called, the elapsed time will be used to control how many iterations are
841   // run, and in the printing of items/second or MB/seconds values.  If not
842   // called, the cpu time used by the benchmark will be used.
843   Benchmark* UseRealTime();
844 
  // If a benchmark must measure time manually (e.g. if GPU execution time is
  // being measured), call this method. If called, each benchmark iteration
  // should call SetIterationTime(seconds) to report the measured time, which
  // will be used to control how many iterations are run, and in the printing
  // of items/second or MB/second values.
852   Benchmark* UseManualTime();
853 
854   // Set the asymptotic computational complexity for the benchmark. If called
855   // the asymptotic computational complexity will be shown on the output.
856   Benchmark* Complexity(BigO complexity = benchmark::oAuto);
857 
858   // Set the asymptotic computational complexity for the benchmark. If called
859   // the asymptotic computational complexity will be shown on the output.
860   Benchmark* Complexity(BigOFunc* complexity);
861 
  // Add a statistic to be computed over all the values of the benchmark run.
863   Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics);
864 
865   // Support for running multiple copies of the same benchmark concurrently
866   // in multiple threads.  This may be useful when measuring the scaling
867   // of some piece of code.
868 
869   // Run one instance of this benchmark concurrently in t threads.
870   Benchmark* Threads(int t);
871 
872   // Pick a set of values T from [min_threads,max_threads].
873   // min_threads and max_threads are always included in T.  Run this
874   // benchmark once for each value in T.  The benchmark run for a
875   // particular value t consists of t threads running the benchmark
876   // function concurrently.  For example, consider:
877   //    BENCHMARK(Foo)->ThreadRange(1,16);
878   // This will run the following benchmarks:
879   //    Foo in 1 thread
880   //    Foo in 2 threads
881   //    Foo in 4 threads
882   //    Foo in 8 threads
883   //    Foo in 16 threads
884   Benchmark* ThreadRange(int min_threads, int max_threads);
885 
886   // For each value n in the range, run this benchmark once using n threads.
887   // min_threads and max_threads are always included in the range.
888   // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
889   // a benchmark with 1, 4, 7 and 8 threads.
890   Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
891 
892   // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
893   Benchmark* ThreadPerCpu();
894 
895   virtual void Run(State& state) = 0;
896 
897   // Used inside the benchmark implementation
898   struct Instance;
899 
900  protected:
901   explicit Benchmark(const char* name);
902   Benchmark(Benchmark const&);
903   void SetName(const char* name);
904 
905   int ArgsCnt() const;
906 
907  private:
908   friend class BenchmarkFamilies;
909 
910   std::string name_;
911   ReportMode report_mode_;
  std::vector<std::string> arg_names_;   // Arg names for all benchmark runs
  std::vector<std::vector<int64_t> > args_;  // Args for all benchmark runs
914   TimeUnit time_unit_;
915   int range_multiplier_;
916   double min_time_;
917   size_t iterations_;
918   int repetitions_;
919   bool use_real_time_;
920   bool use_manual_time_;
921   BigO complexity_;
922   BigOFunc* complexity_lambda_;
923   std::vector<Statistics> statistics_;
924   std::vector<int> thread_counts_;
925 
926   Benchmark& operator=(Benchmark const&);
927 };
928 
929 }  // namespace internal
930 
931 // Create and register a benchmark with the specified 'name' that invokes
932 // the specified functor 'fn'.
933 //
934 // RETURNS: A pointer to the registered benchmark.
935 internal::Benchmark* RegisterBenchmark(const char* name,
936                                        internal::Function* fn);
937 
938 #if defined(BENCHMARK_HAS_CXX11)
939 template <class Lambda>
940 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
941 #endif
942 
943 // Remove all registered benchmarks. All pointers to previously registered
944 // benchmarks are invalidated.
945 void ClearRegisteredBenchmarks();
946 
947 namespace internal {
948 // The class used to hold all Benchmarks created from static function.
949 // (ie those created using the BENCHMARK(...) macros.
950 class FunctionBenchmark : public Benchmark {
951  public:
FunctionBenchmark(const char * name,Function * func)952   FunctionBenchmark(const char* name, Function* func)
953       : Benchmark(name), func_(func) {}
954 
955   virtual void Run(State& st);
956 
957  private:
958   Function* func_;
959 };
960 
961 #ifdef BENCHMARK_HAS_CXX11
962 template <class Lambda>
963 class LambdaBenchmark : public Benchmark {
964  public:
Run(State & st)965   virtual void Run(State& st) { lambda_(st); }
966 
967  private:
968   template <class OLambda>
LambdaBenchmark(const char * name,OLambda && lam)969   LambdaBenchmark(const char* name, OLambda&& lam)
970       : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
971 
972   LambdaBenchmark(LambdaBenchmark const&) = delete;
973 
974  private:
975   template <class Lam>
976   friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);
977 
978   Lambda lambda_;
979 };
980 #endif
981 
982 }  // namespace internal
983 
RegisterBenchmark(const char * name,internal::Function * fn)984 inline internal::Benchmark* RegisterBenchmark(const char* name,
985                                               internal::Function* fn) {
986   return internal::RegisterBenchmarkInternal(
987       ::new internal::FunctionBenchmark(name, fn));
988 }
989 
990 #ifdef BENCHMARK_HAS_CXX11
991 template <class Lambda>
RegisterBenchmark(const char * name,Lambda && fn)992 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
993   using BenchType =
994       internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
995   return internal::RegisterBenchmarkInternal(
996       ::new BenchType(name, std::forward<Lambda>(fn)));
997 }
998 #endif
999 
1000 #if defined(BENCHMARK_HAS_CXX11) && \
1001     (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
1002 template <class Lambda, class... Args>
RegisterBenchmark(const char * name,Lambda && fn,Args &&...args)1003 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
1004                                        Args&&... args) {
1005   return benchmark::RegisterBenchmark(
1006       name, [=](benchmark::State& st) { fn(st, args...); });
1007 }
1008 #else
1009 #define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
1010 #endif
1011 
1012 // The base class for all fixture tests.
1013 class Fixture : public internal::Benchmark {
1014  public:
Fixture()1015   Fixture() : internal::Benchmark("") {}
1016 
Run(State & st)1017   virtual void Run(State& st) {
1018     this->SetUp(st);
1019     this->BenchmarkCase(st);
1020     this->TearDown(st);
1021   }
1022 
1023   // These will be deprecated ...
SetUp(const State &)1024   virtual void SetUp(const State&) {}
TearDown(const State &)1025   virtual void TearDown(const State&) {}
1026   // ... In favor of these.
SetUp(State & st)1027   virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
TearDown(State & st)1028   virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
1029 
1030  protected:
1031   virtual void BenchmarkCase(State&) = 0;
1032 };
1033 
1034 }  // namespace benchmark
1035 
1036 // ------------------------------------------------------
1037 // Macro to register benchmarks
1038 
// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
// empty. If X is empty the expression becomes (+1 == +0).
// When the check fails, __LINE__ is used as the unique id instead.
#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
#else
#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
#endif

// Helpers for generating unique variable names of the form
// _benchmark_<unique id><n>. The extra CONCAT -> CONCAT2 level lets
// BENCHMARK_PRIVATE_UNIQUE_ID be macro-expanded before '##' pastes the tokens.
#define BENCHMARK_PRIVATE_NAME(n) \
  BENCHMARK_PRIVATE_CONCAT(_benchmark_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c

// Declares (without initializing) a static Benchmark pointer with a unique
// name derived from 'n', tagged with BENCHMARK_UNUSED.
#define BENCHMARK_PRIVATE_DECLARE(n)                                 \
  static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
      BENCHMARK_UNUSED

// Registers function 'n' as a benchmark named "n". The expansion is an
// initialized declaration without a trailing ';', so calls such as ->Arg(...)
// can be chained directly onto BENCHMARK(n).
#define BENCHMARK(n)                                     \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n, n)))
1062 
// Old-style macros. Each one expands to BENCHMARK(n) followed by a single
// chained configuration call.
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
// RangePair takes the four bounds as separate integer arguments, not as a
// braced list of pairs, so they are passed individually here.
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
  BENCHMARK(n)->RangePair((l1), (h1), (l2), (h2))
1070 
1071 #ifdef BENCHMARK_HAS_CXX11
1072 
// Register a benchmark which invokes the function specified by `func`
// with the additional arguments specified by `...`.
//
// For example:
//
// template <class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//  [...]
// }
// /* Registers a benchmark named "BM_takes_args/int_string_test" */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
//
// The extra arguments are pasted into the body of a capture-less lambda, so
// the argument expressions are evaluated each time that lambda is called.
#define BENCHMARK_CAPTURE(func, test_case_name, ...)     \
  BENCHMARK_PRIVATE_DECLARE(func) =                      \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #func "/" #test_case_name,                 \
              [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1090 
1091 #endif  // BENCHMARK_HAS_CXX11
1092 
// This will register a benchmark for a templatized function.  For example:
//
// template<int arg>
// void BM_Foo(benchmark::State& state);
//
// BENCHMARK_TEMPLATE(BM_Foo, 1);
//
// will register BM_Foo<1> as a benchmark. The registered name is the
// stringified template-id, e.g. "BM_Foo<1>".
#define BENCHMARK_TEMPLATE1(n, a)                        \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))

// Two-template-argument variant; the registered name is "n<a,b>".
#define BENCHMARK_TEMPLATE2(n, a, b)                                         \
  BENCHMARK_PRIVATE_DECLARE(n) =                                             \
      (::benchmark::internal::RegisterBenchmarkInternal(                     \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
                                                       n<a, b>)))

// With C++11 any number of template arguments is accepted; otherwise
// BENCHMARK_TEMPLATE is an alias for the one-argument form.
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE(n, ...)                       \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
#else
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
1121 
// Defines class BaseClass_Method_Benchmark, derived from the fixture
// 'BaseClass'. Its constructor sets the benchmark name to "BaseClass/Method",
// and its BenchmarkCase override is declared here but defined elsewhere.
#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method)        \
  class BaseClass##_##Method##_Benchmark : public BaseClass { \
   public:                                                    \
    BaseClass##_##Method##_Benchmark() : BaseClass() {        \
      this->SetName(#BaseClass "/" #Method);                  \
    }                                                         \
                                                              \
   protected:                                                 \
    virtual void BenchmarkCase(::benchmark::State&);          \
  };

// Same, for a fixture template with one argument: the class derives from
// BaseClass<a> and is named "BaseClass<a>/Method".
#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a> {    \
   public:                                                          \
    BaseClass##_##Method##_Benchmark() : BaseClass<a>() {           \
      this->SetName(#BaseClass"<" #a ">/" #Method);                 \
    }                                                               \
                                                                    \
   protected:                                                       \
    virtual void BenchmarkCase(::benchmark::State&);                \
  };

// Same, for a fixture template with two arguments: the class derives from
// BaseClass<a, b> and is named "BaseClass<a,b>/Method".
#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> {    \
   public:                                                             \
    BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() {           \
      this->SetName(#BaseClass"<" #a "," #b ">/" #Method);             \
    }                                                                  \
                                                                       \
   protected:                                                          \
    virtual void BenchmarkCase(::benchmark::State&);                   \
  };
1154 
// Declares class BaseClass_Method_Benchmark deriving from a fixture template.
// With C++11 any number of template arguments is accepted; the benchmark is
// named "BaseClass<args>/Method". Without C++11 this forwards to the
// one-argument form and must pass BaseClass, Method and the template argument
// through unchanged.
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...)       \
  class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
   public:                                                                 \
    BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() {        \
      this->SetName(#BaseClass"<" #__VA_ARGS__ ">/" #Method);              \
    }                                                                      \
                                                                           \
   protected:                                                              \
    virtual void BenchmarkCase(::benchmark::State&);                       \
  };
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a)
#endif
1169 
// Declares the fixture benchmark class for (BaseClass, Method) and opens the
// out-of-class definition of its BenchmarkCase member: the macro invocation is
// followed by the parameter list and body. Nothing is registered here; that
// is done separately with BENCHMARK_REGISTER_F.
#define BENCHMARK_DEFINE_F(BaseClass, Method)    \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

// As BENCHMARK_DEFINE_F, for a fixture template with one argument.
#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)    \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

// As BENCHMARK_DEFINE_F, for a fixture template with two arguments.
#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b)    \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

// Variadic form with C++11; otherwise an alias for the one-argument form.
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...)            \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
#endif
1189 
// Registers the fixture benchmark class BaseClass_Method_Benchmark. Like
// BENCHMARK(n), the expansion has no trailing ';'.
#define BENCHMARK_REGISTER_F(BaseClass, Method) \
  BENCHMARK_PRIVATE_REGISTER_F(BaseClass##_##Method##_Benchmark)

// Creates a new TestName object, passes it to RegisterBenchmarkInternal and
// stores the result in a uniquely named static Benchmark pointer.
#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
  BENCHMARK_PRIVATE_DECLARE(TestName) =        \
      (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
1196 
// This macro will define and register a benchmark within a fixture class.
// The expansion ends with the qualified name of BenchmarkCase, so the macro
// invocation is followed by the parameter list and body of that method.
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

// As BENCHMARK_F, for a fixture template with one argument.
#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                    \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

// As BENCHMARK_F, for a fixture template with two arguments.
#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                       \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase

// Variadic form with C++11; otherwise an alias for the one-argument form.
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)           \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                     \
  void BaseClass##_##Method##_Benchmark::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
#endif
1221 
// Helper macro to create a main routine in a test that runs the benchmarks.
// main() returns 1 when ReportUnrecognizedArguments reports a problem and 0
// otherwise. The trailing declaration of main exists so that the macro
// invocation can be terminated with a ';'.
#define BENCHMARK_MAIN()                                                \
  int main(int argc, char** argv) {                                     \
    ::benchmark::Initialize(&argc, argv);                               \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks();                              \
    return 0;                                                           \
  }                                                                     \
  int main(int, char**)
1230 
1231 
1232 // ------------------------------------------------------
1233 // Benchmark Reporters
1234 
1235 namespace benchmark {
1236 
1237 struct CPUInfo {
1238   struct CacheInfo {
1239     std::string type;
1240     int level;
1241     int size;
1242     int num_sharing;
1243   };
1244 
1245   int num_cpus;
1246   double cycles_per_second;
1247   std::vector<CacheInfo> caches;
1248   bool scaling_enabled;
1249 
1250   static const CPUInfo& Get();
1251 
1252  private:
1253   CPUInfo();
1254   BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
1255 };
1256 
1257 // Interface for custom benchmark result printers.
1258 // By default, benchmark reports are printed to stdout. However an application
1259 // can control the destination of the reports by calling
1260 // RunSpecifiedBenchmarks and passing it a custom reporter object.
1261 // The reporter object must implement the following interface.
1262 class BenchmarkReporter {
1263  public:
1264   struct Context {
1265     CPUInfo const& cpu_info;
1266     // The number of chars in the longest benchmark name.
1267     size_t name_field_width;
1268     static const char *executable_name;
1269     Context();
1270   };
1271 
1272   struct Run {
RunRun1273     Run()
1274         : error_occurred(false),
1275           iterations(1),
1276           time_unit(kNanosecond),
1277           real_accumulated_time(0),
1278           cpu_accumulated_time(0),
1279           bytes_per_second(0),
1280           items_per_second(0),
1281           max_heapbytes_used(0),
1282           complexity(oNone),
1283           complexity_lambda(),
1284           complexity_n(0),
1285           report_big_o(false),
1286           report_rms(false),
1287           counters() {}
1288 
1289     std::string benchmark_name;
1290     std::string report_label;  // Empty if not set by benchmark.
1291     bool error_occurred;
1292     std::string error_message;
1293 
1294     int64_t iterations;
1295     TimeUnit time_unit;
1296     double real_accumulated_time;
1297     double cpu_accumulated_time;
1298 
1299     // Return a value representing the real time per iteration in the unit
1300     // specified by 'time_unit'.
1301     // NOTE: If 'iterations' is zero the returned value represents the
1302     // accumulated time.
1303     double GetAdjustedRealTime() const;
1304 
1305     // Return a value representing the cpu time per iteration in the unit
1306     // specified by 'time_unit'.
1307     // NOTE: If 'iterations' is zero the returned value represents the
1308     // accumulated time.
1309     double GetAdjustedCPUTime() const;
1310 
1311     // Zero if not set by benchmark.
1312     double bytes_per_second;
1313     double items_per_second;
1314 
1315     // This is set to 0.0 if memory tracing is not enabled.
1316     double max_heapbytes_used;
1317 
1318     // Keep track of arguments to compute asymptotic complexity
1319     BigO complexity;
1320     BigOFunc* complexity_lambda;
1321     int64_t complexity_n;
1322 
1323     // what statistics to compute from the measurements
1324     const std::vector<Statistics>* statistics;
1325 
1326     // Inform print function whether the current run is a complexity report
1327     bool report_big_o;
1328     bool report_rms;
1329 
1330     UserCounters counters;
1331   };
1332 
1333   // Construct a BenchmarkReporter with the output stream set to 'std::cout'
1334   // and the error stream set to 'std::cerr'
1335   BenchmarkReporter();
1336 
1337   // Called once for every suite of benchmarks run.
1338   // The parameter "context" contains information that the
1339   // reporter may wish to use when generating its report, for example the
1340   // platform under which the benchmarks are running. The benchmark run is
1341   // never started if this function returns false, allowing the reporter
1342   // to skip runs based on the context information.
1343   virtual bool ReportContext(const Context& context) = 0;
1344 
1345   // Called once for each group of benchmark runs, gives information about
1346   // cpu-time and heap memory usage during the benchmark run. If the group
1347   // of runs contained more than two entries then 'report' contains additional
1348   // elements representing the mean and standard deviation of those runs.
1349   // Additionally if this group of runs was the last in a family of benchmarks
1350   // 'reports' contains additional entries representing the asymptotic
1351   // complexity and RMS of that benchmark family.
1352   virtual void ReportRuns(const std::vector<Run>& report) = 0;
1353 
1354   // Called once and only once after ever group of benchmarks is run and
1355   // reported.
Finalize()1356   virtual void Finalize() {}
1357 
1358   // REQUIRES: The object referenced by 'out' is valid for the lifetime
1359   // of the reporter.
SetOutputStream(std::ostream * out)1360   void SetOutputStream(std::ostream* out) {
1361     assert(out);
1362     output_stream_ = out;
1363   }
1364 
1365   // REQUIRES: The object referenced by 'err' is valid for the lifetime
1366   // of the reporter.
SetErrorStream(std::ostream * err)1367   void SetErrorStream(std::ostream* err) {
1368     assert(err);
1369     error_stream_ = err;
1370   }
1371 
GetOutputStream()1372   std::ostream& GetOutputStream() const { return *output_stream_; }
1373 
GetErrorStream()1374   std::ostream& GetErrorStream() const { return *error_stream_; }
1375 
1376   virtual ~BenchmarkReporter();
1377 
1378   // Write a human readable string to 'out' representing the specified
1379   // 'context'.
1380   // REQUIRES: 'out' is non-null.
1381   static void PrintBasicContext(std::ostream* out, Context const& context);
1382 
1383  private:
1384   std::ostream* output_stream_;
1385   std::ostream* error_stream_;
1386 };
1387 
1388 // Simple reporter that outputs benchmark data to the console. This is the
1389 // default reporter used by RunSpecifiedBenchmarks().
1390 class ConsoleReporter : public BenchmarkReporter {
1391 public:
1392   enum OutputOptions {
1393     OO_None = 0,
1394     OO_Color = 1,
1395     OO_Tabular = 2,
1396     OO_ColorTabular = OO_Color|OO_Tabular,
1397     OO_Defaults = OO_ColorTabular
1398   };
1399   explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
output_options_(opts_)1400       : output_options_(opts_), name_field_width_(0),
1401         prev_counters_(), printed_header_(false) {}
1402 
1403   virtual bool ReportContext(const Context& context);
1404   virtual void ReportRuns(const std::vector<Run>& reports);
1405 
1406  protected:
1407   virtual void PrintRunData(const Run& report);
1408   virtual void PrintHeader(const Run& report);
1409 
1410   OutputOptions output_options_;
1411   size_t name_field_width_;
1412   UserCounters prev_counters_;
1413   bool printed_header_;
1414 };
1415 
1416 class JSONReporter : public BenchmarkReporter {
1417  public:
JSONReporter()1418   JSONReporter() : first_report_(true) {}
1419   virtual bool ReportContext(const Context& context);
1420   virtual void ReportRuns(const std::vector<Run>& reports);
1421   virtual void Finalize();
1422 
1423  private:
1424   void PrintRunData(const Run& report);
1425 
1426   bool first_report_;
1427 };
1428 
1429 class CSVReporter : public BenchmarkReporter {
1430  public:
CSVReporter()1431   CSVReporter() : printed_header_(false) {}
1432   virtual bool ReportContext(const Context& context);
1433   virtual void ReportRuns(const std::vector<Run>& reports);
1434 
1435  private:
1436   void PrintRunData(const Run& report);
1437 
1438   bool printed_header_;
1439   std::set< std::string > user_counter_names_;
1440 };
1441 
GetTimeUnitString(TimeUnit unit)1442 inline const char* GetTimeUnitString(TimeUnit unit) {
1443   switch (unit) {
1444     case kMillisecond:
1445       return "ms";
1446     case kMicrosecond:
1447       return "us";
1448     case kNanosecond:
1449       return "ns";
1450   }
1451   BENCHMARK_UNREACHABLE();
1452 }
1453 
GetTimeUnitMultiplier(TimeUnit unit)1454 inline double GetTimeUnitMultiplier(TimeUnit unit) {
1455   switch (unit) {
1456     case kMillisecond:
1457       return 1e3;
1458     case kMicrosecond:
1459       return 1e6;
1460     case kNanosecond:
1461       return 1e9;
1462   }
1463   BENCHMARK_UNREACHABLE();
1464 }
1465 
1466 } // namespace benchmark
1467 
1468 #endif  // BENCHMARK_BENCHMARK_H_
1469