1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 // Support for registering benchmarks for functions.
16
17 /* Example usage:
18 // Define a function that executes the code to be measured a
19 // specified number of times:
20 static void BM_StringCreation(benchmark::State& state) {
21 for (auto _ : state)
22 std::string empty_string;
23 }
24
25 // Register the function as a benchmark
26 BENCHMARK(BM_StringCreation);
27
28 // Define another benchmark
29 static void BM_StringCopy(benchmark::State& state) {
30 std::string x = "hello";
31 for (auto _ : state)
32 std::string copy(x);
33 }
34 BENCHMARK(BM_StringCopy);
35
36 // Augment the main() program to invoke benchmarks if specified
37 // via the --benchmarks command line flag. E.g.,
38 // my_unittest --benchmark_filter=all
39 // my_unittest --benchmark_filter=BM_StringCreation
40 // my_unittest --benchmark_filter=String
41 // my_unittest --benchmark_filter='Copy|Creation'
42 int main(int argc, char** argv) {
43 benchmark::Initialize(&argc, argv);
44 benchmark::RunSpecifiedBenchmarks();
45 benchmark::Shutdown();
46 return 0;
47 }
48
49 // Sometimes a family of microbenchmarks can be implemented with
50 // just one routine that takes an extra argument to specify which
51 // one of the family of benchmarks to run. For example, the following
52 // code defines a family of microbenchmarks for measuring the speed
53 // of memcpy() calls of different lengths:
54
55 static void BM_memcpy(benchmark::State& state) {
56 char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
57 memset(src, 'x', state.range(0));
58 for (auto _ : state)
59 memcpy(dst, src, state.range(0));
60 state.SetBytesProcessed(state.iterations() * state.range(0));
61 delete[] src; delete[] dst;
62 }
63 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
64
65 // The preceding code is quite repetitive, and can be replaced with the
66 // following short-hand. The following invocation will pick a few
67 // appropriate arguments in the specified range and will generate a
68 // microbenchmark for each such argument.
69 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
70
71 // You might have a microbenchmark that depends on two inputs. For
72 // example, the following code defines a family of microbenchmarks for
73 // measuring the speed of set insertion.
74 static void BM_SetInsert(benchmark::State& state) {
75 set<int> data;
76 for (auto _ : state) {
77 state.PauseTiming();
78 data = ConstructRandomSet(state.range(0));
79 state.ResumeTiming();
80 for (int j = 0; j < state.range(1); ++j)
81 data.insert(RandomNumber());
82 }
83 }
84 BENCHMARK(BM_SetInsert)
85 ->Args({1<<10, 128})
86 ->Args({2<<10, 128})
87 ->Args({4<<10, 128})
88 ->Args({8<<10, 128})
89 ->Args({1<<10, 512})
90 ->Args({2<<10, 512})
91 ->Args({4<<10, 512})
92 ->Args({8<<10, 512});
93
94 // The preceding code is quite repetitive, and can be replaced with
95 // the following short-hand. The following macro will pick a few
96 // appropriate arguments in the product of the two specified ranges
97 // and will generate a microbenchmark for each such pair.
98 BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
99
100 // For more complex patterns of inputs, passing a custom function
101 // to Apply allows programmatic specification of an
102 // arbitrary set of arguments to run the microbenchmark on.
103 // The following example enumerates a dense range on
104 // one parameter, and a sparse range on the second.
105 static void CustomArguments(benchmark::internal::Benchmark* b) {
106 for (int i = 0; i <= 10; ++i)
107 for (int j = 32; j <= 1024*1024; j *= 8)
108 b->Args({i, j});
109 }
110 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
111
112 // Templated microbenchmarks work the same way:
113 // Produce then consume 'size' messages 'iters' times
114 // Measures throughput in the absence of multiprogramming.
115 template <class Q> int BM_Sequential(benchmark::State& state) {
116 Q q;
117 typename Q::value_type v;
118 for (auto _ : state) {
119 for (int i = state.range(0); i--; )
120 q.push(v);
121 for (int e = state.range(0); e--; )
122 q.Wait(&v);
123 }
124 // actually messages, not bytes:
125 state.SetBytesProcessed(state.iterations() * state.range(0));
126 }
127 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
128
129 Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
130 benchmark. This option overrides the `benchmark_min_time` flag.
131
132 void BM_test(benchmark::State& state) {
133 ... body ...
134 }
135 BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
136
137 In a multithreaded test, it is guaranteed that none of the threads will start
138 until all have reached the loop start, and all will have finished before any
139 thread exits the loop body. As such, any global setup or teardown you want to
140 do can be wrapped in a check against the thread index:
141
142 static void BM_MultiThreaded(benchmark::State& state) {
143 if (state.thread_index() == 0) {
144 // Setup code here.
145 }
146 for (auto _ : state) {
147 // Run the test as normal.
148 }
149 if (state.thread_index() == 0) {
150 // Teardown code here.
151 }
152 }
153 BENCHMARK(BM_MultiThreaded)->Threads(4);
154
155
If a benchmark runs for a few milliseconds it may be hard to visually compare
the measured times, since the output data is given in nanoseconds by default.
In order to set the time unit manually, you can specify it as follows:
159
160 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
161 */
162
163 #ifndef BENCHMARK_BENCHMARK_H_
164 #define BENCHMARK_BENCHMARK_H_
165
166 // The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
167 #if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
168 #define BENCHMARK_HAS_CXX11
169 #endif
170
171 // This _MSC_VER check should detect VS 2017 v15.3 and newer.
172 #if __cplusplus >= 201703L || \
173 (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L)
174 #define BENCHMARK_HAS_CXX17
175 #endif
176
177 #include <stdint.h>
178
179 #include <algorithm>
180 #include <cassert>
181 #include <cstddef>
182 #include <iosfwd>
183 #include <map>
184 #include <set>
185 #include <string>
186 #include <utility>
187 #include <vector>
188
189 #if defined(BENCHMARK_HAS_CXX11)
190 #include <atomic>
191 #include <initializer_list>
192 #include <type_traits>
193 #include <utility>
194 #endif
195
196 #if defined(_MSC_VER)
197 #include <intrin.h> // for _ReadWriteBarrier
198 #endif
199
// Disallow copying/assignment of a type: '= delete' under C++11, the
// declared-but-never-defined idiom on older compilers.
#ifndef BENCHMARK_HAS_CXX11
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&);                         \
  TypeName& operator=(const TypeName&)
#else
#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
  TypeName(const TypeName&) = delete;                \
  TypeName& operator=(const TypeName&) = delete
#endif
209
// Marks a declaration as intentionally unused, silencing compiler warnings.
#ifdef BENCHMARK_HAS_CXX17
#define BENCHMARK_UNUSED [[maybe_unused]]
#elif defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_UNUSED __attribute__((unused))
#else
#define BENCHMARK_UNUSED
#endif
217
// Compiler-specific spellings for forced inlining and noexcept; both expand
// to nothing on compilers without support.
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#elif defined(_MSC_VER) && !defined(__clang__)
#define BENCHMARK_ALWAYS_INLINE __forceinline
#if _MSC_VER >= 1900
#define BENCHMARK_NOEXCEPT noexcept
#define BENCHMARK_NOEXCEPT_OP(x) noexcept(x)
#else
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
// Map the standard __func__ to the MSVC-specific spelling.
#define __func__ __FUNCTION__
#else
#define BENCHMARK_ALWAYS_INLINE
#define BENCHMARK_NOEXCEPT
#define BENCHMARK_NOEXCEPT_OP(x)
#endif
237
// Stringize helpers (two levels so macro arguments are expanded first).
#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)

// Branch-prediction hint and deprecation attribute. On non-GNU compilers
// BENCHMARK_WARNING_MSG emits a build-time note via #pragma message instead
// of a deprecation attribute.
#if defined(__GNUC__) || defined(__clang__)
#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
#else
#define BENCHMARK_BUILTIN_EXPECT(x, y) x
#define BENCHMARK_DEPRECATED_MSG(msg)
#define BENCHMARK_WARNING_MSG(msg)                           \
  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
      __LINE__) ") : warning note: " msg))
#endif
251
#if defined(__GNUC__) && !defined(__clang__)
// Encodes the GCC version as major*100 + minor (e.g. GCC 10.2 -> 1002).
#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
#endif

// Degrade gracefully on compilers without __has_builtin.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Tell the optimizer a code path can never be reached; no-op as a fallback.
#if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
#define BENCHMARK_UNREACHABLE() __builtin_unreachable()
#elif defined(_MSC_VER)
#define BENCHMARK_UNREACHABLE() __assume(false)
#else
#define BENCHMARK_UNREACHABLE() ((void)0)
#endif
267
// 'override' is only available from C++11 onwards.
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_OVERRIDE override
#else
#define BENCHMARK_OVERRIDE
#endif
273
namespace benchmark {
class BenchmarkReporter;
class MemoryManager;

// Parse benchmark-related command line flags and initialize the library.
// In the usage example above this is called before RunSpecifiedBenchmarks().
void Initialize(int* argc, char** argv);
// Tear the library back down; called after RunSpecifiedBenchmarks() in the
// usage example above.
void Shutdown();

// Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true if there is at least one unrecognized argument
// (i.e. 'argc' > 1).
bool ReportUnrecognizedArguments(int argc, char** argv);

// Generate a list of benchmarks matching the specified --benchmark_filter flag
// and if --benchmark_list_tests is specified return after printing the name
// of each matching benchmark. Otherwise run each matching benchmark and
// report the results.
//
// The second and third overload use the specified 'display_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file
// specified by '--benchmark_output'. If '--benchmark_output' is not given
// the 'file_reporter' is ignored.
//
// RETURNS: The number of matching benchmarks.
size_t RunSpecifiedBenchmarks();
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
                              BenchmarkReporter* file_reporter);

// Register a MemoryManager instance that will be used to collect and report
// allocation measurements for benchmark runs.
void RegisterMemoryManager(MemoryManager* memory_manager);

// Add a key-value pair to output as part of the context stanza in the report.
void AddCustomContext(const std::string& key, const std::string& value);
308
namespace internal {
class Benchmark;
class BenchmarkImp;
class BenchmarkFamilies;

// Out-of-line sink used by the DoNotOptimize() fallbacks below; because its
// definition is not visible here, the optimizer must assume the pointee is
// used.
void UseCharPointer(char const volatile*);

// Take ownership of the pointer and register the benchmark. Return the
// registered benchmark.
Benchmark* RegisterBenchmarkInternal(Benchmark*);

// Ensure that the standard streams are properly initialized in every TU.
int InitializeStreams();
BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();

}  // namespace internal
325
// GNU-style inline assembly (used by DoNotOptimize/ClobberMemory below) is
// unavailable on non-GNU-compatible compilers and under pnacl/Emscripten.
#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
    defined(__EMSCRIPTEN__)
#define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
#endif
330
// Force the compiler to flush pending writes to global memory. Acts as an
// effective read/write barrier
#ifdef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  // A signal fence is a compiler-only barrier: it stops the optimizer from
  // caching or reordering memory accesses across this point without emitting
  // a CPU fence instruction.
  std::atomic_signal_fence(std::memory_order_acq_rel);
}
#endif
338
// The DoNotOptimize(...) function can be used to prevent a value or
// expression from being optimized away by the compiler. This function is
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  // Empty asm with 'value' as an input ("r,m": register or memory) forces the
  // compiler to materialize the value; the "memory" clobber makes it flush
  // pending writes first.
  asm volatile("" : : "r,m"(value) : "memory");
}
348
// Overload for non-const lvalues: marks 'value' as read AND written ("+"), so
// subsequent reads of it cannot be folded away either.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
#if defined(__clang__)
  asm volatile("" : "+r,m"(value) : : "memory");
#else
  // Same constraint as above but with the alternatives in the opposite order
  // ("m" first) for GCC.
  asm volatile("" : "+m,r"(value) : : "memory");
#endif
}
357
#ifndef BENCHMARK_HAS_CXX11
// Pre-C++11 fallback: an empty asm with a "memory" clobber is a
// compiler-only read/write barrier.
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  asm volatile("" : : : "memory");
}
#endif
363 #elif defined(_MSC_VER)
// MSVC variant: escape the value's address through an opaque function call so
// the compiler must assume it is used, then stop compiler reordering with the
// _ReadWriteBarrier() intrinsic.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
  _ReadWriteBarrier();
}
369
#ifndef BENCHMARK_HAS_CXX11
// Pre-C++11 fallback: _ReadWriteBarrier() is MSVC's compiler-only fence.
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
#endif
373 #else
// Portable fallback: pass the address to an opaque out-of-line function so
// the compiler cannot prove the value is unused. No barrier is available.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
378 // FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
379 #endif
380
// This class is used for user-defined counters.
class Counter {
 public:
  enum Flags {
    kDefaults = 0,
    // Mark the counter as a rate. It will be presented divided
    // by the duration of the benchmark.
    kIsRate = 1U << 0U,
    // Mark the counter as a thread-average quantity. It will be
    // presented divided by the number of threads.
    kAvgThreads = 1U << 1U,
    // Mark the counter as a thread-average rate. See above.
    kAvgThreadsRate = kIsRate | kAvgThreads,
    // Mark the counter as a constant value, valid/same for *every* iteration.
    // When reporting, it will be *multiplied* by the iteration count.
    kIsIterationInvariant = 1U << 2U,
    // Mark the counter as a constant rate.
    // When reporting, it will be *multiplied* by the iteration count
    // and then divided by the duration of the benchmark.
    kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
    // Mark the counter as an iteration-average quantity.
    // It will be presented divided by the number of iterations.
    kAvgIterations = 1U << 3U,
    // Mark the counter as an iteration-average rate. See above.
    kAvgIterationsRate = kIsRate | kAvgIterations,

    // In the end, invert the result. This is always done last!
    kInvert = 1U << 31U
  };

  enum OneK {
    // 1'000 items per 1k
    kIs1000 = 1000,
    // 1'024 items per 1k
    kIs1024 = 1024
  };

  double value;  // raw value accumulated by the benchmark
  Flags flags;   // how 'value' is post-processed for reporting
  OneK oneK;     // whether a "kilo" means 1000 or 1024 for this counter

  BENCHMARK_ALWAYS_INLINE
  Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
      : value(v), flags(f), oneK(k) {}

  // Implicit conversions let a Counter be read and written like a double.
  BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
  BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
};
429
430 // A helper for user code to create unforeseen combinations of Flags, without
431 // having to do this cast manually each time, or providing this operator.
432 Counter::Flags inline operator|(const Counter::Flags& LHS,
433 const Counter::Flags& RHS) {
434 return static_cast<Counter::Flags>(static_cast<int>(LHS) |
435 static_cast<int>(RHS));
436 }
437
// This is the container for the user-defined counters.
typedef std::map<std::string, Counter> UserCounters;

// TimeUnit is passed to a benchmark in order to specify the order of magnitude
// for the measured time.
enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };

// BigO is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark. In case oAuto is selected,
// complexity will be calculated automatically to the best fit.
enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };

// The number of iterations a benchmark loop runs for.
typedef uint64_t IterationCount;

// Whether a computed statistic is reported as a time or as a percentage.
enum StatisticUnit { kTime, kPercentage };

// BigOFunc is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark.
typedef double(BigOFunc)(IterationCount);

// StatisticsFunc is passed to a benchmark in order to compute some descriptive
// statistics over all the measurements of some type
typedef double(StatisticsFunc)(const std::vector<double>&);
462
463 namespace internal {
// A named statistic: 'compute_' reduces a set of measurements to a single
// value that is reported under 'name_' in the given 'unit_'.
struct Statistics {
  std::string name_;
  StatisticsFunc* compute_;
  StatisticUnit unit_;

  Statistics(const std::string& name, StatisticsFunc* compute,
             StatisticUnit unit = kTime)
      : name_(name), compute_(compute), unit_(unit) {}
};
473
// Implementation details, forward-declared so State (below) can hold
// pointers to them.
class BenchmarkInstance;
class ThreadTimer;
class ThreadManager;
class PerfCountersMeasurement;
478
// Bit-flags controlling whether the reporters output only aggregate results.
// (Pre-C++11 compilers cannot specify an underlying type for the enum.)
enum AggregationReportMode
#if defined(BENCHMARK_HAS_CXX11)
    : unsigned
#else
#endif
{
  // The mode has not been manually specified
  ARM_Unspecified = 0,
  // The mode is user-specified.
  // This may or may not be set when the following bit-flags are set.
  ARM_Default = 1U << 0U,
  // File reporter should only output aggregates.
  ARM_FileReportAggregatesOnly = 1U << 1U,
  // Display reporter should only output aggregates
  ARM_DisplayReportAggregatesOnly = 1U << 2U,
  // Both reporters should only display aggregates.
  ARM_ReportAggregatesOnly =
      ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};
498
499 } // namespace internal
500
// State is passed to a running Benchmark and contains state for the
// benchmark to use.
class State {
 public:
  struct StateIterator;
  friend struct StateIterator;

  // Returns iterators used to run each iteration of a benchmark using a
  // C++11 ranged-based for loop. These functions should not be called directly.
  //
  // REQUIRES: The benchmark has not started running yet. Neither begin nor end
  // have been called previously.
  //
  // NOTE: KeepRunning may not be used after calling either of these functions.
  BENCHMARK_ALWAYS_INLINE StateIterator begin();
  BENCHMARK_ALWAYS_INLINE StateIterator end();

  // Returns true if the benchmark should continue through another iteration.
  // NOTE: A benchmark may not return from the test until KeepRunning() has
  // returned false.
  bool KeepRunning();

  // Returns true iff the benchmark should run n more iterations.
  // REQUIRES: 'n' > 0.
  // NOTE: A benchmark must not return from the test until KeepRunningBatch()
  // has returned false.
  // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
  //
  // Intended usage:
  //   while (state.KeepRunningBatch(1000)) {
  //     // process 1000 elements
  //   }
  bool KeepRunningBatch(IterationCount n);

  // REQUIRES: timer is running and 'SkipWithError(...)' has not been called
  // by the current thread.
  // Stop the benchmark timer. If not called, the timer will be
  // automatically stopped after the last iteration of the benchmark loop.
  //
  // For threaded benchmarks the PauseTiming() function only pauses the timing
  // for the current thread.
  //
  // NOTE: The "real time" measurement is per-thread. If different threads
  // report different measurements the largest one is reported.
  //
  // NOTE: PauseTiming()/ResumeTiming() are relatively
  // heavyweight, and so their use should generally be avoided
  // within each benchmark iteration, if possible.
  void PauseTiming();

  // REQUIRES: timer is not running and 'SkipWithError(...)' has not been called
  // by the current thread.
  // Start the benchmark timer. The timer is NOT running on entrance to the
  // benchmark function. It begins running after control flow enters the
  // benchmark loop.
  //
  // NOTE: PauseTiming()/ResumeTiming() are relatively
  // heavyweight, and so their use should generally be avoided
  // within each benchmark iteration, if possible.
  void ResumeTiming();

  // REQUIRES: 'SkipWithError(...)' has not been called previously by the
  // current thread.
  // Report the benchmark as resulting in an error with the specified 'msg'.
  // After this call the user may explicitly 'return' from the benchmark.
  //
  // If the ranged-for style of benchmark loop is used, the user must explicitly
  // break from the loop, otherwise all future iterations will be run.
  // If the 'KeepRunning()' loop is used the current thread will automatically
  // exit the loop at the end of the current iteration.
  //
  // For threaded benchmarks only the current thread stops executing and future
  // calls to `KeepRunning()` will block until all threads have completed
  // the `KeepRunning()` loop. If multiple threads report an error only the
  // first error message is used.
  //
  // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
  // the current scope immediately. If the function is called from within
  // the 'KeepRunning()' loop the current iteration will finish. It is the users
  // responsibility to exit the scope as needed.
  void SkipWithError(const char* msg);

  // Returns true if an error has been reported with 'SkipWithError(...)'.
  bool error_occurred() const { return error_occurred_; }

  // REQUIRES: called exactly once per iteration of the benchmarking loop.
  // Set the manually measured time for this benchmark iteration, which
  // is used instead of automatically measured time if UseManualTime() was
  // specified.
  //
  // For threaded benchmarks the final value will be set to the largest
  // reported values.
  void SetIterationTime(double seconds);

  // Set the number of bytes processed by the current benchmark
  // execution. This routine is typically called once at the end of a
  // throughput oriented benchmark.
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  BENCHMARK_ALWAYS_INLINE
  void SetBytesProcessed(int64_t bytes) {
    // Stored as a rate counter using 1024-based units (KiB/s etc.).
    counters["bytes_per_second"] =
        Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
  }

  BENCHMARK_ALWAYS_INLINE
  int64_t bytes_processed() const {
    if (counters.find("bytes_per_second") != counters.end())
      return static_cast<int64_t>(counters.at("bytes_per_second"));
    return 0;
  }

  // If this routine is called with complexity_n > 0 and complexity report is
  // requested for the family benchmark, then the current benchmark will be
  // part of the computation and complexity_n will represent the length of N.
  BENCHMARK_ALWAYS_INLINE
  void SetComplexityN(int64_t complexity_n) { complexity_n_ = complexity_n; }

  BENCHMARK_ALWAYS_INLINE
  int64_t complexity_length_n() const { return complexity_n_; }

  // If this routine is called with items > 0, then an items/s
  // label is printed on the benchmark report line for the currently
  // executing benchmark. It is typically called at the end of a processing
  // benchmark where a processing items/second output is desired.
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  BENCHMARK_ALWAYS_INLINE
  void SetItemsProcessed(int64_t items) {
    counters["items_per_second"] =
        Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
  }

  BENCHMARK_ALWAYS_INLINE
  int64_t items_processed() const {
    if (counters.find("items_per_second") != counters.end())
      return static_cast<int64_t>(counters.at("items_per_second"));
    return 0;
  }

  // If this routine is called, the specified label is printed at the
  // end of the benchmark report line for the currently executing
  // benchmark. Example:
  //  static void BM_Compress(benchmark::State& state) {
  //    ...
  //    double compress = input_size / output_size;
  //    state.SetLabel(StrFormat("compress:%.1f%%", 100.0 * compress));
  //  }
  // Produces output that looks like:
  //  BM_Compress   50   50   14115038   compress:27.3%
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  void SetLabel(const char* label);

  void BENCHMARK_ALWAYS_INLINE SetLabel(const std::string& str) {
    this->SetLabel(str.c_str());
  }

  // Range arguments for this run. Asserts that the requested argument
  // has been set.
  BENCHMARK_ALWAYS_INLINE
  int64_t range(std::size_t pos = 0) const {
    assert(range_.size() > pos);
    return range_[pos];
  }

  BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
  int64_t range_x() const { return range(0); }

  BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
  int64_t range_y() const { return range(1); }

  // Number of threads concurrently executing the benchmark.
  BENCHMARK_ALWAYS_INLINE
  int threads() const { return threads_; }

  // Index of the executing thread. Values from [0, threads).
  BENCHMARK_ALWAYS_INLINE
  int thread_index() const { return thread_index_; }

  // Number of iterations consumed so far; 0 before the benchmark has started.
  BENCHMARK_ALWAYS_INLINE
  IterationCount iterations() const {
    if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
      return 0;
    }
    return max_iterations - total_iterations_ + batch_leftover_;
  }

 private:
  // items we expect on the first cache line (ie 64 bytes of the struct)

  // When total_iterations_ is 0, KeepRunning() and friends will return false.
  // May be larger than max_iterations.
  IterationCount total_iterations_;

  // When using KeepRunningBatch(), batch_leftover_ holds the number of
  // iterations beyond max_iters that were run. Used to track
  // completed_iterations_ accurately.
  IterationCount batch_leftover_;

 public:
  const IterationCount max_iterations;

 private:
  bool started_;
  bool finished_;
  bool error_occurred_;  // set by SkipWithError()

 private:  // items we don't need on the first cache line
  std::vector<int64_t> range_;  // arguments supplied via Arg()/Args()/Ranges()

  int64_t complexity_n_;  // see SetComplexityN()

 public:
  // Container for user-defined counters.
  UserCounters counters;

 private:
  State(IterationCount max_iters, const std::vector<int64_t>& ranges,
        int thread_i, int n_threads, internal::ThreadTimer* timer,
        internal::ThreadManager* manager,
        internal::PerfCountersMeasurement* perf_counters_measurement);

  void StartKeepRunning();
  // Implementation of KeepRunning() and KeepRunningBatch().
  // is_batch must be true unless n is 1.
  bool KeepRunningInternal(IterationCount n, bool is_batch);
  void FinishKeepRunning();

  const int thread_index_;
  const int threads_;

  internal::ThreadTimer* const timer_;
  internal::ThreadManager* const manager_;
  internal::PerfCountersMeasurement* const perf_counters_measurement_;

  friend class internal::BenchmarkInstance;
};
739
// Single-iteration form of the benchmark loop; see KeepRunningInternal().
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
  return KeepRunningInternal(1, /*is_batch=*/false);
}
743
// Batched form of the benchmark loop: consumes 'n' iterations per call.
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
  return KeepRunningInternal(n, /*is_batch=*/true);
}
747
// Shared implementation of KeepRunning() and KeepRunningBatch(): consumes up
// to 'n' iterations from total_iterations_, starting the run on the first
// call and calling FinishKeepRunning() once the budget is exhausted.
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
                                                               bool is_batch) {
  // total_iterations_ is set to 0 by the constructor, and always set to a
  // nonzero value by StartKeepRunning().
  assert(n > 0);
  // n must be 1 unless is_batch is true.
  assert(is_batch || n == 1);
  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
    total_iterations_ -= n;
    return true;
  }
  if (!started_) {
    StartKeepRunning();
    if (!error_occurred_ && total_iterations_ >= n) {
      total_iterations_ -= n;
      return true;
    }
  }
  // For non-batch runs, total_iterations_ must be 0 by now.
  if (is_batch && total_iterations_ != 0) {
    // Allow one final, overshooting batch; record the overshoot so that
    // iterations() remains accurate.
    batch_leftover_ = n - total_iterations_;
    total_iterations_ = 0;
    return true;
  }
  FinishKeepRunning();
  return false;
}
775
// Iterator returned by State::begin()/end() for the ranged-for benchmark
// loop. It does not point at elements; it simply counts down the remaining
// iterations, and dereferencing yields an empty Value tag.
struct State::StateIterator {
  struct BENCHMARK_UNUSED Value {};
  typedef std::forward_iterator_tag iterator_category;
  typedef Value value_type;
  typedef Value reference;
  typedef Value pointer;
  typedef std::ptrdiff_t difference_type;

 private:
  friend class State;
  // End-sentinel form: zero iterations remaining, no parent State.
  BENCHMARK_ALWAYS_INLINE
  StateIterator() : cached_(0), parent_() {}

  // Begin form: runs max_iterations, or zero if an error was reported.
  BENCHMARK_ALWAYS_INLINE
  explicit StateIterator(State* st)
      : cached_(st->error_occurred_ ? 0 : st->max_iterations), parent_(st) {}

 public:
  BENCHMARK_ALWAYS_INLINE
  Value operator*() const { return Value(); }

  BENCHMARK_ALWAYS_INLINE
  StateIterator& operator++() {
    assert(cached_ > 0);
    --cached_;
    return *this;
  }

  // NOTE: the argument is ignored; the loop terminates purely when cached_
  // reaches zero, at which point FinishKeepRunning() is invoked.
  BENCHMARK_ALWAYS_INLINE
  bool operator!=(StateIterator const&) const {
    if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
    parent_->FinishKeepRunning();
    return false;
  }

 private:
  IterationCount cached_;  // iterations remaining
  State* const parent_;
};
815
// Begin of the ranged-for benchmark loop; see State::StateIterator.
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
  return StateIterator(this);
}
// End sentinel of the ranged-for loop. Note that end() also kicks off the
// run via StartKeepRunning() before iteration begins.
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
  StartKeepRunning();
  return StateIterator();
}
823
namespace internal {

// The signature of a user benchmark routine: takes the State, returns void.
typedef void(Function)(State&);
827
828 // ------------------------------------------------------
829 // Benchmark registration object. The BENCHMARK() macro expands
830 // into an internal::Benchmark* object. Various methods can
831 // be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can be
// chained into one expression.
class Benchmark {
 public:
  virtual ~Benchmark();

  // Note: the following methods all return "this" so that multiple
  // method calls can be chained together in one expression.

  // Specify the name of the benchmark
  Benchmark* Name(const std::string& name);

  // Run this benchmark once with "x" as the extra argument passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Arg(int64_t x);

  // Run this benchmark with the given time unit for the generated output report
  Benchmark* Unit(TimeUnit unit);

  // Run this benchmark once for a number of values picked from the
  // range [start..limit]. (start and limit are always picked.)
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* Range(int64_t start, int64_t limit);

  // Run this benchmark once for all values in the range [start..limit] with
  // the specified step.
  // REQUIRES: The function passed to the constructor must accept an arg1.
  Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);

  // Run this benchmark once with "args" as the extra arguments passed
  // to the function.
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* Args(const std::vector<int64_t>& args);

  // Equivalent to Args({x, y})
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  // New code should use 'Args'.
  Benchmark* ArgPair(int64_t x, int64_t y) {
    std::vector<int64_t> args;
    args.push_back(x);
    args.push_back(y);
    return Args(args);
  }

  // Run this benchmark once for a number of values picked from the
  // ranges [start..limit]. (starts and limits are always picked.)
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);

  // Run this benchmark once for each combination of values in the (cartesian)
  // product of the supplied argument lists.
  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
  Benchmark* ArgsProduct(const std::vector<std::vector<int64_t> >& arglists);

  // Equivalent to ArgNames({name})
  Benchmark* ArgName(const std::string& name);

  // Set the argument names to display in the benchmark name. If not called,
  // only argument values will be shown.
  Benchmark* ArgNames(const std::vector<std::string>& names);

  // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
  // NOTE: This is a legacy C++03 interface provided for compatibility only.
  // New code should use 'Ranges'.
  Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
    std::vector<std::pair<int64_t, int64_t> > ranges;
    ranges.push_back(std::make_pair(lo1, hi1));
    ranges.push_back(std::make_pair(lo2, hi2));
    return Ranges(ranges);
  }

  // Pass this benchmark object to *func, which can customize
  // the benchmark by calling various methods like Arg, Args,
  // Threads, etc.
  Benchmark* Apply(void (*func)(Benchmark* benchmark));

  // Set the range multiplier for non-dense range. If not called, the range
  // multiplier kRangeMultiplier will be used.
  Benchmark* RangeMultiplier(int multiplier);

  // Set the minimum amount of time to use when running this benchmark. This
  // option overrides the `benchmark_min_time` flag.
  // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
  Benchmark* MinTime(double t);

  // Specify the number of iterations that should be run by this benchmark.
  // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
  //
  // NOTE: This function should only be used when *exact* iteration control is
  // needed and never to control or limit how long a benchmark runs, where
  // `--benchmark_min_time=N` or `MinTime(...)` should be used instead.
  Benchmark* Iterations(IterationCount n);

  // Specify the number of times to repeat this benchmark. This option
  // overrides the `benchmark_repetitions` flag.
  // REQUIRES: `n > 0`
  Benchmark* Repetitions(int n);

  // Specify if each repetition of the benchmark should be reported separately
  // or if only the final statistics should be reported. If the benchmark
  // is not repeated then the single result is always reported.
  // Applies to *ALL* reporters (display and file).
  Benchmark* ReportAggregatesOnly(bool value = true);

  // Same as ReportAggregatesOnly(), but applies to display reporter only.
  Benchmark* DisplayAggregatesOnly(bool value = true);

  // By default, the CPU time is measured only for the main thread, which may
  // be unrepresentative if the benchmark uses threads internally. If called,
  // the total CPU time spent by all the threads will be measured instead.
  // If not called, only the main thread's CPU time will be measured.
  Benchmark* MeasureProcessCPUTime();

  // If a particular benchmark should use the Wall clock instead of the CPU time
  // (be it either the CPU time of the main thread only (default), or the
  // total CPU usage of the benchmark), call this method. If called, the elapsed
  // (wall) time will be used to control how many iterations are run, and in the
  // printing of items/second or MB/seconds values.
  // If not called, the CPU time used by the benchmark will be used.
  Benchmark* UseRealTime();

  // If a benchmark must measure time manually (e.g. if GPU execution time is
  // being measured), call this method. If called, each benchmark iteration
  // should call SetIterationTime(seconds) to report the measured time, which
  // will be used to control how many iterations are run, and in the printing
  // of items/second or MB/second values.
  Benchmark* UseManualTime();

  // Set the asymptotic computational complexity for the benchmark. If called
  // the asymptotic computational complexity will be shown on the output.
  Benchmark* Complexity(BigO complexity = benchmark::oAuto);

  // Set the asymptotic computational complexity for the benchmark. If called
  // the asymptotic computational complexity will be shown on the output.
  Benchmark* Complexity(BigOFunc* complexity);

  // Add a statistic to be computed over all the measurements of the
  // benchmark run.
  Benchmark* ComputeStatistics(std::string name, StatisticsFunc* statistics,
                               StatisticUnit unit = kTime);

  // Support for running multiple copies of the same benchmark concurrently
  // in multiple threads. This may be useful when measuring the scaling
  // of some piece of code.

  // Run one instance of this benchmark concurrently in t threads.
  Benchmark* Threads(int t);

  // Pick a set of values T from [min_threads,max_threads].
  // min_threads and max_threads are always included in T. Run this
  // benchmark once for each value in T. The benchmark run for a
  // particular value t consists of t threads running the benchmark
  // function concurrently. For example, consider:
  //    BENCHMARK(Foo)->ThreadRange(1,16);
  // This will run the following benchmarks:
  //    Foo in 1 thread
  //    Foo in 2 threads
  //    Foo in 4 threads
  //    Foo in 8 threads
  //    Foo in 16 threads
  Benchmark* ThreadRange(int min_threads, int max_threads);

  // For each value n in the range, run this benchmark once using n threads.
  // min_threads and max_threads are always included in the range.
  // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
  // a benchmark with 1, 4, 7 and 8 threads.
  Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);

  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
  Benchmark* ThreadPerCpu();

  virtual void Run(State& state) = 0;

 protected:
  explicit Benchmark(const char* name);
  Benchmark(Benchmark const&);
  void SetName(const char* name);

  int ArgsCnt() const;

 private:
  friend class BenchmarkFamilies;
  friend class BenchmarkInstance;

  std::string name_;
  AggregationReportMode aggregation_report_mode_;
  std::vector<std::string> arg_names_;       // Arg names for all benchmark runs
  std::vector<std::vector<int64_t> > args_;  // Args for all benchmark runs
  TimeUnit time_unit_;
  int range_multiplier_;
  double min_time_;
  IterationCount iterations_;
  int repetitions_;
  bool measure_process_cpu_time_;
  bool use_real_time_;
  bool use_manual_time_;
  BigO complexity_;
  BigOFunc* complexity_lambda_;
  std::vector<Statistics> statistics_;
  std::vector<int> thread_counts_;

  Benchmark& operator=(Benchmark const&);
};
1037
1038 } // namespace internal
1039
1040 // Create and register a benchmark with the specified 'name' that invokes
1041 // the specified functor 'fn'.
1042 //
1043 // RETURNS: A pointer to the registered benchmark.
1044 internal::Benchmark* RegisterBenchmark(const char* name,
1045 internal::Function* fn);
1046
1047 #if defined(BENCHMARK_HAS_CXX11)
1048 template <class Lambda>
1049 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn);
1050 #endif
1051
1052 // Remove all registered benchmarks. All pointers to previously registered
1053 // benchmarks are invalidated.
1054 void ClearRegisteredBenchmarks();
1055
1056 namespace internal {
// The class used to hold all benchmarks created from a static function
// (i.e. those created using the BENCHMARK(...) macros).
class FunctionBenchmark : public Benchmark {
 public:
  FunctionBenchmark(const char* name, Function* func)
      : Benchmark(name), func_(func) {}

  virtual void Run(State& st) BENCHMARK_OVERRIDE;

 private:
  Function* func_;  // The registered benchmark function (not owned).
};
1069
1070 #ifdef BENCHMARK_HAS_CXX11
// Benchmark that wraps an arbitrary callable (e.g. a lambda). Construction is
// private; instances are created only by benchmark::RegisterBenchmark, which
// is declared a friend below.
template <class Lambda>
class LambdaBenchmark : public Benchmark {
 public:
  virtual void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }

 private:
  template <class OLambda>
  LambdaBenchmark(const char* name, OLambda&& lam)
      : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}

  LambdaBenchmark(LambdaBenchmark const&) = delete;

 private:
  template <class Lam>
  friend Benchmark* ::benchmark::RegisterBenchmark(const char*, Lam&&);

  Lambda lambda_;  // The callable invoked on every Run().
};
1089 #endif
1090
1091 } // namespace internal
1092
RegisterBenchmark(const char * name,internal::Function * fn)1093 inline internal::Benchmark* RegisterBenchmark(const char* name,
1094 internal::Function* fn) {
1095 return internal::RegisterBenchmarkInternal(
1096 ::new internal::FunctionBenchmark(name, fn));
1097 }
1098
1099 #ifdef BENCHMARK_HAS_CXX11
1100 template <class Lambda>
RegisterBenchmark(const char * name,Lambda && fn)1101 internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn) {
1102 using BenchType =
1103 internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
1104 return internal::RegisterBenchmarkInternal(
1105 ::new BenchType(name, std::forward<Lambda>(fn)));
1106 }
1107 #endif
1108
1109 #if defined(BENCHMARK_HAS_CXX11) && \
1110 (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
// Registers a benchmark named 'name' that invokes 'fn' with the extra
// arguments 'args...' appended after the State& parameter.
// NOTE: the wrapping lambda captures both the callable and the arguments
// by copy ([=]), so they must be copyable and are not forwarded.
template <class Lambda, class... Args>
internal::Benchmark* RegisterBenchmark(const char* name, Lambda&& fn,
                                       Args&&... args) {
  return benchmark::RegisterBenchmark(
      name, [=](benchmark::State& st) { fn(st, args...); });
}
1117 #else
1118 #define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
1119 #endif
1120
// The base class for all fixture tests.
class Fixture : public internal::Benchmark {
 public:
  Fixture() : internal::Benchmark("") {}

  // Runs SetUp, then the user-defined BenchmarkCase, then TearDown.
  virtual void Run(State& st) BENCHMARK_OVERRIDE {
    this->SetUp(st);
    this->BenchmarkCase(st);
    this->TearDown(st);
  }

  // These will be deprecated ...
  virtual void SetUp(const State&) {}
  virtual void TearDown(const State&) {}
  // ... In favor of these. The non-const overloads forward to the const
  // ones by default so existing fixtures keep working.
  virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
  virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }

 protected:
  // The benchmark body; supplied via BENCHMARK_F / BENCHMARK_DEFINE_F below.
  virtual void BenchmarkCase(State&) = 0;
};
1142
1143 } // namespace benchmark
1144
1145 // ------------------------------------------------------
1146 // Macro to register benchmarks
1147
// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
// empty. If X is empty the expression becomes (+1 == +0).
#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
#else
#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
#endif

// Helpers for generating unique variable names
#define BENCHMARK_PRIVATE_NAME(n) \
  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
// Two-level concatenation so macro arguments are expanded before pasting.
#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
// Helper for concatenation with macro name expansion
#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
  BaseClass##_##Method##_Benchmark
1165
// Declares a uniquely-named static Benchmark* to hold the result of a
// BENCHMARK* registration.
#define BENCHMARK_PRIVATE_DECLARE(n)                                 \
  static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
      BENCHMARK_UNUSED

// Registers the function 'n' (of type void(benchmark::State&)) as a
// benchmark named after it.
#define BENCHMARK(n)                                     \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n, n)))

// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
// Fixed: this previously expanded to RangePair({{..}, {..}}), which cannot
// compile -- RangePair takes four int64_t scalars, not a braced list of
// pairs. Ranges accepts a std::vector<std::pair<int64_t, int64_t>> and is
// documented as equivalent to RangePair(lo1, hi1, lo2, hi2).
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
  BENCHMARK(n)->Ranges({{(l1), (h1)}, {(l2), (h2)}})
1182
1183 #ifdef BENCHMARK_HAS_CXX11
1184
// Register a benchmark which invokes the function specified by `func`
// with the additional arguments specified by `...`.
//
// For example:
//
// template <class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//  [...]
// }
// /* Registers a benchmark named "BM_takes_args/int_string_test" */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
#define BENCHMARK_CAPTURE(func, test_case_name, ...)     \
  BENCHMARK_PRIVATE_DECLARE(func) =                      \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #func "/" #test_case_name,                 \
              [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1202
1203 #endif // BENCHMARK_HAS_CXX11
1204
// This will register a benchmark for a templatized function. For example:
//
// template<int arg>
// void BM_Foo(benchmark::State& state);
//
// BENCHMARK_TEMPLATE(BM_Foo, 1);
//
// will register BM_Foo<1> as a benchmark.
#define BENCHMARK_TEMPLATE1(n, a)                        \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))

#define BENCHMARK_TEMPLATE2(n, a, b)                                         \
  BENCHMARK_PRIVATE_DECLARE(n) =                                             \
      (::benchmark::internal::RegisterBenchmarkInternal(                     \
          new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
                                                       n<a, b>)))

#ifdef BENCHMARK_HAS_CXX11
// Variadic form: registers n<__VA_ARGS__> under the stringized argument list.
#define BENCHMARK_TEMPLATE(n, ...)                       \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(  \
              #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
#else
#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
#endif
1233
// Declares the fixture-derived class "BaseClass_Method_Benchmark" used by the
// BENCHMARK_F / BENCHMARK_DEFINE_F family. The generated class sets its name
// to "BaseClass/Method" and declares BenchmarkCase, whose body the user
// supplies after the macro invocation.
#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method)                  \
  class BaseClass##_##Method##_Benchmark : public BaseClass {           \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() : BaseClass() {                  \
      this->SetName(#BaseClass "/" #Method);                            \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

// Same as above for a fixture template with one template argument.
#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a)     \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a> {        \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() : BaseClass<a>() {               \
      this->SetName(#BaseClass "<" #a ">/" #Method);                    \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

// Same as above for a fixture template with two template arguments.
#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b)  \
  class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> {     \
   public:                                                              \
    BaseClass##_##Method##_Benchmark() : BaseClass<a, b>() {            \
      this->SetName(#BaseClass "<" #a "," #b ">/" #Method);             \
    }                                                                   \
                                                                        \
   protected:                                                           \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
  };

#ifdef BENCHMARK_HAS_CXX11
// Variadic form for fixture templates with any number of template arguments.
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...)       \
  class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
   public:                                                                 \
    BaseClass##_##Method##_Benchmark() : BaseClass<__VA_ARGS__>() {        \
      this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method);             \
    }                                                                      \
                                                                           \
   protected:                                                              \
    virtual void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE;    \
  };
#else
#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
#endif
1282
// Declares the fixture class and opens the definition of its BenchmarkCase;
// the user writes the method body immediately after the macro. Registration
// is done separately with BENCHMARK_REGISTER_F.
#define BENCHMARK_DEFINE_F(BaseClass, Method)    \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)    \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b)    \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...)            \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
#endif
1303
// Registers a fixture benchmark previously defined with BENCHMARK_DEFINE_F.
#define BENCHMARK_REGISTER_F(BaseClass, Method) \
  BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))

#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
  BENCHMARK_PRIVATE_DECLARE(TestName) =        \
      (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))

// This macro will define and register a benchmark within a fixture class.
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                    \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)                   \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                             \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
#endif
1336
// Helper macro to create a main routine in a test that runs the benchmarks.
// The trailing `int main(int, char**)` line consumes the user's semicolon
// after the macro invocation (it re-declares main without defining it).
#define BENCHMARK_MAIN()                                                \
  int main(int argc, char** argv) {                                     \
    ::benchmark::Initialize(&argc, argv);                               \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks();                              \
    ::benchmark::Shutdown();                                            \
    return 0;                                                           \
  }                                                                     \
  int main(int, char**)
1347
1348 // ------------------------------------------------------
1349 // Benchmark Reporters
1350
1351 namespace benchmark {
1352
// Information about the host CPU; obtained via the Get() singleton accessor
// (the constructor is private, so no other instances can exist).
struct CPUInfo {
  // Description of one CPU cache.
  struct CacheInfo {
    std::string type;  // Cache type name -- presumably "Data"/"Instruction"/
                       // "Unified"; TODO confirm against the implementation.
    int level;         // Cache level (1 = L1, 2 = L2, ...).
    int size;          // Cache size -- presumably in bytes; TODO confirm.
    int num_sharing;   // NOTE(review): looks like the number of logical CPUs
                       // sharing this cache -- verify in the implementation.
  };

  // Whether CPU frequency scaling could be detected, and its state.
  enum Scaling {
    UNKNOWN,
    ENABLED,
    DISABLED
  };

  int num_cpus;
  Scaling scaling;
  double cycles_per_second;
  std::vector<CacheInfo> caches;
  std::vector<double> load_avg;

  // Returns the singleton instance.
  static const CPUInfo& Get();

 private:
  CPUInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
};
1379
// Information about the host system; obtained via the Get() singleton
// accessor (the constructor is private).
struct SystemInfo {
  std::string name;  // System name -- presumably the host name; TODO confirm.
  static const SystemInfo& Get();

 private:
  SystemInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
};
1389
// BenchmarkName contains the components of the Benchmark's name
// which allows individual fields to be modified or cleared before
// building the final name using 'str()'.
struct BenchmarkName {
  std::string function_name;
  std::string args;
  std::string min_time;
  std::string iterations;
  std::string repetitions;
  std::string time_type;
  std::string threads;

  // Return the full name of the benchmark with each non-empty
  // field separated by a '/'
  std::string str() const;
};
1406
// Interface for custom benchmark result printers.
// By default, benchmark reports are printed to stdout. However an application
// can control the destination of the reports by calling
// RunSpecifiedBenchmarks and passing it a custom reporter object.
// The reporter object must implement the following interface.
class BenchmarkReporter {
 public:
  // Information common to one whole suite of benchmarks.
  struct Context {
    CPUInfo const& cpu_info;
    SystemInfo const& sys_info;
    // The number of chars in the longest benchmark name.
    size_t name_field_width;
    static const char* executable_name;
    Context();
  };

  // The result of one benchmark run (or one aggregate over several runs).
  struct Run {
    static const int64_t no_repetition_index = -1;
    enum RunType { RT_Iteration, RT_Aggregate };

    Run()
        : run_type(RT_Iteration),
          aggregate_unit(kTime),
          error_occurred(false),
          iterations(1),
          threads(1),
          time_unit(kNanosecond),
          real_accumulated_time(0),
          cpu_accumulated_time(0),
          max_heapbytes_used(0),
          complexity(oNone),
          complexity_lambda(),
          complexity_n(0),
          report_big_o(false),
          report_rms(false),
          counters(),
          has_memory_result(false),
          allocs_per_iter(0.0),
          max_bytes_used(0) {}

    std::string benchmark_name() const;
    BenchmarkName run_name;
    // NOTE(review): family_index, per_family_instance_index,
    // repetition_index, repetitions and statistics are not initialized by the
    // default constructor -- presumably always set by the runner; confirm.
    int64_t family_index;
    int64_t per_family_instance_index;
    RunType run_type;
    std::string aggregate_name;
    StatisticUnit aggregate_unit;
    std::string report_label;  // Empty if not set by benchmark.
    bool error_occurred;
    std::string error_message;

    IterationCount iterations;
    int64_t threads;
    int64_t repetition_index;
    int64_t repetitions;
    TimeUnit time_unit;
    double real_accumulated_time;
    double cpu_accumulated_time;

    // Return a value representing the real time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedRealTime() const;

    // Return a value representing the cpu time per iteration in the unit
    // specified by 'time_unit'.
    // NOTE: If 'iterations' is zero the returned value represents the
    // accumulated time.
    double GetAdjustedCPUTime() const;

    // This is set to 0.0 if memory tracing is not enabled.
    double max_heapbytes_used;

    // Keep track of arguments to compute asymptotic complexity
    BigO complexity;
    BigOFunc* complexity_lambda;
    int64_t complexity_n;

    // what statistics to compute from the measurements
    const std::vector<internal::Statistics>* statistics;

    // Inform print function whether the current run is a complexity report
    bool report_big_o;
    bool report_rms;

    UserCounters counters;

    // Memory metrics.
    bool has_memory_result;
    double allocs_per_iter;
    int64_t max_bytes_used;
  };

  // Progress bookkeeping for one family of benchmarks.
  struct PerFamilyRunReports {
    PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}

    // How many runs will all instances of this benchmark perform?
    int num_runs_total;

    // How many runs have happened already?
    int num_runs_done;

    // The reports about (non-erroneous!) runs of this family.
    std::vector<BenchmarkReporter::Run> Runs;
  };

  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
  // and the error stream set to 'std::cerr'
  BenchmarkReporter();

  // Called once for every suite of benchmarks run.
  // The parameter "context" contains information that the
  // reporter may wish to use when generating its report, for example the
  // platform under which the benchmarks are running. The benchmark run is
  // never started if this function returns false, allowing the reporter
  // to skip runs based on the context information.
  virtual bool ReportContext(const Context& context) = 0;

  // Called once for each group of benchmark runs, gives information about
  // cpu-time and heap memory usage during the benchmark run. If the group
  // of runs contained more than two entries then 'report' contains additional
  // elements representing the mean and standard deviation of those runs.
  // Additionally if this group of runs was the last in a family of benchmarks
  // 'reports' contains additional entries representing the asymptotic
  // complexity and RMS of that benchmark family.
  virtual void ReportRuns(const std::vector<Run>& report) = 0;

  // Called once and only once after every group of benchmarks is run and
  // reported.
  virtual void Finalize() {}

  // REQUIRES: The object referenced by 'out' is valid for the lifetime
  // of the reporter.
  void SetOutputStream(std::ostream* out) {
    assert(out);
    output_stream_ = out;
  }

  // REQUIRES: The object referenced by 'err' is valid for the lifetime
  // of the reporter.
  void SetErrorStream(std::ostream* err) {
    assert(err);
    error_stream_ = err;
  }

  std::ostream& GetOutputStream() const { return *output_stream_; }

  std::ostream& GetErrorStream() const { return *error_stream_; }

  virtual ~BenchmarkReporter();

  // Write a human readable string to 'out' representing the specified
  // 'context'.
  // REQUIRES: 'out' is non-null.
  static void PrintBasicContext(std::ostream* out, Context const& context);

 private:
  std::ostream* output_stream_;  // Not owned; see SetOutputStream().
  std::ostream* error_stream_;   // Not owned; see SetErrorStream().
};
1568
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
class ConsoleReporter : public BenchmarkReporter {
 public:
  // Bit flags controlling console output formatting.
  enum OutputOptions {
    OO_None = 0,
    OO_Color = 1,
    OO_Tabular = 2,
    OO_ColorTabular = OO_Color | OO_Tabular,
    OO_Defaults = OO_ColorTabular
  };
  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
      : output_options_(opts_),
        name_field_width_(0),
        prev_counters_(),
        printed_header_(false) {}

  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 protected:
  virtual void PrintRunData(const Run& report);
  virtual void PrintHeader(const Run& report);

  OutputOptions output_options_;
  size_t name_field_width_;
  // Counters seen on the previous run -- presumably used to decide when the
  // header must be reprinted; TODO confirm against the implementation.
  UserCounters prev_counters_;
  bool printed_header_;
};
1598
// Reporter that outputs benchmark results in JSON format.
class JSONReporter : public BenchmarkReporter {
 public:
  JSONReporter() : first_report_(true) {}
  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
  virtual void Finalize() BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  // True until the first run has been printed -- presumably used to manage
  // separators between entries; TODO confirm against the implementation.
  bool first_report_;
};
1611
// Reporter that outputs benchmark results in CSV format. Deprecated.
class BENCHMARK_DEPRECATED_MSG(
    "The CSV Reporter will be removed in a future release") CSVReporter
    : public BenchmarkReporter {
 public:
  CSVReporter() : printed_header_(false) {}
  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  virtual void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  bool printed_header_;  // Whether the CSV header row has been emitted.
  std::set<std::string> user_counter_names_;
};
1626
// If a MemoryManager is registered, it can be used to collect and report
// allocation metrics for a run of the benchmark.
class MemoryManager {
 public:
  // Allocation metrics collected between Start() and Stop().
  struct Result {
    Result() : num_allocs(0), max_bytes_used(0) {}

    // The number of allocations made in total between Start and Stop.
    int64_t num_allocs;

    // The peak memory use between Start and Stop.
    int64_t max_bytes_used;
  };

  virtual ~MemoryManager() {}

  // Implement this to start recording allocation information.
  virtual void Start() = 0;

  // Implement this to stop recording and fill out the given Result structure.
  virtual void Stop(Result* result) = 0;
};
1649
GetTimeUnitString(TimeUnit unit)1650 inline const char* GetTimeUnitString(TimeUnit unit) {
1651 switch (unit) {
1652 case kSecond:
1653 return "s";
1654 case kMillisecond:
1655 return "ms";
1656 case kMicrosecond:
1657 return "us";
1658 case kNanosecond:
1659 return "ns";
1660 }
1661 BENCHMARK_UNREACHABLE();
1662 }
1663
GetTimeUnitMultiplier(TimeUnit unit)1664 inline double GetTimeUnitMultiplier(TimeUnit unit) {
1665 switch (unit) {
1666 case kSecond:
1667 return 1;
1668 case kMillisecond:
1669 return 1e3;
1670 case kMicrosecond:
1671 return 1e6;
1672 case kNanosecond:
1673 return 1e9;
1674 }
1675 BENCHMARK_UNREACHABLE();
1676 }
1677
// Creates a list of integer values for the given range and multiplier.
// This can be used together with ArgsProduct() to allow multiple ranges
// with different multipliers.
1681 // Example:
1682 // ArgsProduct({
1683 // CreateRange(0, 1024, /*multi=*/32),
1684 // CreateRange(0, 100, /*multi=*/4),
1685 // CreateDenseRange(0, 4, /*step=*/1),
1686 // });
1687 std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);
1688
1689 // Creates a list of integer values for the given range and step.
1690 std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit,
1691 int step);
1692
1693 } // namespace benchmark
1694
1695 #endif // BENCHMARK_BENCHMARK_H_
1696