1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "benchmark/benchmark.h"
16 #include "internal_macros.h"
17
18 #ifndef BENCHMARK_OS_WINDOWS
19 #include <sys/time.h>
20 #include <sys/resource.h>
21 #include <unistd.h>
22 #endif
23
24 #include <cstdlib>
25 #include <cstring>
26 #include <cstdio>
27 #include <algorithm>
28 #include <atomic>
29 #include <condition_variable>
30 #include <iostream>
31 #include <fstream>
32 #include <memory>
33 #include <thread>
34
35 #include "check.h"
36 #include "commandlineflags.h"
37 #include "complexity.h"
38 #include "log.h"
39 #include "mutex.h"
40 #include "re.h"
41 #include "stat.h"
42 #include "string_util.h"
43 #include "sysinfo.h"
44 #include "timers.h"
45
46 DEFINE_bool(benchmark_list_tests, false,
47 "Print a list of benchmarks. This option overrides all other "
48 "options.");
49
50 DEFINE_string(benchmark_filter, ".",
51 "A regular expression that specifies the set of benchmarks "
52 "to execute. If this flag is empty, no benchmarks are run. "
53 "If this flag is the string \"all\", all benchmarks linked "
54 "into the process are run.");
55
56 DEFINE_double(benchmark_min_time, 0.5,
57 "Minimum number of seconds we should run benchmark before "
58 "results are considered significant. For cpu-time based "
59 "tests, this is the lower bound on the total cpu time "
60 "used by all threads that make up the test. For real-time "
61 "based tests, this is the lower bound on the elapsed time "
62 "of the benchmark execution, regardless of number of "
63 "threads.");
64
65 DEFINE_int32(benchmark_repetitions, 1,
66 "The number of runs of each benchmark. If greater than 1, the "
67 "mean and standard deviation of the runs will be reported.");
68
69 DEFINE_bool(benchmark_report_aggregates_only, false,
70 "Report the result of each benchmark repetitions. When 'true' is "
71 "specified only the mean, standard deviation, and other statistics "
72 "are reported for repeated benchmarks.");
73
74 DEFINE_string(benchmark_format, "console",
75 "The format to use for console output. Valid values are "
76 "'console', 'json', or 'csv'.");
77
78 DEFINE_string(benchmark_out_format, "json",
79 "The format to use for file output. Valid values are "
80 "'console', 'json', or 'csv'.");
81
82 DEFINE_string(benchmark_out, "", "The file to write additonal output to");
83
84 DEFINE_bool(color_print, true, "Enables colorized logging.");
85
86 DEFINE_int32(v, 0, "The level of verbose logging to output");
87
88
89 namespace benchmark {
90
91 namespace internal {
92
// Deliberately-empty sink: forces the compiler to materialize its argument
// (presumably backing DoNotOptimize-style escapes -- confirm against callers).
void UseCharPointer(char const volatile*) {}
94
95 } // end namespace internal
96
97 namespace {
98
// Treats values within machine epsilon of zero as zero, so that an
// unset double-valued option can be distinguished from an explicit value.
bool IsZero(double n) {
  return std::fabs(n) < std::numeric_limits<double>::epsilon();
}
102
// For non-dense Range, intermediate values are powers of kRangeMultiplier.
static const int kRangeMultiplier = 8;
// The size of a benchmark family determines the number of inputs to repeat
// the benchmark on. If this is "large" then warn the user during configuration.
static const size_t kMaxFamilySize = 100;
// Hard upper bound on the iteration count attempted for a single repetition.
static const size_t kMaxIterations = 1000000000;
109
110 } // end namespace
111
112 namespace internal {
113
// NOTE: This is a dummy "mutex" type used to denote the actual mutex
// returned by GetBenchmarkMutex(). This is only used to placate the thread
// safety warnings by giving the return of GetBenchmarkLock() a name.
// The variable below is referenced only from annotations, never locked.
struct CAPABILITY("mutex") BenchmarkLockType {};
BenchmarkLockType BenchmarkLockVar;
119
// Coordinates the worker threads of a single benchmark run: counts live
// threads, synchronizes start/stop through a shared barrier, and owns the
// mutex that guards the accumulated per-run measurements below.
class ThreadManager {
 public:
  ThreadManager(int num_threads)
      : alive_threads_(num_threads), start_stop_barrier_(num_threads) {}

  // Returns the mutex guarding the measurement fields. The annotation names
  // the dummy BenchmarkLockVar so thread-safety analysis can refer to the
  // returned capability.
  Mutex& GetBenchmarkMutex() const
      RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) {
    return benchmark_mutex_;
  }

  // Blocks until all live threads reach the barrier; returns whatever
  // Barrier::wait() reports (presumably true for one designated thread --
  // confirm against the Barrier implementation).
  bool StartStopBarrier() EXCLUDES(end_cond_mutex_) {
    return start_stop_barrier_.wait();
  }

  // Called once by each worker as it finishes: removes it from the barrier
  // and, if it was the last live thread, wakes WaitForAllThreads().
  void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) {
    start_stop_barrier_.removeThread();
    if (--alive_threads_ == 0) {
      MutexLock lock(end_cond_mutex_);
      end_condition_.notify_all();
    }
  }

  // Blocks until every thread has called NotifyThreadComplete().
  void WaitForAllThreads() EXCLUDES(end_cond_mutex_) {
    MutexLock lock(end_cond_mutex_);
    end_condition_.wait(lock.native_handle(),
                        [this]() { return alive_threads_ == 0; });
  }

 public:
  // Measurements accumulated by the worker threads; all guarded by
  // GetBenchmarkMutex().
  GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0;
  GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0;
  GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0;
  GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0;
  GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0;
  GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0;
  GUARDED_BY(GetBenchmarkMutex()) std::string report_label_;
  GUARDED_BY(GetBenchmarkMutex()) std::string error_message_;
  GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false;

 private:
  mutable Mutex benchmark_mutex_;
  std::atomic<int> alive_threads_;
  Barrier start_stop_barrier_;
  Mutex end_cond_mutex_;
  Condition end_condition_;
};
166
167 // Timer management class
// Timer management class: per-thread accumulator for real time, CPU time,
// and manually-reported (SetIterationTime) time.
class ThreadTimer {
 public:
  ThreadTimer()
      : running_(false),
        real_time_used_(0),
        cpu_time_used_(0),
        manual_time_used_(0) {}

  // Called by each thread
  void StartTimer() {
    running_ = true;
    start_real_time_ = ChronoClockNow();
    start_cpu_time_ = ThreadCPUUsage();
  }

  // Called by each thread. Accumulates the slice elapsed since the matching
  // StartTimer() into the running totals.
  void StopTimer() {
    CHECK(running_);
    running_ = false;
    real_time_used_ += ChronoClockNow() - start_real_time_;
    cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_;
  }

  // Called by each thread; adds a user-measured duration (seconds).
  void SetIterationTime(double seconds) { manual_time_used_ += seconds; }

  bool running() const { return running_; }

  // REQUIRES: timer is not running
  double real_time_used() {
    CHECK(!running_);
    return real_time_used_;
  }

  // REQUIRES: timer is not running
  double cpu_time_used() {
    CHECK(!running_);
    return cpu_time_used_;
  }

  // REQUIRES: timer is not running
  double manual_time_used() {
    CHECK(!running_);
    return manual_time_used_;
  }

 private:
  bool running_;            // Is the timer running
  double start_real_time_;  // If running_
  double start_cpu_time_;   // If running_

  // Accumulated time so far (does not contain current slice if running_)
  double real_time_used_;
  double cpu_time_used_;
  // Manually set iteration time. User sets this with SetIterationTime(seconds).
  double manual_time_used_;

};
226
227
// How repetition results should be reported for a benchmark family.
enum ReportMode : unsigned {
  RM_Unspecified,  // The mode has not been manually specified
  RM_Default,      // The mode is user-specified as default.
  RM_ReportAggregatesOnly  // Only mean/stddev etc. are reported.
};
233
234 // Information kept per benchmark we may want to run
// Information kept per benchmark we may want to run: one concrete
// (argument-list x thread-count) expansion of a registered family.
struct Benchmark::Instance {
  std::string name;      // Generated name including args/min_time/threads.
  Benchmark* benchmark;  // The family this instance was expanded from.
  ReportMode report_mode;
  std::vector<int> arg;  // Concrete argument values for this run.
  TimeUnit time_unit;
  int range_multiplier;
  bool use_real_time;
  bool use_manual_time;
  BigO complexity;
  BigOFunc* complexity_lambda;
  bool last_benchmark_instance;  // True for the family's final arg set.
  int repetitions;
  double min_time;
  int threads;        // Number of concurrent threads to use
  bool multithreaded; // Is benchmark multi-threaded?
};
252
253 // Class for managing registered benchmarks. Note that each registered
254 // benchmark identifies a family of related benchmarks to run.
// Class for managing registered benchmarks. Note that each registered
// benchmark identifies a family of related benchmarks to run.
// Singleton; access through GetInstance(). Registration is mutex-protected.
class BenchmarkFamilies {
 public:
  static BenchmarkFamilies* GetInstance();

  // Registers a benchmark family and returns the index assigned to it.
  size_t AddBenchmark(std::unique_ptr<Benchmark> family);

  // Extract the list of benchmark instances that match the specified
  // regular expression. Returns false only if the regex fails to compile.
  bool FindBenchmarks(const std::string& re,
                      std::vector<Benchmark::Instance>* benchmarks,
                      std::ostream* Err);
 private:
  BenchmarkFamilies() {}

  std::vector<std::unique_ptr<Benchmark>> families_;
  Mutex mutex_;  // Guards families_.
};
273
274
// Private implementation of Benchmark (pimpl). Stores everything a fluent
// Benchmark::* call configures: argument lists, timing options, thread
// counts, complexity settings, and repetition behavior.
class BenchmarkImp {
 public:
  explicit BenchmarkImp(const char* name);
  ~BenchmarkImp();

  void Arg(int x);
  void Unit(TimeUnit unit);
  void Range(int start, int limit);
  void DenseRange(int start, int limit, int step = 1);
  void Args(const std::vector<int>& args);
  void Ranges(const std::vector<std::pair<int, int>>& ranges);
  void RangeMultiplier(int multiplier);
  void MinTime(double n);
  void Repetitions(int n);
  void ReportAggregatesOnly(bool v);
  void UseRealTime();
  void UseManualTime();
  void Complexity(BigO complexity);
  void ComplexityLambda(BigOFunc* complexity);
  void Threads(int t);
  void ThreadRange(int min_threads, int max_threads);
  void ThreadPerCpu();
  void SetName(const char* name);

  // Appends lo, intermediate powers of 'mult', and hi to *dst.
  static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);

  // Number of arguments per run, or -1 if no argument lists were added yet.
  int ArgsCnt() const { return args_.empty() ? -1 : static_cast<int>(args_.front().size()); }

 private:
  friend class BenchmarkFamilies;

  std::string name_;
  ReportMode report_mode_;
  std::vector< std::vector<int> > args_;  // Args for all benchmark runs
  TimeUnit time_unit_;
  int range_multiplier_;
  double min_time_;     // 0.0 means "use the flag default".
  int repetitions_;     // 0 means "use the flag default".
  bool use_real_time_;
  bool use_manual_time_;
  BigO complexity_;
  BigOFunc* complexity_lambda_;
  std::vector<int> thread_counts_;

  BenchmarkImp& operator=(BenchmarkImp const&);
};
321
GetInstance()322 BenchmarkFamilies* BenchmarkFamilies::GetInstance() {
323 static BenchmarkFamilies instance;
324 return &instance;
325 }
326
327
AddBenchmark(std::unique_ptr<Benchmark> family)328 size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
329 MutexLock l(mutex_);
330 size_t index = families_.size();
331 families_.push_back(std::move(family));
332 return index;
333 }
334
// Expands every registered family into concrete Instances (one per
// argument-list x thread-count combination), generates each instance's
// name, and appends those matching 'spec' to *benchmarks. Returns false
// only when the regular expression fails to compile.
bool BenchmarkFamilies::FindBenchmarks(
    const std::string& spec,
    std::vector<Benchmark::Instance>* benchmarks,
    std::ostream* ErrStream) {
  CHECK(ErrStream);
  auto& Err = *ErrStream;
  // Make regular expression out of command-line flag
  std::string error_msg;
  Regex re;
  if (!re.Init(spec, &error_msg)) {
    Err << "Could not compile benchmark re: " << error_msg << std::endl;
    return false;
  }

  // Special list of thread counts to use when none are specified
  const std::vector<int> one_thread = {1};

  MutexLock l(mutex_);
  for (std::unique_ptr<Benchmark>& bench_family : families_) {
    // Family was deleted or benchmark doesn't match
    if (!bench_family) continue;
    BenchmarkImp* family = bench_family->imp_;

    // A family registered without arguments still runs once, with an
    // empty argument list.
    if (family->ArgsCnt() == -1) {
      family->Args({});
    }
    const std::vector<int>* thread_counts =
        (family->thread_counts_.empty()
         ? &one_thread
         : &static_cast<const std::vector<int>&>(family->thread_counts_));
    const size_t family_size = family->args_.size() * thread_counts->size();
    // The benchmark will be run at least 'family_size' different inputs.
    // If 'family_size' is very large warn the user.
    if (family_size > kMaxFamilySize) {
      Err << "The number of inputs is very large. " << family->name_
          << " will be repeated at least " << family_size << " times.\n";
    }
    // reserve in the special case the regex "." (which matches everything),
    // since we know the final family size.
    if (spec == ".")
      benchmarks->reserve(family_size);

    for (auto const& args : family->args_) {
      for (int num_threads : *thread_counts) {

        Benchmark::Instance instance;
        instance.name = family->name_;
        instance.benchmark = bench_family.get();
        instance.report_mode = family->report_mode_;
        instance.arg = args;
        instance.time_unit = family->time_unit_;
        instance.range_multiplier = family->range_multiplier_;
        instance.min_time = family->min_time_;
        instance.repetitions = family->repetitions_;
        instance.use_real_time = family->use_real_time_;
        instance.use_manual_time = family->use_manual_time_;
        instance.complexity = family->complexity_;
        instance.complexity_lambda = family->complexity_lambda_;
        instance.threads = num_threads;
        instance.multithreaded = !(family->thread_counts_.empty());

        // Add arguments to instance name
        for (auto const& arg : args) {
          AppendHumanReadable(arg, &instance.name);
        }

        // Only non-default settings are encoded into the name.
        if (!IsZero(family->min_time_)) {
          instance.name += StringPrintF("/min_time:%0.3f", family->min_time_);
        }
        if (family->repetitions_ != 0) {
          instance.name += StringPrintF("/repeats:%d", family->repetitions_);
        }
        if (family->use_manual_time_) {
          instance.name += "/manual_time";
        } else if (family->use_real_time_) {
          instance.name += "/real_time";
        }

        // Add the number of threads used to the name
        if (!family->thread_counts_.empty()) {
          instance.name += StringPrintF("/threads:%d", instance.threads);
        }

        if (re.Match(instance.name)) {
          // Mark the instance built from the family's final argument list so
          // complexity reports can be flushed after it runs.
          instance.last_benchmark_instance = (&args == &family->args_.back());
          benchmarks->push_back(std::move(instance));
        }
      }
    }
  }
  return true;
}
427
BenchmarkImp(const char * name)428 BenchmarkImp::BenchmarkImp(const char* name)
429 : name_(name), report_mode_(RM_Unspecified), time_unit_(kNanosecond),
430 range_multiplier_(kRangeMultiplier), min_time_(0.0), repetitions_(0),
431 use_real_time_(false), use_manual_time_(false),
432 complexity_(oNone) {
433 }
434
BenchmarkImp::~BenchmarkImp() {
}

// Appends a single-value argument list. Only legal while each existing
// argument list has exactly one element (or none exist yet).
void BenchmarkImp::Arg(int x) {
  CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
  args_.push_back({x});
}

// Sets the unit used when reporting times for this family.
void BenchmarkImp::Unit(TimeUnit unit) {
  time_unit_ = unit;
}
446
Range(int start,int limit)447 void BenchmarkImp::Range(int start, int limit) {
448 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
449 std::vector<int> arglist;
450 AddRange(&arglist, start, limit, range_multiplier_);
451
452 for (int i : arglist) {
453 args_.push_back({i});
454 }
455 }
456
DenseRange(int start,int limit,int step)457 void BenchmarkImp::DenseRange(int start, int limit, int step) {
458 CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
459 CHECK_GE(start, 0);
460 CHECK_LE(start, limit);
461 for (int arg = start; arg <= limit; arg+= step) {
462 args_.push_back({arg});
463 }
464 }
465
// Appends one complete multi-argument list; callers must keep the arity
// consistent (checked in Benchmark::Args).
void BenchmarkImp::Args(const std::vector<int>& args)
{
  args_.push_back(args);
}
470
// Adds the full cartesian product of the per-parameter ranges: each pair
// (lo, hi) is expanded via AddRange, then every combination of one value
// per parameter becomes one argument list.
void BenchmarkImp::Ranges(const std::vector<std::pair<int, int>>& ranges) {
  std::vector<std::vector<int>> arglists(ranges.size());
  std::size_t total = 1;
  for (std::size_t i = 0; i < ranges.size(); i++) {
    AddRange(&arglists[i], ranges[i].first, ranges[i].second, range_multiplier_);
    total *= arglists[i].size();
  }

  // 'ctr' is a mixed-radix odometer: ctr[j] indexes into arglists[j].
  std::vector<std::size_t> ctr(arglists.size(), 0);

  for (std::size_t i = 0; i < total; i++) {
    std::vector<int> tmp;
    tmp.reserve(arglists.size());

    // Emit the combination currently selected by the odometer.
    for (std::size_t j = 0; j < arglists.size(); j++) {
      tmp.push_back(arglists[j].at(ctr[j]));
    }

    args_.push_back(std::move(tmp));

    // Advance the odometer: increment the first digit that doesn't wrap,
    // resetting the digits before it.
    for (std::size_t j = 0; j < arglists.size(); j++) {
      if (ctr[j] + 1 < arglists[j].size()) {
        ++ctr[j];
        break;
      }
      ctr[j] = 0;
    }
  }
}
500
// Sets the spacing used by Range(); must be at least 2.
void BenchmarkImp::RangeMultiplier(int multiplier) {
  CHECK(multiplier > 1);
  range_multiplier_ = multiplier;
}

// Overrides the --benchmark_min_time flag for this family; must be > 0.
void BenchmarkImp::MinTime(double t) {
  CHECK(t > 0.0);
  min_time_ = t;
}


// Overrides the --benchmark_repetitions flag for this family; must be > 0.
void BenchmarkImp::Repetitions(int n) {
  CHECK(n > 0);
  repetitions_ = n;
}

void BenchmarkImp::ReportAggregatesOnly(bool value) {
  report_mode_ = value ? RM_ReportAggregatesOnly : RM_Default;
}

// Real time and manual time are mutually exclusive bases for measurement.
void BenchmarkImp::UseRealTime() {
  CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
  use_real_time_ = true;
}

void BenchmarkImp::UseManualTime() {
  CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
  use_manual_time_ = true;
}

void BenchmarkImp::Complexity(BigO complexity){
  complexity_ = complexity;
}

void BenchmarkImp::ComplexityLambda(BigOFunc* complexity) {
  complexity_lambda_ = complexity;
}

// Adds one thread-count variant for this family; must be > 0.
void BenchmarkImp::Threads(int t) {
  CHECK_GT(t, 0);
  thread_counts_.push_back(t);
}

// Adds thread-count variants at powers of two between min and max.
void BenchmarkImp::ThreadRange(int min_threads, int max_threads) {
  CHECK_GT(min_threads, 0);
  CHECK_GE(max_threads, min_threads);

  AddRange(&thread_counts_, min_threads, max_threads, 2);
}

void BenchmarkImp::ThreadPerCpu() {
  // Query the CPU count once and cache it for all families.
  static int num_cpus = NumCPUs();
  thread_counts_.push_back(num_cpus);
}

void BenchmarkImp::SetName(const char* name) {
  name_ = name;
}
559
AddRange(std::vector<int> * dst,int lo,int hi,int mult)560 void BenchmarkImp::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
561 CHECK_GE(lo, 0);
562 CHECK_GE(hi, lo);
563 CHECK_GE(mult, 2);
564
565 // Add "lo"
566 dst->push_back(lo);
567
568 static const int kint32max = std::numeric_limits<int32_t>::max();
569
570 // Now space out the benchmarks in multiples of "mult"
571 for (int32_t i = 1; i < kint32max/mult; i *= mult) {
572 if (i >= hi) break;
573 if (i > lo) {
574 dst->push_back(i);
575 }
576 }
577 // Add "hi" (if different from "lo")
578 if (hi != lo) {
579 dst->push_back(hi);
580 }
581 }
582
Benchmark::Benchmark(const char* name)
    : imp_(new BenchmarkImp(name))
{
}

Benchmark::~Benchmark() {
  delete imp_;
}

// Deep-copies the implementation so each Benchmark owns independent state.
Benchmark::Benchmark(Benchmark const& other)
  : imp_(new BenchmarkImp(*other.imp_))
{
}

// Adds a single-value argument list; arity must stay consistent.
Benchmark* Benchmark::Arg(int x) {
  CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1);
  imp_->Arg(x);
  return this;
}
602
// ---------------------------------------------------------------------------
// Benchmark fluent API: each method validates (where needed), forwards to
// the BenchmarkImp, and returns 'this' so calls can be chained.
// ---------------------------------------------------------------------------

Benchmark* Benchmark::Unit(TimeUnit unit) {
  imp_->Unit(unit);
  return this;
}

Benchmark* Benchmark::Range(int start, int limit) {
  CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1);
  imp_->Range(start, limit);
  return this;
}

// Cartesian product of per-parameter ranges; arity must match prior args.
Benchmark* Benchmark::Ranges(const std::vector<std::pair<int, int>>& ranges)
{
  CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == static_cast<int>(ranges.size()));
  imp_->Ranges(ranges);
  return this;
}

Benchmark* Benchmark::DenseRange(int start, int limit, int step) {
  CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1);
  imp_->DenseRange(start, limit, step);
  return this;
}

Benchmark* Benchmark::Args(const std::vector<int>& args) {
  CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == static_cast<int>(args.size()));
  imp_->Args(args);
  return this;
}

// Invokes a user callback that may apply arbitrary configuration calls.
Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
  custom_arguments(this);
  return this;
}

Benchmark* Benchmark::RangeMultiplier(int multiplier) {
  imp_->RangeMultiplier(multiplier);
  return this;
}


Benchmark* Benchmark::Repetitions(int t) {
  imp_->Repetitions(t);
  return this;
}

Benchmark* Benchmark::ReportAggregatesOnly(bool value) {
  imp_->ReportAggregatesOnly(value);
  return this;
}

Benchmark* Benchmark::MinTime(double t) {
  imp_->MinTime(t);
  return this;
}

Benchmark* Benchmark::UseRealTime() {
  imp_->UseRealTime();
  return this;
}

Benchmark* Benchmark::UseManualTime() {
  imp_->UseManualTime();
  return this;
}

Benchmark* Benchmark::Complexity(BigO complexity) {
  imp_->Complexity(complexity);
  return this;
}

// A lambda-based complexity implies the oLambda BigO mode.
Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
  imp_->Complexity(oLambda);
  imp_->ComplexityLambda(complexity);
  return this;
}

Benchmark* Benchmark::Threads(int t) {
  imp_->Threads(t);
  return this;
}

Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
  imp_->ThreadRange(min_threads, max_threads);
  return this;
}

Benchmark* Benchmark::ThreadPerCpu() {
  imp_->ThreadPerCpu();
  return this;
}

void Benchmark::SetName(const char* name) {
  imp_->SetName(name);
}

// Runs the user-supplied function for function-pointer benchmarks.
void FunctionBenchmark::Run(State& st) {
  func_(st);
}
702
703 } // end namespace internal
704
705 namespace {
706
707 // Execute one thread of benchmark b for the specified number of iterations.
708 // Adds the stats collected for the thread into *total.
// Execute one thread of benchmark b for the specified number of iterations.
// Adds the stats collected for the thread into *total.
void RunInThread(const benchmark::internal::Benchmark::Instance* b,
                 size_t iters, int thread_id,
                 internal::ThreadManager* manager) {
  internal::ThreadTimer timer;
  State st(iters, b->arg, thread_id, b->threads, &timer, manager);
  b->benchmark->Run(st);
  // A benchmark that returns early without exhausting KeepRunning() (and
  // without SkipWithError, which fast-forwards the count) is a user bug.
  CHECK(st.iterations() == st.max_iterations) <<
    "Benchmark returned before State::KeepRunning() returned false!";
  {
    // Fold this thread's measurements into the shared totals.
    MutexLock l(manager->GetBenchmarkMutex());
    manager->cpu_time_used += timer.cpu_time_used();
    manager->real_time_used += timer.real_time_used();
    manager->manual_time_used += timer.manual_time_used();
    manager->bytes_processed += st.bytes_processed();
    manager->items_processed += st.items_processed();
    manager->complexity_n += st.complexity_length_n();
  }
  manager->NotifyThreadComplete();
}
728
RunBenchmark(const benchmark::internal::Benchmark::Instance & b,std::vector<BenchmarkReporter::Run> * complexity_reports)729 std::vector<BenchmarkReporter::Run> RunBenchmark(
730 const benchmark::internal::Benchmark::Instance& b,
731 std::vector<BenchmarkReporter::Run>* complexity_reports) {
732 std::vector<BenchmarkReporter::Run> reports; // return value
733
734 size_t iters = 1;
735 const int num_threads = b.multithreaded ? b.threads : 1;
736 std::vector<std::thread> pool;
737 if (num_threads > 1) pool.resize(num_threads -1);
738
739 const int repeats = b.repetitions != 0 ? b.repetitions
740 : FLAGS_benchmark_repetitions;
741 const bool report_aggregates_only = repeats != 1 &&
742 (b.report_mode == internal::RM_Unspecified
743 ? FLAGS_benchmark_report_aggregates_only
744 : b.report_mode == internal::RM_ReportAggregatesOnly);
745 for (int i = 0; i < repeats; i++) {
746 std::string mem;
747 for (;;) {
748 // Try benchmark
749 VLOG(2) << "Running " << b.name << " for " << iters << "\n";
750
751 internal::ThreadManager manager(num_threads);
752 if (b.multithreaded) {
753 // If this is out first iteration of the while(true) loop then the
754 // threads haven't been started and can't be joined. Otherwise we need
755 // to join the thread before replacing them.
756 for (std::thread& thread : pool) {
757 if (thread.joinable())
758 thread.join();
759 }
760 for (std::size_t ti = 0; ti < pool.size(); ++ti) {
761 pool[ti] = std::thread(&RunInThread, &b, iters,
762 static_cast<int>(ti + 1), &manager);
763 }
764 }
765 RunInThread(&b, iters, 0, &manager);
766 manager.WaitForAllThreads();
767 MutexLock l(manager.GetBenchmarkMutex());
768
769 const double cpu_accumulated_time = manager.cpu_time_used;
770 const double real_accumulated_time = manager.real_time_used / num_threads;
771 const double manual_accumulated_time = manager.manual_time_used / num_threads;
772
773 VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
774 << real_accumulated_time << "\n";
775
776 // Base decisions off of real time if requested by this benchmark.
777 double seconds = cpu_accumulated_time;
778 if (b.use_manual_time) {
779 seconds = manual_accumulated_time;
780 } else if (b.use_real_time) {
781 seconds = real_accumulated_time;
782 }
783
784 const double min_time = !IsZero(b.min_time) ? b.min_time
785 : FLAGS_benchmark_min_time;
786 // If this was the first run, was elapsed time or cpu time large enough?
787 // If this is not the first run, go with the current value of iter.
788 if ((i > 0) || manager.has_error_ || (iters >= kMaxIterations) ||
789 (seconds >= min_time) || (real_accumulated_time >= 5 * min_time)) {
790 // Create report about this benchmark run.
791 BenchmarkReporter::Run report;
792 report.benchmark_name = b.name;
793 report.error_occurred = manager.has_error_;
794 report.error_message = manager.error_message_;
795 report.report_label = manager.report_label_;
796 // Report the total iterations across all threads.
797 report.iterations = static_cast<int64_t>(iters) * b.threads;
798 report.time_unit = b.time_unit;
799
800 if (!report.error_occurred) {
801 double bytes_per_second = 0;
802 if (manager.bytes_processed > 0 && seconds > 0.0) {
803 bytes_per_second = (manager.bytes_processed / seconds);
804 }
805 double items_per_second = 0;
806 if (manager.items_processed > 0 && seconds > 0.0) {
807 items_per_second = (manager.items_processed / seconds);
808 }
809
810 if (b.use_manual_time) {
811 report.real_accumulated_time = manual_accumulated_time;
812 } else {
813 report.real_accumulated_time = real_accumulated_time;
814 }
815 report.cpu_accumulated_time = cpu_accumulated_time;
816 report.bytes_per_second = bytes_per_second;
817 report.items_per_second = items_per_second;
818 report.complexity_n = manager.complexity_n;
819 report.complexity = b.complexity;
820 report.complexity_lambda = b.complexity_lambda;
821 if(report.complexity != oNone)
822 complexity_reports->push_back(report);
823 }
824
825 reports.push_back(report);
826 break;
827 }
828
829 // See how much iterations should be increased by
830 // Note: Avoid division by zero with max(seconds, 1ns).
831 double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
832 // If our last run was at least 10% of FLAGS_benchmark_min_time then we
833 // use the multiplier directly. Otherwise we use at most 10 times
834 // expansion.
835 // NOTE: When the last run was at least 10% of the min time the max
836 // expansion should be 14x.
837 bool is_significant = (seconds / min_time) > 0.1;
838 multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
839 if (multiplier <= 1.0) multiplier = 2.0;
840 double next_iters = std::max(multiplier * iters, iters + 1.0);
841 if (next_iters > kMaxIterations) {
842 next_iters = kMaxIterations;
843 }
844 VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
845 iters = static_cast<int>(next_iters + 0.5);
846 }
847 }
848 if (b.multithreaded) {
849 for (std::thread& thread : pool)
850 thread.join();
851 }
852 // Calculate additional statistics
853 auto stat_reports = ComputeStats(reports);
854 if((b.complexity != oNone) && b.last_benchmark_instance) {
855 auto additional_run_stats = ComputeBigO(*complexity_reports);
856 stat_reports.insert(stat_reports.end(), additional_run_stats.begin(),
857 additional_run_stats.end());
858 complexity_reports->clear();
859 }
860
861 if (report_aggregates_only) reports.clear();
862 reports.insert(reports.end(), stat_reports.begin(), stat_reports.end());
863 return reports;
864 }
865
866 } // namespace
867
// Constructs the per-thread State handed to a benchmark body. 'ranges'
// holds this run's argument values; the timer and manager are owned by
// the caller and must outlive the State.
State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
             int n_threads, internal::ThreadTimer* timer,
             internal::ThreadManager* manager)
    : started_(false),
      finished_(false),
      total_iterations_(0),
      range_(ranges),
      bytes_processed_(0),
      items_processed_(0),
      complexity_n_(0),
      error_occurred_(false),
      thread_index(thread_i),
      threads(n_threads),
      max_iterations(max_iters),
      timer_(timer),
      manager_(manager) {
  CHECK(max_iterations != 0) << "At least one iteration must be run";
  CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
}
887
// Stops this thread's timer; only valid inside the KeepRunning() loop and
// when no error has been reported.
void State::PauseTiming() {
  // Add in time accumulated so far
  CHECK(started_ && !finished_ && !error_occurred_);
  timer_->StopTimer();
}

// Restarts this thread's timer after a PauseTiming().
void State::ResumeTiming() {
  CHECK(started_ && !finished_ && !error_occurred_);
  timer_->StartTimer();
}
898
// Aborts the benchmark with an error: records 'msg' (first error wins
// across threads), fast-forwards the iteration count so KeepRunning()
// exits, and stops the timer if it is running.
void State::SkipWithError(const char* msg) {
  CHECK(msg);
  error_occurred_ = true;
  {
    MutexLock l(manager_->GetBenchmarkMutex());
    if (manager_->has_error_ == false) {
      manager_->error_message_ = msg;
      manager_->has_error_ = true;
    }
  }
  // Make the next KeepRunning() call return false.
  total_iterations_ = max_iterations;
  if (timer_->running()) timer_->StopTimer();
}
912
// Records a manually-measured duration for the current iteration
// (used with UseManualTime()).
void State::SetIterationTime(double seconds)
{
  timer_->SetIterationTime(seconds);
}

// Sets the label shown next to this benchmark's results (last writer wins
// across threads).
void State::SetLabel(const char* label) {
  MutexLock l(manager_->GetBenchmarkMutex());
  manager_->report_label_ = label;
}
922
// First KeepRunning() call: waits for all threads at the barrier so they
// start timing together, then starts the timer (unless already errored).
void State::StartKeepRunning() {
  CHECK(!started_ && !finished_);
  started_ = true;
  manager_->StartStopBarrier();
  if (!error_occurred_) ResumeTiming();
}

// Final KeepRunning() call: stops timing and rendezvouses with the other
// threads so they all finish together.
void State::FinishKeepRunning() {
  CHECK(started_ && (!finished_ || error_occurred_));
  if (!error_occurred_) {
    PauseTiming();
  }
  // Total iterations now is one greater than max iterations. Fix this.
  total_iterations_ = max_iterations;
  finished_ = true;
  manager_->StartStopBarrier();
}
940
941 namespace internal {
942 namespace {
943
// Runs every instance in 'benchmarks', emitting context and per-run
// results to the console reporter and, when provided, the file reporter.
void RunMatchingBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
                           BenchmarkReporter* console_reporter,
                           BenchmarkReporter* file_reporter) {
  // Note the file_reporter can be null.
  CHECK(console_reporter != nullptr);

  // Determine the width of the name field using a minimum width of 10.
  bool has_repetitions = FLAGS_benchmark_repetitions > 1;
  size_t name_field_width = 10;
  for (const Benchmark::Instance& benchmark : benchmarks) {
    name_field_width =
        std::max<size_t>(name_field_width, benchmark.name.size());
    has_repetitions |= benchmark.repetitions > 1;
  }
  // Leave room for the "_stddev" suffix appended to aggregate rows.
  if (has_repetitions)
    name_field_width += std::strlen("_stddev");

  // Print header here
  BenchmarkReporter::Context context;
  context.num_cpus = NumCPUs();
  context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;

  context.cpu_scaling_enabled = CpuScalingEnabled();
  context.name_field_width = name_field_width;

  // Keep track of running times of all instances of the current benchmark
  std::vector<BenchmarkReporter::Run> complexity_reports;

  // Either reporter may veto the run by rejecting the context.
  if (console_reporter->ReportContext(context)
      && (!file_reporter || file_reporter->ReportContext(context))) {
    for (const auto& benchmark : benchmarks) {
      std::vector<BenchmarkReporter::Run> reports =
          RunBenchmark(benchmark, &complexity_reports);
      console_reporter->ReportRuns(reports);
      if (file_reporter) file_reporter->ReportRuns(reports);
    }
  }
  console_reporter->Finalize();
  if (file_reporter) file_reporter->Finalize();
}
984
985 std::unique_ptr<BenchmarkReporter>
CreateReporter(std::string const & name,ConsoleReporter::OutputOptions allow_color)986 CreateReporter(std::string const& name, ConsoleReporter::OutputOptions allow_color) {
987 typedef std::unique_ptr<BenchmarkReporter> PtrType;
988 if (name == "console") {
989 return PtrType(new ConsoleReporter(allow_color));
990 } else if (name == "json") {
991 return PtrType(new JSONReporter);
992 } else if (name == "csv") {
993 return PtrType(new CSVReporter);
994 } else {
995 std::cerr << "Unexpected format: '" << name << "'\n";
996 std::exit(1);
997 }
998 }
999
1000 } // end namespace
1001 } // end namespace internal
1002
// Runs all registered benchmarks matched by --benchmark_filter, using
// default reporters; returns the number of matched benchmarks.
size_t RunSpecifiedBenchmarks() {
  return RunSpecifiedBenchmarks(nullptr, nullptr);
}
1006
1007
// Same as above, but writes console output through the caller-provided
// reporter (no file reporter); returns the number of matched benchmarks.
size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter) {
  return RunSpecifiedBenchmarks(console_reporter, nullptr);
}
1011
1012
RunSpecifiedBenchmarks(BenchmarkReporter * console_reporter,BenchmarkReporter * file_reporter)1013 size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
1014 BenchmarkReporter* file_reporter) {
1015 std::string spec = FLAGS_benchmark_filter;
1016 if (spec.empty() || spec == "all")
1017 spec = "."; // Regexp that matches all benchmarks
1018
1019 // Setup the reporters
1020 std::ofstream output_file;
1021 std::unique_ptr<BenchmarkReporter> default_console_reporter;
1022 std::unique_ptr<BenchmarkReporter> default_file_reporter;
1023 if (!console_reporter) {
1024 auto output_opts = FLAGS_color_print ? ConsoleReporter::OO_Color
1025 : ConsoleReporter::OO_None;
1026 default_console_reporter = internal::CreateReporter(
1027 FLAGS_benchmark_format, output_opts);
1028 console_reporter = default_console_reporter.get();
1029 }
1030 auto& Out = console_reporter->GetOutputStream();
1031 auto& Err = console_reporter->GetErrorStream();
1032
1033 std::string const& fname = FLAGS_benchmark_out;
1034 if (fname == "" && file_reporter) {
1035 Err << "A custom file reporter was provided but "
1036 "--benchmark_out=<file> was not specified." << std::endl;
1037 std::exit(1);
1038 }
1039 if (fname != "") {
1040 output_file.open(fname);
1041 if (!output_file.is_open()) {
1042 Err << "invalid file name: '" << fname << std::endl;
1043 std::exit(1);
1044 }
1045 if (!file_reporter) {
1046 default_file_reporter = internal::CreateReporter(
1047 FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
1048 file_reporter = default_file_reporter.get();
1049 }
1050 file_reporter->SetOutputStream(&output_file);
1051 file_reporter->SetErrorStream(&output_file);
1052 }
1053
1054 std::vector<internal::Benchmark::Instance> benchmarks;
1055 auto families = internal::BenchmarkFamilies::GetInstance();
1056 if (!families->FindBenchmarks(spec, &benchmarks, &Err)) return 0;
1057
1058 if (FLAGS_benchmark_list_tests) {
1059 for (auto const& benchmark : benchmarks)
1060 Out << benchmark.name << "\n";
1061 } else {
1062 internal::RunMatchingBenchmarks(benchmarks, console_reporter, file_reporter);
1063 }
1064
1065 return benchmarks.size();
1066 }
1067
1068 namespace internal {
1069
PrintUsageAndExit()1070 void PrintUsageAndExit() {
1071 fprintf(stdout,
1072 "benchmark"
1073 " [--benchmark_list_tests={true|false}]\n"
1074 " [--benchmark_filter=<regex>]\n"
1075 " [--benchmark_min_time=<min_time>]\n"
1076 " [--benchmark_repetitions=<num_repetitions>]\n"
1077 " [--benchmark_report_aggregates_only={true|false}\n"
1078 " [--benchmark_format=<console|json|csv>]\n"
1079 " [--benchmark_out=<filename>]\n"
1080 " [--benchmark_out_format=<json|console|csv>]\n"
1081 " [--color_print={true|false}]\n"
1082 " [--v=<verbosity>]\n");
1083 exit(0);
1084 }
1085
ParseCommandLineFlags(int * argc,char ** argv)1086 void ParseCommandLineFlags(int* argc, char** argv) {
1087 using namespace benchmark;
1088 for (int i = 1; i < *argc; ++i) {
1089 if (
1090 ParseBoolFlag(argv[i], "benchmark_list_tests",
1091 &FLAGS_benchmark_list_tests) ||
1092 ParseStringFlag(argv[i], "benchmark_filter",
1093 &FLAGS_benchmark_filter) ||
1094 ParseDoubleFlag(argv[i], "benchmark_min_time",
1095 &FLAGS_benchmark_min_time) ||
1096 ParseInt32Flag(argv[i], "benchmark_repetitions",
1097 &FLAGS_benchmark_repetitions) ||
1098 ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
1099 &FLAGS_benchmark_report_aggregates_only) ||
1100 ParseStringFlag(argv[i], "benchmark_format",
1101 &FLAGS_benchmark_format) ||
1102 ParseStringFlag(argv[i], "benchmark_out",
1103 &FLAGS_benchmark_out) ||
1104 ParseStringFlag(argv[i], "benchmark_out_format",
1105 &FLAGS_benchmark_out_format) ||
1106 ParseBoolFlag(argv[i], "color_print",
1107 &FLAGS_color_print) ||
1108 ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
1109 for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
1110
1111 --(*argc);
1112 --i;
1113 } else if (IsFlag(argv[i], "help")) {
1114 PrintUsageAndExit();
1115 }
1116 }
1117 for (auto const* flag : {&FLAGS_benchmark_format,
1118 &FLAGS_benchmark_out_format})
1119 if (*flag != "console" && *flag != "json" && *flag != "csv") {
1120 PrintUsageAndExit();
1121 }
1122 }
1123
RegisterBenchmarkInternal(Benchmark * bench)1124 Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
1125 std::unique_ptr<Benchmark> bench_ptr(bench);
1126 BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
1127 families->AddBenchmark(std::move(bench_ptr));
1128 return bench;
1129 }
1130
InitializeStreams()1131 int InitializeStreams() {
1132 static std::ios_base::Init init;
1133 return 0;
1134 }
1135
1136 } // end namespace internal
1137
// Parses (and removes) this library's flags from argc/argv, then applies
// the resulting --v verbosity to the internal logger. Call before
// RunSpecifiedBenchmarks().
void Initialize(int* argc, char** argv) {
  internal::ParseCommandLineFlags(argc, argv);
  // FLAGS_v only holds its final value after the flags have been parsed.
  internal::LogLevel() = FLAGS_v;
}
1142
1143 } // end namespace benchmark
1144