1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "benchmark/benchmark.h"
16 #include "internal_macros.h"
17 
18 #ifndef BENCHMARK_OS_WINDOWS
19 #include <sys/time.h>
20 #include <sys/resource.h>
21 #include <unistd.h>
22 #endif
23 
#include <algorithm>
#include <atomic>
#include <cmath>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <limits>
#include <memory>
#include <thread>
34 
35 #include "check.h"
36 #include "commandlineflags.h"
37 #include "complexity.h"
38 #include "log.h"
39 #include "mutex.h"
40 #include "re.h"
41 #include "stat.h"
42 #include "string_util.h"
43 #include "sysinfo.h"
44 #include "timers.h"
45 
46 DEFINE_bool(benchmark_list_tests, false,
47             "Print a list of benchmarks. This option overrides all other "
48             "options.");
49 
50 DEFINE_string(benchmark_filter, ".",
51               "A regular expression that specifies the set of benchmarks "
52               "to execute.  If this flag is empty, no benchmarks are run.  "
53               "If this flag is the string \"all\", all benchmarks linked "
54               "into the process are run.");
55 
56 DEFINE_double(benchmark_min_time, 0.5,
57               "Minimum number of seconds we should run benchmark before "
58               "results are considered significant.  For cpu-time based "
59               "tests, this is the lower bound on the total cpu time "
60               "used by all threads that make up the test.  For real-time "
61               "based tests, this is the lower bound on the elapsed time "
62               "of the benchmark execution, regardless of number of "
63               "threads.");
64 
65 DEFINE_int32(benchmark_repetitions, 1,
66              "The number of runs of each benchmark. If greater than 1, the "
67              "mean and standard deviation of the runs will be reported.");
68 
69 DEFINE_bool(benchmark_report_aggregates_only, false,
70             "Report the result of each benchmark repetitions. When 'true' is "
71             "specified only the mean, standard deviation, and other statistics "
72             "are reported for repeated benchmarks.");
73 
74 DEFINE_string(benchmark_format, "console",
75               "The format to use for console output. Valid values are "
76               "'console', 'json', or 'csv'.");
77 
78 DEFINE_string(benchmark_out_format, "json",
79               "The format to use for file output. Valid values are "
80               "'console', 'json', or 'csv'.");
81 
82 DEFINE_string(benchmark_out, "", "The file to write additonal output to");
83 
84 DEFINE_bool(color_print, true, "Enables colorized logging.");
85 
86 DEFINE_int32(v, 0, "The level of verbose logging to output");
87 
88 
89 namespace benchmark {
90 
91 namespace internal {
92 
UseCharPointer(char const volatile *)93 void UseCharPointer(char const volatile*) {}
94 
95 }  // end namespace internal
96 
97 namespace {
98 
IsZero(double n)99 bool IsZero(double n) {
100     return std::abs(n) < std::numeric_limits<double>::epsilon();
101 }
102 
103 // For non-dense Range, intermediate values are powers of kRangeMultiplier.
104 static const int kRangeMultiplier = 8;
105 // The size of a benchmark family determines is the number of inputs to repeat
106 // the benchmark on. If this is "large" then warn the user during configuration.
107 static const size_t kMaxFamilySize = 100;
108 static const size_t kMaxIterations = 1000000000;
109 
110 }  // end namespace
111 
112 namespace internal {
113 
114 // NOTE: This is a dummy "mutex" type used to denote the actual mutex
115 // returned by GetBenchmarkMutex(). This is only used to placate the thread
116 // safety warnings by giving the return of GetBenchmarkLock() a name.
117 struct CAPABILITY("mutex") BenchmarkLockType {};
118 BenchmarkLockType BenchmarkLockVar;
119 
120 class ThreadManager {
121  public:
ThreadManager(int num_threads)122   ThreadManager(int num_threads)
123       : alive_threads_(num_threads), start_stop_barrier_(num_threads) {}
124 
GetBenchmarkMutex() const125   Mutex& GetBenchmarkMutex() const
126       RETURN_CAPABILITY(::benchmark::internal::BenchmarkLockVar) {
127     return benchmark_mutex_;
128   }
129 
StartStopBarrier()130   bool StartStopBarrier() EXCLUDES(end_cond_mutex_) {
131     return start_stop_barrier_.wait();
132   }
133 
NotifyThreadComplete()134   void NotifyThreadComplete() EXCLUDES(end_cond_mutex_) {
135     start_stop_barrier_.removeThread();
136     if (--alive_threads_ == 0) {
137       MutexLock lock(end_cond_mutex_);
138       end_condition_.notify_all();
139     }
140   }
141 
WaitForAllThreads()142   void WaitForAllThreads() EXCLUDES(end_cond_mutex_) {
143     MutexLock lock(end_cond_mutex_);
144     end_condition_.wait(lock.native_handle(),
145                         [this]() { return alive_threads_ == 0; });
146   }
147 
148  public:
149   GUARDED_BY(GetBenchmarkMutex()) double real_time_used = 0;
150   GUARDED_BY(GetBenchmarkMutex()) double cpu_time_used = 0;
151   GUARDED_BY(GetBenchmarkMutex()) double manual_time_used = 0;
152   GUARDED_BY(GetBenchmarkMutex()) int64_t bytes_processed = 0;
153   GUARDED_BY(GetBenchmarkMutex()) int64_t items_processed = 0;
154   GUARDED_BY(GetBenchmarkMutex()) int complexity_n = 0;
155   GUARDED_BY(GetBenchmarkMutex()) std::string report_label_;
156   GUARDED_BY(GetBenchmarkMutex()) std::string error_message_;
157   GUARDED_BY(GetBenchmarkMutex()) bool has_error_ = false;
158 
159  private:
160   mutable Mutex benchmark_mutex_;
161   std::atomic<int> alive_threads_;
162   Barrier start_stop_barrier_;
163   Mutex end_cond_mutex_;
164   Condition end_condition_;
165 };
166 
167 // Timer management class
168 class ThreadTimer {
169  public:
ThreadTimer()170   ThreadTimer()
171       : running_(false),
172         real_time_used_(0),
173         cpu_time_used_(0),
174         manual_time_used_(0) {}
175 
176   // Called by each thread
StartTimer()177   void StartTimer() {
178     running_ = true;
179     start_real_time_ = ChronoClockNow();
180     start_cpu_time_ = ThreadCPUUsage();
181   }
182 
183   // Called by each thread
StopTimer()184   void StopTimer() {
185     CHECK(running_);
186     running_ = false;
187     real_time_used_ += ChronoClockNow() - start_real_time_;
188     cpu_time_used_ += ThreadCPUUsage() - start_cpu_time_;
189   }
190 
191   // Called by each thread
SetIterationTime(double seconds)192   void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
193 
running() const194   bool running() const { return running_; }
195 
196   // REQUIRES: timer is not running
real_time_used()197   double real_time_used() {
198     CHECK(!running_);
199     return real_time_used_;
200   }
201 
202   // REQUIRES: timer is not running
cpu_time_used()203   double cpu_time_used() {
204     CHECK(!running_);
205     return cpu_time_used_;
206   }
207 
208   // REQUIRES: timer is not running
manual_time_used()209   double manual_time_used() {
210     CHECK(!running_);
211     return manual_time_used_;
212   }
213 
214  private:
215   bool running_;                // Is the timer running
216   double start_real_time_;      // If running_
217   double start_cpu_time_;       // If running_
218 
219   // Accumulated time so far (does not contain current slice if running_)
220   double real_time_used_;
221   double cpu_time_used_;
222   // Manually set iteration time. User sets this with SetIterationTime(seconds).
223   double manual_time_used_;
224 
225 };
226 
227 
228 enum ReportMode : unsigned {
229     RM_Unspecified, // The mode has not been manually specified
230     RM_Default,     // The mode is user-specified as default.
231     RM_ReportAggregatesOnly
232 };
233 
234 // Information kept per benchmark we may want to run
235 struct Benchmark::Instance {
236   std::string      name;
237   Benchmark*       benchmark;
238   ReportMode       report_mode;
239   std::vector<int> arg;
240   TimeUnit         time_unit;
241   int              range_multiplier;
242   bool             use_real_time;
243   bool             use_manual_time;
244   BigO             complexity;
245   BigOFunc*        complexity_lambda;
246   bool             last_benchmark_instance;
247   int              repetitions;
248   double           min_time;
249   int              threads;    // Number of concurrent threads to use
250   bool             multithreaded;  // Is benchmark multi-threaded?
251 };
252 
253 // Class for managing registered benchmarks.  Note that each registered
254 // benchmark identifies a family of related benchmarks to run.
255 class BenchmarkFamilies {
256  public:
257   static BenchmarkFamilies* GetInstance();
258 
259   // Registers a benchmark family and returns the index assigned to it.
260   size_t AddBenchmark(std::unique_ptr<Benchmark> family);
261 
262   // Extract the list of benchmark instances that match the specified
263   // regular expression.
264   bool FindBenchmarks(const std::string& re,
265                       std::vector<Benchmark::Instance>* benchmarks,
266                       std::ostream* Err);
267  private:
BenchmarkFamilies()268   BenchmarkFamilies() {}
269 
270   std::vector<std::unique_ptr<Benchmark>> families_;
271   Mutex mutex_;
272 };
273 
274 
275 class BenchmarkImp {
276 public:
277   explicit BenchmarkImp(const char* name);
278   ~BenchmarkImp();
279 
280   void Arg(int x);
281   void Unit(TimeUnit unit);
282   void Range(int start, int limit);
283   void DenseRange(int start, int limit, int step = 1);
284   void Args(const std::vector<int>& args);
285   void Ranges(const std::vector<std::pair<int, int>>& ranges);
286   void RangeMultiplier(int multiplier);
287   void MinTime(double n);
288   void Repetitions(int n);
289   void ReportAggregatesOnly(bool v);
290   void UseRealTime();
291   void UseManualTime();
292   void Complexity(BigO complexity);
293   void ComplexityLambda(BigOFunc* complexity);
294   void Threads(int t);
295   void ThreadRange(int min_threads, int max_threads);
296   void ThreadPerCpu();
297   void SetName(const char* name);
298 
299   static void AddRange(std::vector<int>* dst, int lo, int hi, int mult);
300 
ArgsCnt() const301   int ArgsCnt() const { return args_.empty() ? -1 : static_cast<int>(args_.front().size()); }
302 
303 private:
304   friend class BenchmarkFamilies;
305 
306   std::string name_;
307   ReportMode report_mode_;
308   std::vector< std::vector<int> > args_;  // Args for all benchmark runs
309   TimeUnit time_unit_;
310   int range_multiplier_;
311   double min_time_;
312   int repetitions_;
313   bool use_real_time_;
314   bool use_manual_time_;
315   BigO complexity_;
316   BigOFunc* complexity_lambda_;
317   std::vector<int> thread_counts_;
318 
319   BenchmarkImp& operator=(BenchmarkImp const&);
320 };
321 
GetInstance()322 BenchmarkFamilies* BenchmarkFamilies::GetInstance() {
323   static BenchmarkFamilies instance;
324   return &instance;
325 }
326 
327 
AddBenchmark(std::unique_ptr<Benchmark> family)328 size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) {
329   MutexLock l(mutex_);
330   size_t index = families_.size();
331   families_.push_back(std::move(family));
332   return index;
333 }
334 
FindBenchmarks(const std::string & spec,std::vector<Benchmark::Instance> * benchmarks,std::ostream * ErrStream)335 bool BenchmarkFamilies::FindBenchmarks(
336     const std::string& spec,
337     std::vector<Benchmark::Instance>* benchmarks,
338     std::ostream* ErrStream) {
339   CHECK(ErrStream);
340   auto& Err = *ErrStream;
341   // Make regular expression out of command-line flag
342   std::string error_msg;
343   Regex re;
344   if (!re.Init(spec, &error_msg)) {
345     Err << "Could not compile benchmark re: " << error_msg << std::endl;
346     return false;
347   }
348 
349   // Special list of thread counts to use when none are specified
350   const std::vector<int> one_thread = {1};
351 
352   MutexLock l(mutex_);
353   for (std::unique_ptr<Benchmark>& bench_family : families_) {
354     // Family was deleted or benchmark doesn't match
355     if (!bench_family) continue;
356     BenchmarkImp* family = bench_family->imp_;
357 
358     if (family->ArgsCnt() == -1) {
359       family->Args({});
360     }
361     const std::vector<int>* thread_counts =
362         (family->thread_counts_.empty()
363          ? &one_thread
364          : &static_cast<const std::vector<int>&>(family->thread_counts_));
365     const size_t family_size = family->args_.size() * thread_counts->size();
366     // The benchmark will be run at least 'family_size' different inputs.
367     // If 'family_size' is very large warn the user.
368     if (family_size > kMaxFamilySize) {
369       Err <<  "The number of inputs is very large. " << family->name_
370           << " will be repeated at least " << family_size << " times.\n";
371     }
372     // reserve in the special case the regex ".", since we know the final
373     // family size.
374     if (spec == ".")
375       benchmarks->reserve(family_size);
376 
377     for (auto const& args : family->args_) {
378       for (int num_threads : *thread_counts) {
379 
380         Benchmark::Instance instance;
381         instance.name = family->name_;
382         instance.benchmark = bench_family.get();
383         instance.report_mode = family->report_mode_;
384         instance.arg = args;
385         instance.time_unit = family->time_unit_;
386         instance.range_multiplier = family->range_multiplier_;
387         instance.min_time = family->min_time_;
388         instance.repetitions = family->repetitions_;
389         instance.use_real_time = family->use_real_time_;
390         instance.use_manual_time = family->use_manual_time_;
391         instance.complexity = family->complexity_;
392         instance.complexity_lambda = family->complexity_lambda_;
393         instance.threads = num_threads;
394         instance.multithreaded = !(family->thread_counts_.empty());
395 
396         // Add arguments to instance name
397         for (auto const& arg : args) {
398           AppendHumanReadable(arg, &instance.name);
399         }
400 
401         if (!IsZero(family->min_time_)) {
402           instance.name +=  StringPrintF("/min_time:%0.3f",  family->min_time_);
403         }
404         if (family->repetitions_ != 0) {
405           instance.name +=  StringPrintF("/repeats:%d",  family->repetitions_);
406         }
407         if (family->use_manual_time_) {
408           instance.name +=  "/manual_time";
409         } else if (family->use_real_time_) {
410           instance.name +=  "/real_time";
411         }
412 
413         // Add the number of threads used to the name
414         if (!family->thread_counts_.empty()) {
415           instance.name += StringPrintF("/threads:%d", instance.threads);
416         }
417 
418         if (re.Match(instance.name)) {
419           instance.last_benchmark_instance = (&args == &family->args_.back());
420           benchmarks->push_back(std::move(instance));
421         }
422       }
423     }
424   }
425   return true;
426 }
427 
BenchmarkImp(const char * name)428 BenchmarkImp::BenchmarkImp(const char* name)
429     : name_(name), report_mode_(RM_Unspecified), time_unit_(kNanosecond),
430       range_multiplier_(kRangeMultiplier), min_time_(0.0), repetitions_(0),
431       use_real_time_(false), use_manual_time_(false),
432       complexity_(oNone) {
433 }
434 
~BenchmarkImp()435 BenchmarkImp::~BenchmarkImp() {
436 }
437 
Arg(int x)438 void BenchmarkImp::Arg(int x) {
439   CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
440   args_.push_back({x});
441 }
442 
Unit(TimeUnit unit)443 void BenchmarkImp::Unit(TimeUnit unit) {
444   time_unit_ = unit;
445 }
446 
Range(int start,int limit)447 void BenchmarkImp::Range(int start, int limit) {
448   CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
449   std::vector<int> arglist;
450   AddRange(&arglist, start, limit, range_multiplier_);
451 
452   for (int i : arglist) {
453     args_.push_back({i});
454   }
455 }
456 
DenseRange(int start,int limit,int step)457 void BenchmarkImp::DenseRange(int start, int limit, int step) {
458   CHECK(ArgsCnt() == -1 || ArgsCnt() == 1);
459   CHECK_GE(start, 0);
460   CHECK_LE(start, limit);
461   for (int arg = start; arg <= limit; arg+= step) {
462     args_.push_back({arg});
463   }
464 }
465 
Args(const std::vector<int> & args)466 void BenchmarkImp::Args(const std::vector<int>& args)
467 {
468   args_.push_back(args);
469 }
470 
Ranges(const std::vector<std::pair<int,int>> & ranges)471 void BenchmarkImp::Ranges(const std::vector<std::pair<int, int>>& ranges) {
472   std::vector<std::vector<int>> arglists(ranges.size());
473   std::size_t total = 1;
474   for (std::size_t i = 0; i < ranges.size(); i++) {
475     AddRange(&arglists[i], ranges[i].first, ranges[i].second, range_multiplier_);
476     total *= arglists[i].size();
477   }
478 
479   std::vector<std::size_t> ctr(arglists.size(), 0);
480 
481   for (std::size_t i = 0; i < total; i++) {
482     std::vector<int> tmp;
483     tmp.reserve(arglists.size());
484 
485     for (std::size_t j = 0; j < arglists.size(); j++) {
486       tmp.push_back(arglists[j].at(ctr[j]));
487     }
488 
489     args_.push_back(std::move(tmp));
490 
491     for (std::size_t j = 0; j < arglists.size(); j++) {
492       if (ctr[j] + 1 < arglists[j].size()) {
493         ++ctr[j];
494         break;
495       }
496       ctr[j] = 0;
497     }
498   }
499 }
500 
RangeMultiplier(int multiplier)501 void BenchmarkImp::RangeMultiplier(int multiplier) {
502   CHECK(multiplier > 1);
503   range_multiplier_ = multiplier;
504 }
505 
MinTime(double t)506 void BenchmarkImp::MinTime(double t) {
507   CHECK(t > 0.0);
508   min_time_ = t;
509 }
510 
511 
Repetitions(int n)512 void BenchmarkImp::Repetitions(int n) {
513   CHECK(n > 0);
514   repetitions_ = n;
515 }
516 
ReportAggregatesOnly(bool value)517 void BenchmarkImp::ReportAggregatesOnly(bool value) {
518   report_mode_ = value ? RM_ReportAggregatesOnly : RM_Default;
519 }
520 
UseRealTime()521 void BenchmarkImp::UseRealTime() {
522   CHECK(!use_manual_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
523   use_real_time_ = true;
524 }
525 
UseManualTime()526 void BenchmarkImp::UseManualTime() {
527   CHECK(!use_real_time_) << "Cannot set UseRealTime and UseManualTime simultaneously.";
528   use_manual_time_ = true;
529 }
530 
Complexity(BigO complexity)531 void BenchmarkImp::Complexity(BigO complexity){
532   complexity_ = complexity;
533 }
534 
ComplexityLambda(BigOFunc * complexity)535 void BenchmarkImp::ComplexityLambda(BigOFunc* complexity) {
536   complexity_lambda_ = complexity;
537 }
538 
Threads(int t)539 void BenchmarkImp::Threads(int t) {
540   CHECK_GT(t, 0);
541   thread_counts_.push_back(t);
542 }
543 
ThreadRange(int min_threads,int max_threads)544 void BenchmarkImp::ThreadRange(int min_threads, int max_threads) {
545   CHECK_GT(min_threads, 0);
546   CHECK_GE(max_threads, min_threads);
547 
548   AddRange(&thread_counts_, min_threads, max_threads, 2);
549 }
550 
ThreadPerCpu()551 void BenchmarkImp::ThreadPerCpu() {
552   static int num_cpus = NumCPUs();
553   thread_counts_.push_back(num_cpus);
554 }
555 
SetName(const char * name)556 void BenchmarkImp::SetName(const char* name) {
557   name_ = name;
558 }
559 
AddRange(std::vector<int> * dst,int lo,int hi,int mult)560 void BenchmarkImp::AddRange(std::vector<int>* dst, int lo, int hi, int mult) {
561   CHECK_GE(lo, 0);
562   CHECK_GE(hi, lo);
563   CHECK_GE(mult, 2);
564 
565   // Add "lo"
566   dst->push_back(lo);
567 
568   static const int kint32max = std::numeric_limits<int32_t>::max();
569 
570   // Now space out the benchmarks in multiples of "mult"
571   for (int32_t i = 1; i < kint32max/mult; i *= mult) {
572     if (i >= hi) break;
573     if (i > lo) {
574       dst->push_back(i);
575     }
576   }
577   // Add "hi" (if different from "lo")
578   if (hi != lo) {
579     dst->push_back(hi);
580   }
581 }
582 
Benchmark(const char * name)583 Benchmark::Benchmark(const char* name)
584     : imp_(new BenchmarkImp(name))
585 {
586 }
587 
~Benchmark()588 Benchmark::~Benchmark()  {
589   delete imp_;
590 }
591 
Benchmark(Benchmark const & other)592 Benchmark::Benchmark(Benchmark const& other)
593   : imp_(new BenchmarkImp(*other.imp_))
594 {
595 }
596 
Arg(int x)597 Benchmark* Benchmark::Arg(int x) {
598   CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1);
599   imp_->Arg(x);
600   return this;
601 }
602 
Unit(TimeUnit unit)603 Benchmark* Benchmark::Unit(TimeUnit unit) {
604   imp_->Unit(unit);
605   return this;
606 }
607 
Range(int start,int limit)608 Benchmark* Benchmark::Range(int start, int limit) {
609   CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1);
610   imp_->Range(start, limit);
611   return this;
612 }
613 
Ranges(const std::vector<std::pair<int,int>> & ranges)614 Benchmark* Benchmark::Ranges(const std::vector<std::pair<int, int>>& ranges)
615 {
616   CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == static_cast<int>(ranges.size()));
617   imp_->Ranges(ranges);
618   return this;
619 }
620 
DenseRange(int start,int limit,int step)621 Benchmark* Benchmark::DenseRange(int start, int limit, int step) {
622   CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == 1);
623   imp_->DenseRange(start, limit, step);
624   return this;
625 }
626 
Args(const std::vector<int> & args)627 Benchmark* Benchmark::Args(const std::vector<int>& args) {
628   CHECK(imp_->ArgsCnt() == -1 || imp_->ArgsCnt() == static_cast<int>(args.size()));
629   imp_->Args(args);
630   return this;
631 }
632 
Apply(void (* custom_arguments)(Benchmark * benchmark))633 Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) {
634   custom_arguments(this);
635   return this;
636 }
637 
RangeMultiplier(int multiplier)638 Benchmark* Benchmark::RangeMultiplier(int multiplier) {
639   imp_->RangeMultiplier(multiplier);
640   return this;
641 }
642 
643 
Repetitions(int t)644 Benchmark* Benchmark::Repetitions(int t) {
645   imp_->Repetitions(t);
646   return this;
647 }
648 
ReportAggregatesOnly(bool value)649 Benchmark* Benchmark::ReportAggregatesOnly(bool value) {
650   imp_->ReportAggregatesOnly(value);
651   return this;
652 }
653 
MinTime(double t)654 Benchmark* Benchmark::MinTime(double t) {
655   imp_->MinTime(t);
656   return this;
657 }
658 
UseRealTime()659 Benchmark* Benchmark::UseRealTime() {
660   imp_->UseRealTime();
661   return this;
662 }
663 
UseManualTime()664 Benchmark* Benchmark::UseManualTime() {
665   imp_->UseManualTime();
666   return this;
667 }
668 
Complexity(BigO complexity)669 Benchmark* Benchmark::Complexity(BigO complexity) {
670   imp_->Complexity(complexity);
671   return this;
672 }
673 
Complexity(BigOFunc * complexity)674 Benchmark* Benchmark::Complexity(BigOFunc* complexity) {
675   imp_->Complexity(oLambda);
676   imp_->ComplexityLambda(complexity);
677   return this;
678 }
679 
Threads(int t)680 Benchmark* Benchmark::Threads(int t) {
681   imp_->Threads(t);
682   return this;
683 }
684 
ThreadRange(int min_threads,int max_threads)685 Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) {
686   imp_->ThreadRange(min_threads, max_threads);
687   return this;
688 }
689 
ThreadPerCpu()690 Benchmark* Benchmark::ThreadPerCpu() {
691   imp_->ThreadPerCpu();
692   return this;
693 }
694 
SetName(const char * name)695 void Benchmark::SetName(const char* name) {
696   imp_->SetName(name);
697 }
698 
Run(State & st)699 void FunctionBenchmark::Run(State& st) {
700   func_(st);
701 }
702 
703 } // end namespace internal
704 
705 namespace {
706 
707 // Execute one thread of benchmark b for the specified number of iterations.
708 // Adds the stats collected for the thread into *total.
RunInThread(const benchmark::internal::Benchmark::Instance * b,size_t iters,int thread_id,internal::ThreadManager * manager)709 void RunInThread(const benchmark::internal::Benchmark::Instance* b,
710                  size_t iters, int thread_id,
711                  internal::ThreadManager* manager) {
712   internal::ThreadTimer timer;
713   State st(iters, b->arg, thread_id, b->threads, &timer, manager);
714   b->benchmark->Run(st);
715   CHECK(st.iterations() == st.max_iterations) <<
716     "Benchmark returned before State::KeepRunning() returned false!";
717   {
718     MutexLock l(manager->GetBenchmarkMutex());
719     manager->cpu_time_used += timer.cpu_time_used();
720     manager->real_time_used += timer.real_time_used();
721     manager->manual_time_used += timer.manual_time_used();
722     manager->bytes_processed += st.bytes_processed();
723     manager->items_processed += st.items_processed();
724     manager->complexity_n += st.complexity_length_n();
725   }
726   manager->NotifyThreadComplete();
727 }
728 
RunBenchmark(const benchmark::internal::Benchmark::Instance & b,std::vector<BenchmarkReporter::Run> * complexity_reports)729 std::vector<BenchmarkReporter::Run> RunBenchmark(
730     const benchmark::internal::Benchmark::Instance& b,
731     std::vector<BenchmarkReporter::Run>* complexity_reports) {
732   std::vector<BenchmarkReporter::Run> reports; // return value
733 
734   size_t iters = 1;
735   const int num_threads = b.multithreaded ? b.threads : 1;
736   std::vector<std::thread> pool;
737   if (num_threads > 1) pool.resize(num_threads -1);
738 
739   const int repeats = b.repetitions != 0 ? b.repetitions
740                                          : FLAGS_benchmark_repetitions;
741   const bool report_aggregates_only = repeats != 1 &&
742       (b.report_mode == internal::RM_Unspecified
743         ? FLAGS_benchmark_report_aggregates_only
744         : b.report_mode == internal::RM_ReportAggregatesOnly);
745   for (int i = 0; i < repeats; i++) {
746     std::string mem;
747     for (;;) {
748       // Try benchmark
749       VLOG(2) << "Running " << b.name << " for " << iters << "\n";
750 
751       internal::ThreadManager manager(num_threads);
752       if (b.multithreaded) {
753         // If this is out first iteration of the while(true) loop then the
754         // threads haven't been started and can't be joined. Otherwise we need
755         // to join the thread before replacing them.
756         for (std::thread& thread : pool) {
757           if (thread.joinable())
758             thread.join();
759         }
760         for (std::size_t ti = 0; ti < pool.size(); ++ti) {
761           pool[ti] = std::thread(&RunInThread, &b, iters,
762                                  static_cast<int>(ti + 1), &manager);
763         }
764       }
765       RunInThread(&b, iters, 0, &manager);
766       manager.WaitForAllThreads();
767       MutexLock l(manager.GetBenchmarkMutex());
768 
769       const double cpu_accumulated_time = manager.cpu_time_used;
770       const double real_accumulated_time = manager.real_time_used / num_threads;
771       const double manual_accumulated_time = manager.manual_time_used / num_threads;
772 
773       VLOG(2) << "Ran in " << cpu_accumulated_time << "/"
774               << real_accumulated_time << "\n";
775 
776       // Base decisions off of real time if requested by this benchmark.
777       double seconds = cpu_accumulated_time;
778       if (b.use_manual_time) {
779           seconds = manual_accumulated_time;
780       } else if (b.use_real_time) {
781           seconds = real_accumulated_time;
782       }
783 
784       const double min_time = !IsZero(b.min_time) ? b.min_time
785                                                   : FLAGS_benchmark_min_time;
786       // If this was the first run, was elapsed time or cpu time large enough?
787       // If this is not the first run, go with the current value of iter.
788       if ((i > 0) || manager.has_error_ || (iters >= kMaxIterations) ||
789           (seconds >= min_time) || (real_accumulated_time >= 5 * min_time)) {
790         // Create report about this benchmark run.
791         BenchmarkReporter::Run report;
792         report.benchmark_name = b.name;
793         report.error_occurred = manager.has_error_;
794         report.error_message = manager.error_message_;
795         report.report_label = manager.report_label_;
796         // Report the total iterations across all threads.
797         report.iterations = static_cast<int64_t>(iters) * b.threads;
798         report.time_unit = b.time_unit;
799 
800         if (!report.error_occurred) {
801           double bytes_per_second = 0;
802           if (manager.bytes_processed > 0 && seconds > 0.0) {
803             bytes_per_second = (manager.bytes_processed / seconds);
804           }
805           double items_per_second = 0;
806           if (manager.items_processed > 0 && seconds > 0.0) {
807             items_per_second = (manager.items_processed / seconds);
808           }
809 
810           if (b.use_manual_time) {
811             report.real_accumulated_time = manual_accumulated_time;
812           } else {
813             report.real_accumulated_time = real_accumulated_time;
814           }
815           report.cpu_accumulated_time = cpu_accumulated_time;
816           report.bytes_per_second = bytes_per_second;
817           report.items_per_second = items_per_second;
818           report.complexity_n = manager.complexity_n;
819           report.complexity = b.complexity;
820           report.complexity_lambda = b.complexity_lambda;
821           if(report.complexity != oNone)
822             complexity_reports->push_back(report);
823         }
824 
825         reports.push_back(report);
826         break;
827       }
828 
829       // See how much iterations should be increased by
830       // Note: Avoid division by zero with max(seconds, 1ns).
831       double multiplier = min_time * 1.4 / std::max(seconds, 1e-9);
832       // If our last run was at least 10% of FLAGS_benchmark_min_time then we
833       // use the multiplier directly. Otherwise we use at most 10 times
834       // expansion.
835       // NOTE: When the last run was at least 10% of the min time the max
836       // expansion should be 14x.
837       bool is_significant = (seconds / min_time) > 0.1;
838       multiplier = is_significant ? multiplier : std::min(10.0, multiplier);
839       if (multiplier <= 1.0) multiplier = 2.0;
840       double next_iters = std::max(multiplier * iters, iters + 1.0);
841       if (next_iters > kMaxIterations) {
842         next_iters = kMaxIterations;
843       }
844       VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
845       iters = static_cast<int>(next_iters + 0.5);
846     }
847   }
848   if (b.multithreaded) {
849     for (std::thread& thread : pool)
850       thread.join();
851   }
852   // Calculate additional statistics
853   auto stat_reports = ComputeStats(reports);
854   if((b.complexity != oNone) && b.last_benchmark_instance) {
855     auto additional_run_stats = ComputeBigO(*complexity_reports);
856     stat_reports.insert(stat_reports.end(), additional_run_stats.begin(),
857                    additional_run_stats.end());
858     complexity_reports->clear();
859   }
860 
861   if (report_aggregates_only) reports.clear();
862   reports.insert(reports.end(), stat_reports.begin(), stat_reports.end());
863   return reports;
864 }
865 
866 }  // namespace
867 
State(size_t max_iters,const std::vector<int> & ranges,int thread_i,int n_threads,internal::ThreadTimer * timer,internal::ThreadManager * manager)868 State::State(size_t max_iters, const std::vector<int>& ranges, int thread_i,
869              int n_threads, internal::ThreadTimer* timer,
870              internal::ThreadManager* manager)
871     : started_(false),
872       finished_(false),
873       total_iterations_(0),
874       range_(ranges),
875       bytes_processed_(0),
876       items_processed_(0),
877       complexity_n_(0),
878       error_occurred_(false),
879       thread_index(thread_i),
880       threads(n_threads),
881       max_iterations(max_iters),
882       timer_(timer),
883       manager_(manager) {
884   CHECK(max_iterations != 0) << "At least one iteration must be run";
885   CHECK_LT(thread_index, threads) << "thread_index must be less than threads";
886 }
887 
PauseTiming()888 void State::PauseTiming() {
889   // Add in time accumulated so far
890   CHECK(started_ && !finished_ && !error_occurred_);
891   timer_->StopTimer();
892 }
893 
ResumeTiming()894 void State::ResumeTiming() {
895   CHECK(started_ && !finished_ && !error_occurred_);
896   timer_->StartTimer();
897 }
898 
SkipWithError(const char * msg)899 void State::SkipWithError(const char* msg) {
900   CHECK(msg);
901   error_occurred_ = true;
902   {
903     MutexLock l(manager_->GetBenchmarkMutex());
904     if (manager_->has_error_ == false) {
905       manager_->error_message_ = msg;
906       manager_->has_error_ = true;
907     }
908   }
909   total_iterations_ = max_iterations;
910   if (timer_->running()) timer_->StopTimer();
911 }
912 
SetIterationTime(double seconds)913 void State::SetIterationTime(double seconds)
914 {
915   timer_->SetIterationTime(seconds);
916 }
917 
SetLabel(const char * label)918 void State::SetLabel(const char* label) {
919   MutexLock l(manager_->GetBenchmarkMutex());
920   manager_->report_label_ = label;
921 }
922 
StartKeepRunning()923 void State::StartKeepRunning() {
924   CHECK(!started_ && !finished_);
925   started_ = true;
926   manager_->StartStopBarrier();
927   if (!error_occurred_) ResumeTiming();
928 }
929 
FinishKeepRunning()930 void State::FinishKeepRunning() {
931   CHECK(started_ && (!finished_ || error_occurred_));
932   if (!error_occurred_) {
933     PauseTiming();
934   }
935   // Total iterations now is one greater than max iterations. Fix this.
936   total_iterations_ = max_iterations;
937   finished_ = true;
938   manager_->StartStopBarrier();
939 }
940 
941 namespace internal {
942 namespace {
943 
RunMatchingBenchmarks(const std::vector<Benchmark::Instance> & benchmarks,BenchmarkReporter * console_reporter,BenchmarkReporter * file_reporter)944 void RunMatchingBenchmarks(const std::vector<Benchmark::Instance>& benchmarks,
945                            BenchmarkReporter* console_reporter,
946                            BenchmarkReporter* file_reporter) {
947   // Note the file_reporter can be null.
948   CHECK(console_reporter != nullptr);
949 
950   // Determine the width of the name field using a minimum width of 10.
951   bool has_repetitions = FLAGS_benchmark_repetitions > 1;
952   size_t name_field_width = 10;
953   for (const Benchmark::Instance& benchmark : benchmarks) {
954     name_field_width =
955         std::max<size_t>(name_field_width, benchmark.name.size());
956     has_repetitions |= benchmark.repetitions > 1;
957   }
958   if (has_repetitions)
959     name_field_width += std::strlen("_stddev");
960 
961   // Print header here
962   BenchmarkReporter::Context context;
963   context.num_cpus = NumCPUs();
964   context.mhz_per_cpu = CyclesPerSecond() / 1000000.0f;
965 
966   context.cpu_scaling_enabled = CpuScalingEnabled();
967   context.name_field_width = name_field_width;
968 
969   // Keep track of runing times of all instances of current benchmark
970   std::vector<BenchmarkReporter::Run> complexity_reports;
971 
972   if (console_reporter->ReportContext(context)
973       && (!file_reporter || file_reporter->ReportContext(context))) {
974     for (const auto& benchmark : benchmarks) {
975       std::vector<BenchmarkReporter::Run> reports =
976           RunBenchmark(benchmark, &complexity_reports);
977       console_reporter->ReportRuns(reports);
978       if (file_reporter) file_reporter->ReportRuns(reports);
979     }
980   }
981   console_reporter->Finalize();
982   if (file_reporter) file_reporter->Finalize();
983 }
984 
985 std::unique_ptr<BenchmarkReporter>
CreateReporter(std::string const & name,ConsoleReporter::OutputOptions allow_color)986 CreateReporter(std::string const& name, ConsoleReporter::OutputOptions allow_color) {
987   typedef std::unique_ptr<BenchmarkReporter> PtrType;
988   if (name == "console") {
989     return PtrType(new ConsoleReporter(allow_color));
990   } else if (name == "json") {
991     return PtrType(new JSONReporter);
992   } else if (name == "csv") {
993     return PtrType(new CSVReporter);
994   } else {
995     std::cerr << "Unexpected format: '" << name << "'\n";
996     std::exit(1);
997   }
998 }
999 
1000 } // end namespace
1001 } // end namespace internal
1002 
RunSpecifiedBenchmarks()1003 size_t RunSpecifiedBenchmarks() {
1004   return RunSpecifiedBenchmarks(nullptr, nullptr);
1005 }
1006 
1007 
RunSpecifiedBenchmarks(BenchmarkReporter * console_reporter)1008 size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter) {
1009   return RunSpecifiedBenchmarks(console_reporter, nullptr);
1010 }
1011 
1012 
RunSpecifiedBenchmarks(BenchmarkReporter * console_reporter,BenchmarkReporter * file_reporter)1013 size_t RunSpecifiedBenchmarks(BenchmarkReporter* console_reporter,
1014                               BenchmarkReporter* file_reporter) {
1015   std::string spec = FLAGS_benchmark_filter;
1016   if (spec.empty() || spec == "all")
1017     spec = ".";  // Regexp that matches all benchmarks
1018 
1019   // Setup the reporters
1020   std::ofstream output_file;
1021   std::unique_ptr<BenchmarkReporter> default_console_reporter;
1022   std::unique_ptr<BenchmarkReporter> default_file_reporter;
1023   if (!console_reporter) {
1024     auto output_opts = FLAGS_color_print ? ConsoleReporter::OO_Color
1025                                           : ConsoleReporter::OO_None;
1026     default_console_reporter = internal::CreateReporter(
1027           FLAGS_benchmark_format, output_opts);
1028     console_reporter = default_console_reporter.get();
1029   }
1030   auto& Out = console_reporter->GetOutputStream();
1031   auto& Err = console_reporter->GetErrorStream();
1032 
1033   std::string const& fname = FLAGS_benchmark_out;
1034   if (fname == "" && file_reporter) {
1035     Err << "A custom file reporter was provided but "
1036                    "--benchmark_out=<file> was not specified." << std::endl;
1037     std::exit(1);
1038   }
1039   if (fname != "") {
1040     output_file.open(fname);
1041     if (!output_file.is_open()) {
1042       Err << "invalid file name: '" << fname << std::endl;
1043       std::exit(1);
1044     }
1045     if (!file_reporter) {
1046       default_file_reporter = internal::CreateReporter(
1047             FLAGS_benchmark_out_format, ConsoleReporter::OO_None);
1048       file_reporter = default_file_reporter.get();
1049     }
1050     file_reporter->SetOutputStream(&output_file);
1051     file_reporter->SetErrorStream(&output_file);
1052   }
1053 
1054   std::vector<internal::Benchmark::Instance> benchmarks;
1055   auto families = internal::BenchmarkFamilies::GetInstance();
1056   if (!families->FindBenchmarks(spec, &benchmarks, &Err)) return 0;
1057 
1058   if (FLAGS_benchmark_list_tests) {
1059     for (auto const& benchmark : benchmarks)
1060       Out <<  benchmark.name << "\n";
1061   } else {
1062     internal::RunMatchingBenchmarks(benchmarks, console_reporter, file_reporter);
1063   }
1064 
1065   return benchmarks.size();
1066 }
1067 
1068 namespace internal {
1069 
// Print the command-line usage summary to stdout and exit(0).
void PrintUsageAndExit() {
  fprintf(stdout,
          "benchmark"
          " [--benchmark_list_tests={true|false}]\n"
          "          [--benchmark_filter=<regex>]\n"
          "          [--benchmark_min_time=<min_time>]\n"
          "          [--benchmark_repetitions=<num_repetitions>]\n"
          // Fixed: this line was missing its closing ']'.
          "          [--benchmark_report_aggregates_only={true|false}]\n"
          "          [--benchmark_format=<console|json|csv>]\n"
          "          [--benchmark_out=<filename>]\n"
          "          [--benchmark_out_format=<json|console|csv>]\n"
          "          [--color_print={true|false}]\n"
          "          [--v=<verbosity>]\n");
  exit(0);
}
1085 
ParseCommandLineFlags(int * argc,char ** argv)1086 void ParseCommandLineFlags(int* argc, char** argv) {
1087   using namespace benchmark;
1088   for (int i = 1; i < *argc; ++i) {
1089     if (
1090         ParseBoolFlag(argv[i], "benchmark_list_tests",
1091                       &FLAGS_benchmark_list_tests) ||
1092         ParseStringFlag(argv[i], "benchmark_filter",
1093                         &FLAGS_benchmark_filter) ||
1094         ParseDoubleFlag(argv[i], "benchmark_min_time",
1095                         &FLAGS_benchmark_min_time) ||
1096         ParseInt32Flag(argv[i], "benchmark_repetitions",
1097                        &FLAGS_benchmark_repetitions) ||
1098         ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
1099                        &FLAGS_benchmark_report_aggregates_only) ||
1100         ParseStringFlag(argv[i], "benchmark_format",
1101                         &FLAGS_benchmark_format) ||
1102         ParseStringFlag(argv[i], "benchmark_out",
1103                         &FLAGS_benchmark_out) ||
1104         ParseStringFlag(argv[i], "benchmark_out_format",
1105                         &FLAGS_benchmark_out_format) ||
1106         ParseBoolFlag(argv[i], "color_print",
1107                        &FLAGS_color_print) ||
1108         ParseInt32Flag(argv[i], "v", &FLAGS_v)) {
1109       for (int j = i; j != *argc; ++j) argv[j] = argv[j + 1];
1110 
1111       --(*argc);
1112       --i;
1113     } else if (IsFlag(argv[i], "help")) {
1114       PrintUsageAndExit();
1115     }
1116   }
1117   for (auto const* flag : {&FLAGS_benchmark_format,
1118                            &FLAGS_benchmark_out_format})
1119   if (*flag != "console" && *flag != "json" && *flag != "csv") {
1120     PrintUsageAndExit();
1121   }
1122 }
1123 
RegisterBenchmarkInternal(Benchmark * bench)1124 Benchmark* RegisterBenchmarkInternal(Benchmark* bench) {
1125     std::unique_ptr<Benchmark> bench_ptr(bench);
1126     BenchmarkFamilies* families = BenchmarkFamilies::GetInstance();
1127     families->AddBenchmark(std::move(bench_ptr));
1128     return bench;
1129 }
1130 
// Force construction of the standard C++ iostreams via std::ios_base::Init.
// Returns 0 so the call can seed a file-scope initializer.
int InitializeStreams() {
  static std::ios_base::Init init;
  return 0;
}
1135 
1136 } // end namespace internal
1137 
Initialize(int * argc,char ** argv)1138 void Initialize(int* argc, char** argv) {
1139   internal::ParseCommandLineFlags(argc, argv);
1140   internal::LogLevel() = FLAGS_v;
1141 }
1142 
1143 } // end namespace benchmark
1144