1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
18 
19 #include <folly/Benchmark.h>
20 
21 #include <algorithm>
22 #include <cmath>
23 #include <cstring>
24 #include <iostream>
25 #include <limits>
26 #include <map>
27 #include <memory>
28 #include <numeric>
29 #include <utility>
30 #include <vector>
31 
32 #include <boost/regex.hpp>
33 
34 #include <folly/FileUtil.h>
35 #include <folly/MapUtil.h>
36 #include <folly/String.h>
37 #include <folly/container/Foreach.h>
38 #include <folly/json.h>
39 
40 using namespace std;
41 
42 DEFINE_bool(benchmark, false, "Run benchmarks.");
43 
44 DEFINE_bool(json, false, "Output in JSON format.");
45 DEFINE_string(
46     bm_relative_to,
47     "",
48     "Print benchmark results relative to an earlier dump (via --bm_json_verbose)");
49 
50 DEFINE_string(
51     bm_json_verbose,
52     "",
53     "File to write verbose JSON format (for BenchmarkCompare / --bm_relative_to). "
54     "NOTE: this is written independent of the above --json / --bm_relative_to.");
55 
56 DEFINE_string(
57     bm_regex, "", "Only benchmarks whose names match this regex will be run.");
58 
59 DEFINE_int64(
60     bm_min_usec,
61     100,
62     "Minimum # of microseconds we'll accept for each benchmark.");
63 
64 DEFINE_int32(
65     bm_min_iters, 1, "Minimum # of iterations we'll try for each benchmark.");
66 
67 DEFINE_int64(
68     bm_max_iters,
69     1 << 30,
70     "Maximum # of iterations we'll try for each benchmark.");
71 
72 DEFINE_int32(
73     bm_max_secs, 1, "Maximum # of seconds we'll spend on each benchmark.");
74 
75 namespace folly {
76 
77 std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
78 
79 typedef function<detail::TimeIterData(unsigned int)> BenchmarkFun;
80 
benchmarks()81 vector<detail::BenchmarkRegistration>& benchmarks() {
82   static vector<detail::BenchmarkRegistration> _benchmarks;
83   return _benchmarks;
84 }
85 
86 #define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline
87 #define FB_STRINGIZE_X2(x) FOLLY_PP_STRINGIZE(x)
88 
89 // Add the global baseline
BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE)90 BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) {
91 #ifdef _MSC_VER
92   _ReadWriteBarrier();
93 #else
94   asm volatile("");
95 #endif
96 }
97 
getGlobalBenchmarkBaselineIndex()98 size_t getGlobalBenchmarkBaselineIndex() {
99   const char* global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE);
100   auto it = std::find_if(
101       benchmarks().begin(),
102       benchmarks().end(),
103       [global](const detail::BenchmarkRegistration& v) {
104         return v.name == global;
105       });
106   CHECK(it != benchmarks().end());
107   return size_t(std::distance(benchmarks().begin(), it));
108 }
109 
110 #undef FB_STRINGIZE_X2
111 #undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE
112 
addBenchmarkImpl(const char * file,StringPiece name,BenchmarkFun fun,bool useCounter)113 void detail::addBenchmarkImpl(
114     const char* file, StringPiece name, BenchmarkFun fun, bool useCounter) {
115   benchmarks().push_back({file, name.str(), std::move(fun), useCounter});
116 }
117 
runBenchmarkGetNSPerIteration(const BenchmarkFun & fun,const double globalBaseline)118 static std::pair<double, UserCounters> runBenchmarkGetNSPerIteration(
119     const BenchmarkFun& fun, const double globalBaseline) {
120   using std::chrono::duration_cast;
121   using std::chrono::high_resolution_clock;
122   using std::chrono::microseconds;
123   using std::chrono::nanoseconds;
124   using std::chrono::seconds;
125 
126   // They key here is accuracy; too low numbers means the accuracy was
127   // coarse. We up the ante until we get to at least minNanoseconds
128   // timings.
129   static_assert(
130       std::is_same<high_resolution_clock::duration, nanoseconds>::value,
131       "High resolution clock must be nanosecond resolution.");
132   // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
133   // the clock resolution is worse than that, it will be larger. In
134   // essence we're aiming at making the quantization noise 0.01%.
135   static const auto minNanoseconds = std::max<nanoseconds>(
136       nanoseconds(100000), microseconds(FLAGS_bm_min_usec));
137 
138   // We do measurements in several epochs and take the minimum, to
139   // account for jitter.
140   static const unsigned int epochs = 1000;
141   // We establish a total time budget as we don't want a measurement
142   // to take too long. This will curtail the number of actual epochs.
143   const auto timeBudget = seconds(FLAGS_bm_max_secs);
144   auto global = high_resolution_clock::now();
145 
146   std::vector<std::pair<double, UserCounters>> epochResults(epochs);
147   size_t actualEpochs = 0;
148 
149   for (; actualEpochs < epochs; ++actualEpochs) {
150     const auto maxIters = uint32_t(FLAGS_bm_max_iters);
151     for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= 2) {
152       detail::TimeIterData timeIterData = fun(static_cast<unsigned int>(n));
153       if (timeIterData.duration < minNanoseconds) {
154         continue;
155       }
156       // We got an accurate enough timing, done. But only save if
157       // smaller than the current result.
158       auto nsecs = duration_cast<nanoseconds>(timeIterData.duration);
159       epochResults[actualEpochs] = std::make_pair(
160           max(0.0, double(nsecs.count()) / timeIterData.niter - globalBaseline),
161           std::move(timeIterData.userCounters));
162       // Done with the current epoch, we got a meaningful timing.
163       break;
164     }
165     auto now = high_resolution_clock::now();
166     if (now - global >= timeBudget) {
167       // No more time budget available.
168       ++actualEpochs;
169       break;
170     }
171   }
172 
173   // Current state of the art: get the minimum. After some
174   // experimentation, it seems taking the minimum is the best.
175   auto iter = min_element(
176       epochResults.begin(),
177       epochResults.begin() + actualEpochs,
178       [](const auto& a, const auto& b) { return a.first < b.first; });
179 
180   // If the benchmark was basically drowned in baseline noise, it's
181   // possible it became negative.
182   return std::make_pair(max(0.0, iter->first), iter->second);
183 }
184 
185 struct ScaleInfo {
186   double boundary;
187   const char* suffix;
188 };
189 
190 static const ScaleInfo kTimeSuffixes[]{
191     {365.25 * 24 * 3600, "years"},
192     {24 * 3600, "days"},
193     {3600, "hr"},
194     {60, "min"},
195     {1, "s"},
196     {1E-3, "ms"},
197     {1E-6, "us"},
198     {1E-9, "ns"},
199     {1E-12, "ps"},
200     {1E-15, "fs"},
201     {0, nullptr},
202 };
203 
204 static const ScaleInfo kMetricSuffixes[]{
205     {1E24, "Y"}, // yotta
206     {1E21, "Z"}, // zetta
207     {1E18, "X"}, // "exa" written with suffix 'X' so as to not create
208                  //   confusion with scientific notation
209     {1E15, "P"}, // peta
210     {1E12, "T"}, // terra
211     {1E9, "G"}, // giga
212     {1E6, "M"}, // mega
213     {1E3, "K"}, // kilo
214     {1, ""},
215     {1E-3, "m"}, // milli
216     {1E-6, "u"}, // micro
217     {1E-9, "n"}, // nano
218     {1E-12, "p"}, // pico
219     {1E-15, "f"}, // femto
220     {1E-18, "a"}, // atto
221     {1E-21, "z"}, // zepto
222     {1E-24, "y"}, // yocto
223     {0, nullptr},
224 };
225 
humanReadable(double n,unsigned int decimals,const ScaleInfo * scales)226 static string humanReadable(
227     double n, unsigned int decimals, const ScaleInfo* scales) {
228   if (std::isinf(n) || std::isnan(n)) {
229     return folly::to<string>(n);
230   }
231 
232   const double absValue = fabs(n);
233   const ScaleInfo* scale = scales;
234   while (absValue < scale[0].boundary && scale[1].suffix != nullptr) {
235     ++scale;
236   }
237 
238   const double scaledValue = n / scale->boundary;
239   return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix);
240 }
241 
readableTime(double n,unsigned int decimals)242 static string readableTime(double n, unsigned int decimals) {
243   return humanReadable(n, decimals, kTimeSuffixes);
244 }
245 
metricReadable(double n,unsigned int decimals)246 static string metricReadable(double n, unsigned int decimals) {
247   return humanReadable(n, decimals, kMetricSuffixes);
248 }
249 
250 namespace {
251 class BenchmarkResultsPrinter {
252  public:
253   BenchmarkResultsPrinter() = default;
BenchmarkResultsPrinter(std::set<std::string> counterNames)254   explicit BenchmarkResultsPrinter(std::set<std::string> counterNames)
255       : counterNames_(std::move(counterNames)),
256         namesLength_{std::accumulate(
257             counterNames_.begin(),
258             counterNames_.end(),
259             size_t{0},
260             [](size_t acc, auto&& name) { return acc + 2 + name.length(); })} {}
261 
262   static constexpr unsigned int columns{76};
separator(char pad)263   void separator(char pad) {
264     puts(string(columns + namesLength_, pad).c_str());
265   }
266 
header(const string & file)267   void header(const string& file) {
268     separator('=');
269     printf("%-*srelative  time/iter  iters/s", columns - 28, file.c_str());
270     for (auto const& name : counterNames_) {
271       printf("  %s", name.c_str());
272     }
273     printf("\n");
274     separator('=');
275   }
276 
print(const vector<detail::BenchmarkResult> & data)277   void print(const vector<detail::BenchmarkResult>& data) {
278     for (auto& datum : data) {
279       auto file = datum.file;
280       if (file != lastFile_) {
281         // New file starting
282         header(file);
283         lastFile_ = file;
284       }
285 
286       string s = datum.name;
287       if (s == "-") {
288         separator('-');
289         continue;
290       }
291       bool useBaseline /* = void */;
292       if (s[0] == '%') {
293         s.erase(0, 1);
294         useBaseline = true;
295       } else {
296         baselineNsPerIter_ = datum.timeInNs;
297         useBaseline = false;
298       }
299       s.resize(columns - 29, ' ');
300       auto nsPerIter = datum.timeInNs;
301       auto secPerIter = nsPerIter / 1E9;
302       auto itersPerSec = (secPerIter == 0)
303           ? std::numeric_limits<double>::infinity()
304           : (1 / secPerIter);
305       if (!useBaseline) {
306         // Print without baseline
307         printf(
308             "%*s           %9s  %7s",
309             static_cast<int>(s.size()),
310             s.c_str(),
311             readableTime(secPerIter, 2).c_str(),
312             metricReadable(itersPerSec, 2).c_str());
313       } else {
314         // Print with baseline
315         auto rel = baselineNsPerIter_ / nsPerIter * 100.0;
316         printf(
317             "%*s %7.2f%%  %9s  %7s",
318             static_cast<int>(s.size()),
319             s.c_str(),
320             rel,
321             readableTime(secPerIter, 2).c_str(),
322             metricReadable(itersPerSec, 2).c_str());
323       }
324       for (auto const& name : counterNames_) {
325         if (auto ptr = folly::get_ptr(datum.counters, name)) {
326           switch (ptr->type) {
327             case UserMetric::Type::TIME:
328               printf(
329                   "  %*s",
330                   int(name.length()),
331                   readableTime(ptr->value, 2).c_str());
332               break;
333             case UserMetric::Type::METRIC:
334               printf(
335                   "  %*s",
336                   int(name.length()),
337                   metricReadable(ptr->value, 2).c_str());
338               break;
339             case UserMetric::Type::CUSTOM:
340             default:
341               printf("  %*" PRId64, int(name.length()), ptr->value);
342           }
343         } else {
344           printf("  %*s", int(name.length()), "NaN");
345         }
346       }
347       printf("\n");
348     }
349   }
350 
351  private:
352   std::set<std::string> counterNames_;
353   size_t namesLength_{0};
354   double baselineNsPerIter_{numeric_limits<double>::max()};
355   string lastFile_;
356 };
357 } // namespace
358 
printBenchmarkResultsAsJson(const vector<detail::BenchmarkResult> & data)359 static void printBenchmarkResultsAsJson(
360     const vector<detail::BenchmarkResult>& data) {
361   dynamic d = dynamic::object;
362   for (auto& datum : data) {
363     d[datum.name] = datum.timeInNs * 1000.;
364   }
365 
366   printf("%s\n", toPrettyJson(d).c_str());
367 }
368 
benchmarkResultsToDynamic(const vector<detail::BenchmarkResult> & data,dynamic & out)369 void benchmarkResultsToDynamic(
370     const vector<detail::BenchmarkResult>& data, dynamic& out) {
371   out = dynamic::array;
372   for (auto& datum : data) {
373     if (!datum.counters.empty()) {
374       dynamic obj = dynamic::object;
375       for (auto& counter : datum.counters) {
376         dynamic counterInfo = dynamic::object;
377         counterInfo["value"] = counter.second.value;
378         counterInfo["type"] = static_cast<int>(counter.second.type);
379         obj[counter.first] = counterInfo;
380       }
381       out.push_back(
382           dynamic::array(datum.file, datum.name, datum.timeInNs, obj));
383     } else {
384       out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs));
385     }
386   }
387 }
388 
benchmarkResultsFromDynamic(const dynamic & d,vector<detail::BenchmarkResult> & results)389 void benchmarkResultsFromDynamic(
390     const dynamic& d, vector<detail::BenchmarkResult>& results) {
391   for (auto& datum : d) {
392     results.push_back(
393         {datum[0].asString(),
394          datum[1].asString(),
395          datum[2].asDouble(),
396          UserCounters{}});
397   }
398 }
399 
resultKey(const detail::BenchmarkResult & result)400 static pair<StringPiece, StringPiece> resultKey(
401     const detail::BenchmarkResult& result) {
402   return pair<StringPiece, StringPiece>(result.file, result.name);
403 }
404 
printResultComparison(const vector<detail::BenchmarkResult> & base,const vector<detail::BenchmarkResult> & test)405 void printResultComparison(
406     const vector<detail::BenchmarkResult>& base,
407     const vector<detail::BenchmarkResult>& test) {
408   map<pair<StringPiece, StringPiece>, double> baselines;
409 
410   for (auto& baseResult : base) {
411     baselines[resultKey(baseResult)] = baseResult.timeInNs;
412   }
413   //
414   // Width available
415   static const unsigned int columns = 76;
416 
417   // Compute the longest benchmark name
418   size_t longestName = 0;
419   for (auto& datum : test) {
420     longestName = max(longestName, datum.name.size());
421   }
422 
423   // Print a horizontal rule
424   auto separator = [&](char pad) { puts(string(columns, pad).c_str()); };
425 
426   // Print header for a file
427   auto header = [&](const string& file) {
428     separator('=');
429     printf("%-*srelative  time/iter  iters/s\n", columns - 28, file.c_str());
430     separator('=');
431   };
432 
433   string lastFile;
434 
435   for (auto& datum : test) {
436     folly::Optional<double> baseline =
437         folly::get_optional(baselines, resultKey(datum));
438     auto file = datum.file;
439     if (file != lastFile) {
440       // New file starting
441       header(file);
442       lastFile = file;
443     }
444 
445     string s = datum.name;
446     if (s == "-") {
447       separator('-');
448       continue;
449     }
450     if (s[0] == '%') {
451       s.erase(0, 1);
452     }
453     s.resize(columns - 29, ' ');
454     auto nsPerIter = datum.timeInNs;
455     auto secPerIter = nsPerIter / 1E9;
456     auto itersPerSec = (secPerIter == 0)
457         ? std::numeric_limits<double>::infinity()
458         : (1 / secPerIter);
459     if (!baseline) {
460       // Print without baseline
461       printf(
462           "%*s           %9s  %7s\n",
463           static_cast<int>(s.size()),
464           s.c_str(),
465           readableTime(secPerIter, 2).c_str(),
466           metricReadable(itersPerSec, 2).c_str());
467     } else {
468       // Print with baseline
469       auto rel = *baseline / nsPerIter * 100.0;
470       printf(
471           "%*s %7.2f%%  %9s  %7s\n",
472           static_cast<int>(s.size()),
473           s.c_str(),
474           rel,
475           readableTime(secPerIter, 2).c_str(),
476           metricReadable(itersPerSec, 2).c_str());
477     }
478   }
479   separator('=');
480 }
481 
checkRunMode()482 void checkRunMode() {
483   if (folly::kIsDebug || folly::kIsSanitize) {
484     std::cerr << "WARNING: Benchmark running "
485               << (folly::kIsDebug ? "in DEBUG mode" : "with SANITIZERS")
486               << std::endl;
487   }
488 }
489 
490 namespace {
491 
492 std::pair<std::set<std::string>, std::vector<detail::BenchmarkResult>>
runBenchmarksWithPrinter(BenchmarkResultsPrinter * FOLLY_NULLABLE printer)493 runBenchmarksWithPrinter(BenchmarkResultsPrinter* FOLLY_NULLABLE printer) {
494   vector<detail::BenchmarkResult> results;
495   results.reserve(benchmarks().size() - 1);
496 
497   std::unique_ptr<boost::regex> bmRegex;
498   if (!FLAGS_bm_regex.empty()) {
499     bmRegex = std::make_unique<boost::regex>(FLAGS_bm_regex);
500   }
501 
502   // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
503 
504   size_t baselineIndex = getGlobalBenchmarkBaselineIndex();
505 
506   auto const globalBaseline =
507       runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, 0);
508 
509   std::set<std::string> counterNames;
510   FOR_EACH_RANGE (i, 0, benchmarks().size()) {
511     if (i == baselineIndex) {
512       continue;
513     }
514     std::pair<double, UserCounters> elapsed;
515     auto& bm = benchmarks()[i];
516     if (bm.name != "-") { // skip separators
517       if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) {
518         continue;
519       }
520       elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline.first);
521     }
522 
523     // if customized user counters is used, it cannot print the result in real
524     // time as it needs to run all cases first to know the complete set of
525     // counters have been used, then the header can be printed out properly
526     if (printer != nullptr) {
527       printer->print({{bm.file, bm.name, elapsed.first, elapsed.second}});
528     } else {
529       results.push_back({bm.file, bm.name, elapsed.first, elapsed.second});
530     }
531 
532     // get all counter names
533     for (auto const& kv : elapsed.second) {
534       counterNames.insert(kv.first);
535     }
536   }
537 
538   // MEASUREMENTS DONE.
539 
540   return std::make_pair(std::move(counterNames), std::move(results));
541 }
542 
resultsFromFile(const std::string & filename)543 std::vector<detail::BenchmarkResult> resultsFromFile(
544     const std::string& filename) {
545   std::string content;
546   readFile(filename.c_str(), content);
547   std::vector<detail::BenchmarkResult> ret;
548   if (!content.empty()) {
549     benchmarkResultsFromDynamic(parseJson(content), ret);
550   }
551   return ret;
552 }
553 
writeResultsToFile(const std::vector<detail::BenchmarkResult> & results,const std::string & filename)554 bool writeResultsToFile(
555     const std::vector<detail::BenchmarkResult>& results,
556     const std::string& filename) {
557   dynamic d;
558   benchmarkResultsToDynamic(results, d);
559   return writeFile(toPrettyJson(d), filename.c_str());
560 }
561 
562 } // namespace
563 
564 namespace detail {
565 
runBenchmarksWithResults()566 std::vector<BenchmarkResult> runBenchmarksWithResults() {
567   return runBenchmarksWithPrinter(nullptr).second;
568 }
569 
570 } // namespace detail
571 
runBenchmarks()572 void runBenchmarks() {
573   CHECK(!benchmarks().empty());
574 
575   checkRunMode();
576 
577   BenchmarkResultsPrinter printer;
578   bool useCounter =
579       std::any_of(benchmarks().begin(), benchmarks().end(), [](const auto& bm) {
580         return bm.useCounter;
581       });
582   // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
583 
584   auto benchmarkResults = runBenchmarksWithPrinter(
585       FLAGS_bm_relative_to.empty() && !FLAGS_json && !useCounter ? &printer
586                                                                  : nullptr);
587 
588   // PLEASE MAKE NOISE. MEASUREMENTS DONE.
589 
590   if (FLAGS_json) {
591     printBenchmarkResultsAsJson(benchmarkResults.second);
592   } else if (!FLAGS_bm_relative_to.empty()) {
593     printResultComparison(
594         resultsFromFile(FLAGS_bm_relative_to), benchmarkResults.second);
595   } else {
596     printer = BenchmarkResultsPrinter{std::move(benchmarkResults.first)};
597     printer.print(benchmarkResults.second);
598     printer.separator('=');
599   }
600 
601   if (!FLAGS_bm_json_verbose.empty()) {
602     writeResultsToFile(benchmarkResults.second, FLAGS_bm_json_verbose);
603   }
604 
605   checkRunMode();
606 }
607 
608 } // namespace folly
609