1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 // @author Andrei Alexandrescu (andrei.alexandrescu@fb.com)
18
19 #include <folly/Benchmark.h>
20
21 #include <algorithm>
22 #include <cmath>
23 #include <cstring>
24 #include <iostream>
25 #include <limits>
26 #include <map>
27 #include <memory>
28 #include <numeric>
29 #include <utility>
30 #include <vector>
31
32 #include <boost/regex.hpp>
33
34 #include <folly/FileUtil.h>
35 #include <folly/MapUtil.h>
36 #include <folly/String.h>
37 #include <folly/container/Foreach.h>
38 #include <folly/json.h>
39
40 using namespace std;
41
42 DEFINE_bool(benchmark, false, "Run benchmarks.");
43
44 DEFINE_bool(json, false, "Output in JSON format.");
45 DEFINE_string(
46 bm_relative_to,
47 "",
48 "Print benchmark results relative to an earlier dump (via --bm_json_verbose)");
49
50 DEFINE_string(
51 bm_json_verbose,
52 "",
53 "File to write verbose JSON format (for BenchmarkCompare / --bm_relative_to). "
54 "NOTE: this is written independent of the above --json / --bm_relative_to.");
55
56 DEFINE_string(
57 bm_regex, "", "Only benchmarks whose names match this regex will be run.");
58
59 DEFINE_int64(
60 bm_min_usec,
61 100,
62 "Minimum # of microseconds we'll accept for each benchmark.");
63
64 DEFINE_int32(
65 bm_min_iters, 1, "Minimum # of iterations we'll try for each benchmark.");
66
67 DEFINE_int64(
68 bm_max_iters,
69 1 << 30,
70 "Maximum # of iterations we'll try for each benchmark.");
71
72 DEFINE_int32(
73 bm_max_secs, 1, "Maximum # of seconds we'll spend on each benchmark.");
74
75 namespace folly {
76
77 std::chrono::high_resolution_clock::duration BenchmarkSuspender::timeSpent;
78
79 typedef function<detail::TimeIterData(unsigned int)> BenchmarkFun;
80
benchmarks()81 vector<detail::BenchmarkRegistration>& benchmarks() {
82 static vector<detail::BenchmarkRegistration> _benchmarks;
83 return _benchmarks;
84 }
85
86 #define FB_FOLLY_GLOBAL_BENCHMARK_BASELINE fbFollyGlobalBenchmarkBaseline
87 #define FB_STRINGIZE_X2(x) FOLLY_PP_STRINGIZE(x)
88
89 // Add the global baseline
BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE)90 BENCHMARK(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE) {
91 #ifdef _MSC_VER
92 _ReadWriteBarrier();
93 #else
94 asm volatile("");
95 #endif
96 }
97
getGlobalBenchmarkBaselineIndex()98 size_t getGlobalBenchmarkBaselineIndex() {
99 const char* global = FB_STRINGIZE_X2(FB_FOLLY_GLOBAL_BENCHMARK_BASELINE);
100 auto it = std::find_if(
101 benchmarks().begin(),
102 benchmarks().end(),
103 [global](const detail::BenchmarkRegistration& v) {
104 return v.name == global;
105 });
106 CHECK(it != benchmarks().end());
107 return size_t(std::distance(benchmarks().begin(), it));
108 }
109
110 #undef FB_STRINGIZE_X2
111 #undef FB_FOLLY_GLOBAL_BENCHMARK_BASELINE
112
addBenchmarkImpl(const char * file,StringPiece name,BenchmarkFun fun,bool useCounter)113 void detail::addBenchmarkImpl(
114 const char* file, StringPiece name, BenchmarkFun fun, bool useCounter) {
115 benchmarks().push_back({file, name.str(), std::move(fun), useCounter});
116 }
117
runBenchmarkGetNSPerIteration(const BenchmarkFun & fun,const double globalBaseline)118 static std::pair<double, UserCounters> runBenchmarkGetNSPerIteration(
119 const BenchmarkFun& fun, const double globalBaseline) {
120 using std::chrono::duration_cast;
121 using std::chrono::high_resolution_clock;
122 using std::chrono::microseconds;
123 using std::chrono::nanoseconds;
124 using std::chrono::seconds;
125
126 // They key here is accuracy; too low numbers means the accuracy was
127 // coarse. We up the ante until we get to at least minNanoseconds
128 // timings.
129 static_assert(
130 std::is_same<high_resolution_clock::duration, nanoseconds>::value,
131 "High resolution clock must be nanosecond resolution.");
132 // We choose a minimum minimum (sic) of 100,000 nanoseconds, but if
133 // the clock resolution is worse than that, it will be larger. In
134 // essence we're aiming at making the quantization noise 0.01%.
135 static const auto minNanoseconds = std::max<nanoseconds>(
136 nanoseconds(100000), microseconds(FLAGS_bm_min_usec));
137
138 // We do measurements in several epochs and take the minimum, to
139 // account for jitter.
140 static const unsigned int epochs = 1000;
141 // We establish a total time budget as we don't want a measurement
142 // to take too long. This will curtail the number of actual epochs.
143 const auto timeBudget = seconds(FLAGS_bm_max_secs);
144 auto global = high_resolution_clock::now();
145
146 std::vector<std::pair<double, UserCounters>> epochResults(epochs);
147 size_t actualEpochs = 0;
148
149 for (; actualEpochs < epochs; ++actualEpochs) {
150 const auto maxIters = uint32_t(FLAGS_bm_max_iters);
151 for (auto n = uint32_t(FLAGS_bm_min_iters); n < maxIters; n *= 2) {
152 detail::TimeIterData timeIterData = fun(static_cast<unsigned int>(n));
153 if (timeIterData.duration < minNanoseconds) {
154 continue;
155 }
156 // We got an accurate enough timing, done. But only save if
157 // smaller than the current result.
158 auto nsecs = duration_cast<nanoseconds>(timeIterData.duration);
159 epochResults[actualEpochs] = std::make_pair(
160 max(0.0, double(nsecs.count()) / timeIterData.niter - globalBaseline),
161 std::move(timeIterData.userCounters));
162 // Done with the current epoch, we got a meaningful timing.
163 break;
164 }
165 auto now = high_resolution_clock::now();
166 if (now - global >= timeBudget) {
167 // No more time budget available.
168 ++actualEpochs;
169 break;
170 }
171 }
172
173 // Current state of the art: get the minimum. After some
174 // experimentation, it seems taking the minimum is the best.
175 auto iter = min_element(
176 epochResults.begin(),
177 epochResults.begin() + actualEpochs,
178 [](const auto& a, const auto& b) { return a.first < b.first; });
179
180 // If the benchmark was basically drowned in baseline noise, it's
181 // possible it became negative.
182 return std::make_pair(max(0.0, iter->first), iter->second);
183 }
184
185 struct ScaleInfo {
186 double boundary;
187 const char* suffix;
188 };
189
190 static const ScaleInfo kTimeSuffixes[]{
191 {365.25 * 24 * 3600, "years"},
192 {24 * 3600, "days"},
193 {3600, "hr"},
194 {60, "min"},
195 {1, "s"},
196 {1E-3, "ms"},
197 {1E-6, "us"},
198 {1E-9, "ns"},
199 {1E-12, "ps"},
200 {1E-15, "fs"},
201 {0, nullptr},
202 };
203
204 static const ScaleInfo kMetricSuffixes[]{
205 {1E24, "Y"}, // yotta
206 {1E21, "Z"}, // zetta
207 {1E18, "X"}, // "exa" written with suffix 'X' so as to not create
208 // confusion with scientific notation
209 {1E15, "P"}, // peta
210 {1E12, "T"}, // terra
211 {1E9, "G"}, // giga
212 {1E6, "M"}, // mega
213 {1E3, "K"}, // kilo
214 {1, ""},
215 {1E-3, "m"}, // milli
216 {1E-6, "u"}, // micro
217 {1E-9, "n"}, // nano
218 {1E-12, "p"}, // pico
219 {1E-15, "f"}, // femto
220 {1E-18, "a"}, // atto
221 {1E-21, "z"}, // zepto
222 {1E-24, "y"}, // yocto
223 {0, nullptr},
224 };
225
humanReadable(double n,unsigned int decimals,const ScaleInfo * scales)226 static string humanReadable(
227 double n, unsigned int decimals, const ScaleInfo* scales) {
228 if (std::isinf(n) || std::isnan(n)) {
229 return folly::to<string>(n);
230 }
231
232 const double absValue = fabs(n);
233 const ScaleInfo* scale = scales;
234 while (absValue < scale[0].boundary && scale[1].suffix != nullptr) {
235 ++scale;
236 }
237
238 const double scaledValue = n / scale->boundary;
239 return stringPrintf("%.*f%s", decimals, scaledValue, scale->suffix);
240 }
241
readableTime(double n,unsigned int decimals)242 static string readableTime(double n, unsigned int decimals) {
243 return humanReadable(n, decimals, kTimeSuffixes);
244 }
245
metricReadable(double n,unsigned int decimals)246 static string metricReadable(double n, unsigned int decimals) {
247 return humanReadable(n, decimals, kMetricSuffixes);
248 }
249
250 namespace {
251 class BenchmarkResultsPrinter {
252 public:
253 BenchmarkResultsPrinter() = default;
BenchmarkResultsPrinter(std::set<std::string> counterNames)254 explicit BenchmarkResultsPrinter(std::set<std::string> counterNames)
255 : counterNames_(std::move(counterNames)),
256 namesLength_{std::accumulate(
257 counterNames_.begin(),
258 counterNames_.end(),
259 size_t{0},
260 [](size_t acc, auto&& name) { return acc + 2 + name.length(); })} {}
261
262 static constexpr unsigned int columns{76};
separator(char pad)263 void separator(char pad) {
264 puts(string(columns + namesLength_, pad).c_str());
265 }
266
header(const string & file)267 void header(const string& file) {
268 separator('=');
269 printf("%-*srelative time/iter iters/s", columns - 28, file.c_str());
270 for (auto const& name : counterNames_) {
271 printf(" %s", name.c_str());
272 }
273 printf("\n");
274 separator('=');
275 }
276
print(const vector<detail::BenchmarkResult> & data)277 void print(const vector<detail::BenchmarkResult>& data) {
278 for (auto& datum : data) {
279 auto file = datum.file;
280 if (file != lastFile_) {
281 // New file starting
282 header(file);
283 lastFile_ = file;
284 }
285
286 string s = datum.name;
287 if (s == "-") {
288 separator('-');
289 continue;
290 }
291 bool useBaseline /* = void */;
292 if (s[0] == '%') {
293 s.erase(0, 1);
294 useBaseline = true;
295 } else {
296 baselineNsPerIter_ = datum.timeInNs;
297 useBaseline = false;
298 }
299 s.resize(columns - 29, ' ');
300 auto nsPerIter = datum.timeInNs;
301 auto secPerIter = nsPerIter / 1E9;
302 auto itersPerSec = (secPerIter == 0)
303 ? std::numeric_limits<double>::infinity()
304 : (1 / secPerIter);
305 if (!useBaseline) {
306 // Print without baseline
307 printf(
308 "%*s %9s %7s",
309 static_cast<int>(s.size()),
310 s.c_str(),
311 readableTime(secPerIter, 2).c_str(),
312 metricReadable(itersPerSec, 2).c_str());
313 } else {
314 // Print with baseline
315 auto rel = baselineNsPerIter_ / nsPerIter * 100.0;
316 printf(
317 "%*s %7.2f%% %9s %7s",
318 static_cast<int>(s.size()),
319 s.c_str(),
320 rel,
321 readableTime(secPerIter, 2).c_str(),
322 metricReadable(itersPerSec, 2).c_str());
323 }
324 for (auto const& name : counterNames_) {
325 if (auto ptr = folly::get_ptr(datum.counters, name)) {
326 switch (ptr->type) {
327 case UserMetric::Type::TIME:
328 printf(
329 " %*s",
330 int(name.length()),
331 readableTime(ptr->value, 2).c_str());
332 break;
333 case UserMetric::Type::METRIC:
334 printf(
335 " %*s",
336 int(name.length()),
337 metricReadable(ptr->value, 2).c_str());
338 break;
339 case UserMetric::Type::CUSTOM:
340 default:
341 printf(" %*" PRId64, int(name.length()), ptr->value);
342 }
343 } else {
344 printf(" %*s", int(name.length()), "NaN");
345 }
346 }
347 printf("\n");
348 }
349 }
350
351 private:
352 std::set<std::string> counterNames_;
353 size_t namesLength_{0};
354 double baselineNsPerIter_{numeric_limits<double>::max()};
355 string lastFile_;
356 };
357 } // namespace
358
printBenchmarkResultsAsJson(const vector<detail::BenchmarkResult> & data)359 static void printBenchmarkResultsAsJson(
360 const vector<detail::BenchmarkResult>& data) {
361 dynamic d = dynamic::object;
362 for (auto& datum : data) {
363 d[datum.name] = datum.timeInNs * 1000.;
364 }
365
366 printf("%s\n", toPrettyJson(d).c_str());
367 }
368
benchmarkResultsToDynamic(const vector<detail::BenchmarkResult> & data,dynamic & out)369 void benchmarkResultsToDynamic(
370 const vector<detail::BenchmarkResult>& data, dynamic& out) {
371 out = dynamic::array;
372 for (auto& datum : data) {
373 if (!datum.counters.empty()) {
374 dynamic obj = dynamic::object;
375 for (auto& counter : datum.counters) {
376 dynamic counterInfo = dynamic::object;
377 counterInfo["value"] = counter.second.value;
378 counterInfo["type"] = static_cast<int>(counter.second.type);
379 obj[counter.first] = counterInfo;
380 }
381 out.push_back(
382 dynamic::array(datum.file, datum.name, datum.timeInNs, obj));
383 } else {
384 out.push_back(dynamic::array(datum.file, datum.name, datum.timeInNs));
385 }
386 }
387 }
388
benchmarkResultsFromDynamic(const dynamic & d,vector<detail::BenchmarkResult> & results)389 void benchmarkResultsFromDynamic(
390 const dynamic& d, vector<detail::BenchmarkResult>& results) {
391 for (auto& datum : d) {
392 results.push_back(
393 {datum[0].asString(),
394 datum[1].asString(),
395 datum[2].asDouble(),
396 UserCounters{}});
397 }
398 }
399
resultKey(const detail::BenchmarkResult & result)400 static pair<StringPiece, StringPiece> resultKey(
401 const detail::BenchmarkResult& result) {
402 return pair<StringPiece, StringPiece>(result.file, result.name);
403 }
404
printResultComparison(const vector<detail::BenchmarkResult> & base,const vector<detail::BenchmarkResult> & test)405 void printResultComparison(
406 const vector<detail::BenchmarkResult>& base,
407 const vector<detail::BenchmarkResult>& test) {
408 map<pair<StringPiece, StringPiece>, double> baselines;
409
410 for (auto& baseResult : base) {
411 baselines[resultKey(baseResult)] = baseResult.timeInNs;
412 }
413 //
414 // Width available
415 static const unsigned int columns = 76;
416
417 // Compute the longest benchmark name
418 size_t longestName = 0;
419 for (auto& datum : test) {
420 longestName = max(longestName, datum.name.size());
421 }
422
423 // Print a horizontal rule
424 auto separator = [&](char pad) { puts(string(columns, pad).c_str()); };
425
426 // Print header for a file
427 auto header = [&](const string& file) {
428 separator('=');
429 printf("%-*srelative time/iter iters/s\n", columns - 28, file.c_str());
430 separator('=');
431 };
432
433 string lastFile;
434
435 for (auto& datum : test) {
436 folly::Optional<double> baseline =
437 folly::get_optional(baselines, resultKey(datum));
438 auto file = datum.file;
439 if (file != lastFile) {
440 // New file starting
441 header(file);
442 lastFile = file;
443 }
444
445 string s = datum.name;
446 if (s == "-") {
447 separator('-');
448 continue;
449 }
450 if (s[0] == '%') {
451 s.erase(0, 1);
452 }
453 s.resize(columns - 29, ' ');
454 auto nsPerIter = datum.timeInNs;
455 auto secPerIter = nsPerIter / 1E9;
456 auto itersPerSec = (secPerIter == 0)
457 ? std::numeric_limits<double>::infinity()
458 : (1 / secPerIter);
459 if (!baseline) {
460 // Print without baseline
461 printf(
462 "%*s %9s %7s\n",
463 static_cast<int>(s.size()),
464 s.c_str(),
465 readableTime(secPerIter, 2).c_str(),
466 metricReadable(itersPerSec, 2).c_str());
467 } else {
468 // Print with baseline
469 auto rel = *baseline / nsPerIter * 100.0;
470 printf(
471 "%*s %7.2f%% %9s %7s\n",
472 static_cast<int>(s.size()),
473 s.c_str(),
474 rel,
475 readableTime(secPerIter, 2).c_str(),
476 metricReadable(itersPerSec, 2).c_str());
477 }
478 }
479 separator('=');
480 }
481
checkRunMode()482 void checkRunMode() {
483 if (folly::kIsDebug || folly::kIsSanitize) {
484 std::cerr << "WARNING: Benchmark running "
485 << (folly::kIsDebug ? "in DEBUG mode" : "with SANITIZERS")
486 << std::endl;
487 }
488 }
489
490 namespace {
491
492 std::pair<std::set<std::string>, std::vector<detail::BenchmarkResult>>
runBenchmarksWithPrinter(BenchmarkResultsPrinter * FOLLY_NULLABLE printer)493 runBenchmarksWithPrinter(BenchmarkResultsPrinter* FOLLY_NULLABLE printer) {
494 vector<detail::BenchmarkResult> results;
495 results.reserve(benchmarks().size() - 1);
496
497 std::unique_ptr<boost::regex> bmRegex;
498 if (!FLAGS_bm_regex.empty()) {
499 bmRegex = std::make_unique<boost::regex>(FLAGS_bm_regex);
500 }
501
502 // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
503
504 size_t baselineIndex = getGlobalBenchmarkBaselineIndex();
505
506 auto const globalBaseline =
507 runBenchmarkGetNSPerIteration(benchmarks()[baselineIndex].func, 0);
508
509 std::set<std::string> counterNames;
510 FOR_EACH_RANGE (i, 0, benchmarks().size()) {
511 if (i == baselineIndex) {
512 continue;
513 }
514 std::pair<double, UserCounters> elapsed;
515 auto& bm = benchmarks()[i];
516 if (bm.name != "-") { // skip separators
517 if (bmRegex && !boost::regex_search(bm.name, *bmRegex)) {
518 continue;
519 }
520 elapsed = runBenchmarkGetNSPerIteration(bm.func, globalBaseline.first);
521 }
522
523 // if customized user counters is used, it cannot print the result in real
524 // time as it needs to run all cases first to know the complete set of
525 // counters have been used, then the header can be printed out properly
526 if (printer != nullptr) {
527 printer->print({{bm.file, bm.name, elapsed.first, elapsed.second}});
528 } else {
529 results.push_back({bm.file, bm.name, elapsed.first, elapsed.second});
530 }
531
532 // get all counter names
533 for (auto const& kv : elapsed.second) {
534 counterNames.insert(kv.first);
535 }
536 }
537
538 // MEASUREMENTS DONE.
539
540 return std::make_pair(std::move(counterNames), std::move(results));
541 }
542
resultsFromFile(const std::string & filename)543 std::vector<detail::BenchmarkResult> resultsFromFile(
544 const std::string& filename) {
545 std::string content;
546 readFile(filename.c_str(), content);
547 std::vector<detail::BenchmarkResult> ret;
548 if (!content.empty()) {
549 benchmarkResultsFromDynamic(parseJson(content), ret);
550 }
551 return ret;
552 }
553
writeResultsToFile(const std::vector<detail::BenchmarkResult> & results,const std::string & filename)554 bool writeResultsToFile(
555 const std::vector<detail::BenchmarkResult>& results,
556 const std::string& filename) {
557 dynamic d;
558 benchmarkResultsToDynamic(results, d);
559 return writeFile(toPrettyJson(d), filename.c_str());
560 }
561
562 } // namespace
563
564 namespace detail {
565
runBenchmarksWithResults()566 std::vector<BenchmarkResult> runBenchmarksWithResults() {
567 return runBenchmarksWithPrinter(nullptr).second;
568 }
569
570 } // namespace detail
571
runBenchmarks()572 void runBenchmarks() {
573 CHECK(!benchmarks().empty());
574
575 checkRunMode();
576
577 BenchmarkResultsPrinter printer;
578 bool useCounter =
579 std::any_of(benchmarks().begin(), benchmarks().end(), [](const auto& bm) {
580 return bm.useCounter;
581 });
582 // PLEASE KEEP QUIET. MEASUREMENTS IN PROGRESS.
583
584 auto benchmarkResults = runBenchmarksWithPrinter(
585 FLAGS_bm_relative_to.empty() && !FLAGS_json && !useCounter ? &printer
586 : nullptr);
587
588 // PLEASE MAKE NOISE. MEASUREMENTS DONE.
589
590 if (FLAGS_json) {
591 printBenchmarkResultsAsJson(benchmarkResults.second);
592 } else if (!FLAGS_bm_relative_to.empty()) {
593 printResultComparison(
594 resultsFromFile(FLAGS_bm_relative_to), benchmarkResults.second);
595 } else {
596 printer = BenchmarkResultsPrinter{std::move(benchmarkResults.first)};
597 printer.print(benchmarkResults.second);
598 printer.separator('=');
599 }
600
601 if (!FLAGS_bm_json_verbose.empty()) {
602 writeResultsToFile(benchmarkResults.second, FLAGS_bm_json_verbose);
603 }
604
605 checkRunMode();
606 }
607
608 } // namespace folly
609