1 //===- xray-account.h - XRay Function Call Accounting ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements basic function call accounting from an XRay trace.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include <algorithm>
14 #include <cassert>
15 #include <numeric>
16 #include <system_error>
17 #include <utility>
18 
19 #include "xray-account.h"
20 #include "xray-registry.h"
21 #include "llvm/Support/ErrorHandling.h"
22 #include "llvm/Support/FormatVariadic.h"
23 #include "llvm/XRay/InstrumentationMap.h"
24 #include "llvm/XRay/Trace.h"
25 
26 using namespace llvm;
27 using namespace llvm::xray;
28 
29 static cl::SubCommand Account("account", "Function call accounting");
30 static cl::opt<std::string> AccountInput(cl::Positional,
31                                          cl::desc("<xray log file>"),
32                                          cl::Required, cl::sub(Account));
33 static cl::opt<bool>
34     AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),
35                      cl::sub(Account), cl::init(false));
36 static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing),
37                                    cl::desc("Alias for -keep_going"));
38 static cl::opt<bool> AccountRecursiveCallsOnly(
39     "recursive-calls-only", cl::desc("Only count the calls that are recursive"),
40     cl::sub(Account), cl::init(false));
41 static cl::opt<bool> AccountDeduceSiblingCalls(
42     "deduce-sibling-calls",
43     cl::desc("Deduce sibling calls when unrolling function call stacks"),
44     cl::sub(Account), cl::init(false));
45 static cl::alias
46     AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls),
47                                cl::desc("Alias for -deduce_sibling_calls"));
48 static cl::opt<std::string>
49     AccountOutput("output", cl::value_desc("output file"), cl::init("-"),
50                   cl::desc("output file; use '-' for stdout"),
51                   cl::sub(Account));
52 static cl::alias AccountOutput2("o", cl::aliasopt(AccountOutput),
53                                 cl::desc("Alias for -output"));
54 enum class AccountOutputFormats { TEXT, CSV };
55 static cl::opt<AccountOutputFormats>
56     AccountOutputFormat("format", cl::desc("output format"),
57                         cl::values(clEnumValN(AccountOutputFormats::TEXT,
58                                               "text", "report stats in text"),
59                                    clEnumValN(AccountOutputFormats::CSV, "csv",
60                                               "report stats in csv")),
61                         cl::sub(Account));
62 static cl::alias AccountOutputFormat2("f", cl::desc("Alias of -format"),
63                                       cl::aliasopt(AccountOutputFormat));
64 
65 enum class SortField {
66   FUNCID,
67   COUNT,
68   MIN,
69   MED,
70   PCT90,
71   PCT99,
72   MAX,
73   SUM,
74   FUNC,
75 };
76 
77 static cl::opt<SortField> AccountSortOutput(
78     "sort", cl::desc("sort output by this field"), cl::value_desc("field"),
79     cl::sub(Account), cl::init(SortField::FUNCID),
80     cl::values(clEnumValN(SortField::FUNCID, "funcid", "function id"),
81                clEnumValN(SortField::COUNT, "count", "funciton call counts"),
82                clEnumValN(SortField::MIN, "min", "minimum function durations"),
83                clEnumValN(SortField::MED, "med", "median function durations"),
84                clEnumValN(SortField::PCT90, "90p", "90th percentile durations"),
85                clEnumValN(SortField::PCT99, "99p", "99th percentile durations"),
86                clEnumValN(SortField::MAX, "max", "maximum function durations"),
87                clEnumValN(SortField::SUM, "sum", "sum of call durations"),
88                clEnumValN(SortField::FUNC, "func", "function names")));
89 static cl::alias AccountSortOutput2("s", cl::aliasopt(AccountSortOutput),
90                                     cl::desc("Alias for -sort"));
91 
92 enum class SortDirection {
93   ASCENDING,
94   DESCENDING,
95 };
96 static cl::opt<SortDirection> AccountSortOrder(
97     "sortorder", cl::desc("sort ordering"), cl::init(SortDirection::ASCENDING),
98     cl::values(clEnumValN(SortDirection::ASCENDING, "asc", "ascending"),
99                clEnumValN(SortDirection::DESCENDING, "dsc", "descending")),
100     cl::sub(Account));
101 static cl::alias AccountSortOrder2("r", cl::aliasopt(AccountSortOrder),
102                                    cl::desc("Alias for -sortorder"));
103 
104 static cl::opt<int> AccountTop("top", cl::desc("only show the top N results"),
105                                cl::value_desc("N"), cl::sub(Account),
106                                cl::init(-1));
107 static cl::alias AccountTop2("p", cl::desc("Alias for -top"),
108                              cl::aliasopt(AccountTop));
109 
110 static cl::opt<std::string>
111     AccountInstrMap("instr_map",
112                     cl::desc("binary with the instrumentation map, or "
113                              "a separate instrumentation map"),
114                     cl::value_desc("binary with xray_instr_map"),
115                     cl::sub(Account), cl::init(""));
116 static cl::alias AccountInstrMap2("m", cl::aliasopt(AccountInstrMap),
117                                   cl::desc("Alias for -instr_map"));
118 
119 namespace {
120 
121 template <class T, class U> void setMinMax(std::pair<T, T> &MM, U &&V) {
122   if (MM.first == 0 || MM.second == 0)
123     MM = std::make_pair(std::forward<U>(V), std::forward<U>(V));
124   else
125     MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V));
126 }
127 
128 template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); }
129 
130 } // namespace
131 
132 using RecursionStatus = LatencyAccountant::FunctionStack::RecursionStatus;
133 RecursionStatus &RecursionStatus::operator++() {
134   auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
135   assert(Depth >= 0 && Depth < std::numeric_limits<decltype(Depth)>::max());
136   ++Depth;
137   Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // ++Storage
138   // Did this function just (maybe indirectly) call itself the first time?
139   if (!isRecursive() && Depth == 2) // Storage == 2  /  Storage s> 1
140     Bitfield::set<RecursionStatus::IsRecursive>(Storage,
141                                                 true); // Storage |= INT_MIN
142   return *this;
143 }
144 RecursionStatus &RecursionStatus::operator--() {
145   auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);
146   assert(Depth > 0);
147   --Depth;
148   Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // --Storage
149   // Did we leave a function that previouly (maybe indirectly) called itself?
150   if (isRecursive() && Depth == 0) // Storage == INT_MIN
151     Bitfield::set<RecursionStatus::IsRecursive>(Storage, false); // Storage = 0
152   return *this;
153 }
154 bool RecursionStatus::isRecursive() const {
155   return Bitfield::get<RecursionStatus::IsRecursive>(Storage); // Storage s< 0
156 }
157 
158 bool LatencyAccountant::accountRecord(const XRayRecord &Record) {
159   setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC);
160   setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC);
161 
162   if (CurrentMaxTSC == 0)
163     CurrentMaxTSC = Record.TSC;
164 
165   if (Record.TSC < CurrentMaxTSC)
166     return false;
167 
168   auto &ThreadStack = PerThreadFunctionStack[Record.TId];
169   if (RecursiveCallsOnly && !ThreadStack.RecursionDepth)
170     ThreadStack.RecursionDepth.emplace();
171   switch (Record.Type) {
172   case RecordTypes::CUSTOM_EVENT:
173   case RecordTypes::TYPED_EVENT:
174     // TODO: Support custom and typed event accounting in the future.
175     return true;
176   case RecordTypes::ENTER:
177   case RecordTypes::ENTER_ARG: {
178     ThreadStack.Stack.emplace_back(Record.FuncId, Record.TSC);
179     if (ThreadStack.RecursionDepth)
180       ++(*ThreadStack.RecursionDepth)[Record.FuncId];
181     break;
182   }
183   case RecordTypes::EXIT:
184   case RecordTypes::TAIL_EXIT: {
185     if (ThreadStack.Stack.empty())
186       return false;
187 
188     if (ThreadStack.Stack.back().first == Record.FuncId) {
189       const auto &Top = ThreadStack.Stack.back();
190       if (!ThreadStack.RecursionDepth ||
191           (*ThreadStack.RecursionDepth)[Top.first].isRecursive())
192         recordLatency(Top.first, diff(Top.second, Record.TSC));
193       if (ThreadStack.RecursionDepth)
194         --(*ThreadStack.RecursionDepth)[Top.first];
195       ThreadStack.Stack.pop_back();
196       break;
197     }
198 
199     if (!DeduceSiblingCalls)
200       return false;
201 
202     // Look for the parent up the stack.
203     auto Parent =
204         std::find_if(ThreadStack.Stack.rbegin(), ThreadStack.Stack.rend(),
205                      [&](const std::pair<const int32_t, uint64_t> &E) {
206                        return E.first == Record.FuncId;
207                      });
208     if (Parent == ThreadStack.Stack.rend())
209       return false;
210 
211     // Account time for this apparently sibling call exit up the stack.
212     // Considering the following case:
213     //
214     //   f()
215     //    g()
216     //      h()
217     //
218     // We might only ever see the following entries:
219     //
220     //   -> f()
221     //   -> g()
222     //   -> h()
223     //   <- h()
224     //   <- f()
225     //
226     // Now we don't see the exit to g() because some older version of the XRay
227     // runtime wasn't instrumenting tail exits. If we don't deduce tail calls,
228     // we may potentially never account time for g() -- and this code would have
229     // already bailed out, because `<- f()` doesn't match the current "top" of
230     // stack where we're waiting for the exit to `g()` instead. This is not
231     // ideal and brittle -- so instead we provide a potentially inaccurate
232     // accounting of g() instead, computing it from the exit of f().
233     //
234     // While it might be better that we account the time between `-> g()` and
235     // `-> h()` as the proper accounting of time for g() here, this introduces
236     // complexity to do correctly (need to backtrack, etc.).
237     //
238     // FIXME: Potentially implement the more complex deduction algorithm?
239     auto R = make_range(std::next(Parent).base(), ThreadStack.Stack.end());
240     for (auto &E : R) {
241       if (!ThreadStack.RecursionDepth ||
242           (*ThreadStack.RecursionDepth)[E.first].isRecursive())
243         recordLatency(E.first, diff(E.second, Record.TSC));
244     }
245     for (auto &Top : reverse(R)) {
246       if (ThreadStack.RecursionDepth)
247         --(*ThreadStack.RecursionDepth)[Top.first];
248       ThreadStack.Stack.pop_back();
249     }
250     break;
251   }
252   }
253 
254   return true;
255 }
256 
257 namespace {
258 
259 // We consolidate the data into a struct which we can output in various forms.
260 struct ResultRow {
261   uint64_t Count;
262   double Min;
263   double Median;
264   double Pct90;
265   double Pct99;
266   double Max;
267   double Sum;
268   std::string DebugInfo;
269   std::string Function;
270 };
271 
272 ResultRow getStats(MutableArrayRef<uint64_t> Timings) {
273   assert(!Timings.empty());
274   ResultRow R;
275   R.Sum = std::accumulate(Timings.begin(), Timings.end(), 0.0);
276   auto MinMax = std::minmax_element(Timings.begin(), Timings.end());
277   R.Min = *MinMax.first;
278   R.Max = *MinMax.second;
279   R.Count = Timings.size();
280 
281   auto MedianOff = Timings.size() / 2;
282   std::nth_element(Timings.begin(), Timings.begin() + MedianOff, Timings.end());
283   R.Median = Timings[MedianOff];
284 
285   auto Pct90Off = std::floor(Timings.size() * 0.9);
286   std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct90Off,
287                    Timings.end());
288   R.Pct90 = Timings[Pct90Off];
289 
290   auto Pct99Off = std::floor(Timings.size() * 0.99);
291   std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct99Off,
292                    Timings.end());
293   R.Pct99 = Timings[Pct99Off];
294   return R;
295 }
296 
297 } // namespace
298 
299 using TupleType = std::tuple<int32_t, uint64_t, ResultRow>;
300 
301 template <typename F>
302 static void sortByKey(std::vector<TupleType> &Results, F Fn) {
303   bool ASC = AccountSortOrder == SortDirection::ASCENDING;
304   llvm::sort(Results, [=](const TupleType &L, const TupleType &R) {
305     return ASC ? Fn(L) < Fn(R) : Fn(L) > Fn(R);
306   });
307 }
308 
309 template <class F>
310 void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const {
311   std::vector<TupleType> Results;
312   Results.reserve(FunctionLatencies.size());
313   for (auto FT : FunctionLatencies) {
314     const auto &FuncId = FT.first;
315     auto &Timings = FT.second;
316     Results.emplace_back(FuncId, Timings.size(), getStats(Timings));
317     auto &Row = std::get<2>(Results.back());
318     if (Header.CycleFrequency) {
319       double CycleFrequency = Header.CycleFrequency;
320       Row.Min /= CycleFrequency;
321       Row.Median /= CycleFrequency;
322       Row.Pct90 /= CycleFrequency;
323       Row.Pct99 /= CycleFrequency;
324       Row.Max /= CycleFrequency;
325       Row.Sum /= CycleFrequency;
326     }
327 
328     Row.Function = FuncIdHelper.SymbolOrNumber(FuncId);
329     Row.DebugInfo = FuncIdHelper.FileLineAndColumn(FuncId);
330   }
331 
332   // Sort the data according to user-provided flags.
333   switch (AccountSortOutput) {
334   case SortField::FUNCID:
335     sortByKey(Results, [](const TupleType &X) { return std::get<0>(X); });
336     break;
337   case SortField::COUNT:
338     sortByKey(Results, [](const TupleType &X) { return std::get<1>(X); });
339     break;
340   case SortField::MIN:
341     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Min; });
342     break;
343   case SortField::MED:
344     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Median; });
345     break;
346   case SortField::PCT90:
347     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct90; });
348     break;
349   case SortField::PCT99:
350     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct99; });
351     break;
352   case SortField::MAX:
353     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Max; });
354     break;
355   case SortField::SUM:
356     sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Sum; });
357     break;
358   case SortField::FUNC:
359     llvm_unreachable("Not implemented");
360   }
361 
362   if (AccountTop > 0) {
363     auto MaxTop =
364         std::min(AccountTop.getValue(), static_cast<int>(Results.size()));
365     Results.erase(Results.begin() + MaxTop, Results.end());
366   }
367 
368   for (const auto &R : Results)
369     Fn(std::get<0>(R), std::get<1>(R), std::get<2>(R));
370 }
371 
372 void LatencyAccountant::exportStatsAsText(raw_ostream &OS,
373                                           const XRayFileHeader &Header) const {
374   OS << "Functions with latencies: " << FunctionLatencies.size() << "\n";
375 
376   // We spend some effort to make the text output more readable, so we do the
377   // following formatting decisions for each of the fields:
378   //
379   //   - funcid: 32-bit, but we can determine the largest number and be
380   //   between
381   //     a minimum of 5 characters, up to 9 characters, right aligned.
382   //   - count:  64-bit, but we can determine the largest number and be
383   //   between
384   //     a minimum of 5 characters, up to 9 characters, right aligned.
385   //   - min, median, 90pct, 99pct, max: double precision, but we want to keep
386   //     the values in seconds, with microsecond precision (0.000'001), so we
387   //     have at most 6 significant digits, with the whole number part to be
388   //     at
389   //     least 1 character. For readability we'll right-align, with full 9
390   //     characters each.
391   //   - debug info, function name: we format this as a concatenation of the
392   //     debug info and the function name.
393   //
394   static constexpr char StatsHeaderFormat[] =
395       "{0,+9} {1,+10} [{2,+9}, {3,+9}, {4,+9}, {5,+9}, {6,+9}] {7,+9}";
396   static constexpr char StatsFormat[] =
397       R"({0,+9} {1,+10} [{2,+9:f6}, {3,+9:f6}, {4,+9:f6}, {5,+9:f6}, {6,+9:f6}] {7,+9:f6})";
398   OS << llvm::formatv(StatsHeaderFormat, "funcid", "count", "min", "med", "90p",
399                       "99p", "max", "sum")
400      << llvm::formatv("  {0,-12}\n", "function");
401   exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {
402     OS << llvm::formatv(StatsFormat, FuncId, Count, Row.Min, Row.Median,
403                         Row.Pct90, Row.Pct99, Row.Max, Row.Sum)
404        << "  " << Row.DebugInfo << ": " << Row.Function << "\n";
405   });
406 }
407 
408 void LatencyAccountant::exportStatsAsCSV(raw_ostream &OS,
409                                          const XRayFileHeader &Header) const {
410   OS << "funcid,count,min,median,90%ile,99%ile,max,sum,debug,function\n";
411   exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {
412     OS << FuncId << ',' << Count << ',' << Row.Min << ',' << Row.Median << ','
413        << Row.Pct90 << ',' << Row.Pct99 << ',' << Row.Max << "," << Row.Sum
414        << ",\"" << Row.DebugInfo << "\",\"" << Row.Function << "\"\n";
415   });
416 }
417 
418 using namespace llvm::xray;
419 
420 namespace llvm {
421 template <> struct format_provider<llvm::xray::RecordTypes> {
422   static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream,
423                      StringRef Style) {
424     switch (T) {
425     case RecordTypes::ENTER:
426       Stream << "enter";
427       break;
428     case RecordTypes::ENTER_ARG:
429       Stream << "enter-arg";
430       break;
431     case RecordTypes::EXIT:
432       Stream << "exit";
433       break;
434     case RecordTypes::TAIL_EXIT:
435       Stream << "tail-exit";
436       break;
437     case RecordTypes::CUSTOM_EVENT:
438       Stream << "custom-event";
439       break;
440     case RecordTypes::TYPED_EVENT:
441       Stream << "typed-event";
442       break;
443     }
444   }
445 };
446 } // namespace llvm
447 
448 static CommandRegistration Unused(&Account, []() -> Error {
449   InstrumentationMap Map;
450   if (!AccountInstrMap.empty()) {
451     auto InstrumentationMapOrError = loadInstrumentationMap(AccountInstrMap);
452     if (!InstrumentationMapOrError)
453       return joinErrors(make_error<StringError>(
454                             Twine("Cannot open instrumentation map '") +
455                                 AccountInstrMap + "'",
456                             std::make_error_code(std::errc::invalid_argument)),
457                         InstrumentationMapOrError.takeError());
458     Map = std::move(*InstrumentationMapOrError);
459   }
460 
461   std::error_code EC;
462   raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::OF_TextWithCRLF);
463   if (EC)
464     return make_error<StringError>(
465         Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);
466 
467   const auto &FunctionAddresses = Map.getFunctionAddresses();
468   symbolize::LLVMSymbolizer Symbolizer;
469   llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,
470                                                   FunctionAddresses);
471   xray::LatencyAccountant FCA(FuncIdHelper, AccountRecursiveCallsOnly,
472                               AccountDeduceSiblingCalls);
473   auto TraceOrErr = loadTraceFile(AccountInput);
474   if (!TraceOrErr)
475     return joinErrors(
476         make_error<StringError>(
477             Twine("Failed loading input file '") + AccountInput + "'",
478             std::make_error_code(std::errc::executable_format_error)),
479         TraceOrErr.takeError());
480 
481   auto &T = *TraceOrErr;
482   for (const auto &Record : T) {
483     if (FCA.accountRecord(Record))
484       continue;
485     errs()
486         << "Error processing record: "
487         << llvm::formatv(
488                R"({{type: {0}; cpu: {1}; record-type: {2}; function-id: {3}; tsc: {4}; thread-id: {5}; process-id: {6}}})",
489                Record.RecordType, Record.CPU, Record.Type, Record.FuncId,
490                Record.TSC, Record.TId, Record.PId)
491         << '\n';
492     for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) {
493       errs() << "Thread ID: " << ThreadStack.first << "\n";
494       if (ThreadStack.second.Stack.empty()) {
495         errs() << "  (empty stack)\n";
496         continue;
497       }
498       auto Level = ThreadStack.second.Stack.size();
499       for (const auto &Entry : llvm::reverse(ThreadStack.second.Stack))
500         errs() << "  #" << Level-- << "\t"
501                << FuncIdHelper.SymbolOrNumber(Entry.first) << '\n';
502     }
503     if (!AccountKeepGoing)
504       return make_error<StringError>(
505           Twine("Failed accounting function calls in file '") + AccountInput +
506               "'.",
507           std::make_error_code(std::errc::executable_format_error));
508   }
509   switch (AccountOutputFormat) {
510   case AccountOutputFormats::TEXT:
511     FCA.exportStatsAsText(OS, T.getFileHeader());
512     break;
513   case AccountOutputFormats::CSV:
514     FCA.exportStatsAsCSV(OS, T.getFileHeader());
515     break;
516   }
517 
518   return Error::success();
519 });
520