1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11 #include "ErrorHandling.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/Support/Casting.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Regex.h"
16 #include <cstdint>
17 #include <fstream>
18 #include <list>
19 #include <map>
20 #include <vector>
21 
22 using namespace llvm;
23 using namespace sampleprof;
24 
25 namespace llvm {
26 namespace sampleprof {
27 
28 // Stream based trace line iterator
29 class TraceStream {
30   std::string CurrentLine;
31   std::ifstream Fin;
32   bool IsAtEoF = false;
33   uint64_t LineNumber = 0;
34 
35 public:
TraceStream(StringRef Filename)36   TraceStream(StringRef Filename) : Fin(Filename.str()) {
37     if (!Fin.good())
38       exitWithError("Error read input perf script file", Filename);
39     advance();
40   }
41 
getCurrentLine()42   StringRef getCurrentLine() {
43     assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
44     return CurrentLine;
45   }
46 
getLineNumber()47   uint64_t getLineNumber() { return LineNumber; }
48 
isAtEoF()49   bool isAtEoF() { return IsAtEoF; }
50 
51   // Read the next line
advance()52   void advance() {
53     if (!std::getline(Fin, CurrentLine)) {
54       IsAtEoF = true;
55       return;
56     }
57     LineNumber++;
58   }
59 };
60 
61 // The type of input format.
62 enum PerfFormat {
63   UnknownFormat = 0,
64   PerfData = 1,            // Raw linux perf.data.
65   PerfScript = 2,          // Perf script create by `perf script` command.
66   UnsymbolizedProfile = 3, // Unsymbolized profile generated by llvm-profgen.
67 
68 };
69 
70 // The type of perfscript content.
71 enum PerfContent {
72   UnknownContent = 0,
73   LBR = 1,      // Only LBR sample.
74   LBRStack = 2, // Hybrid sample including call stack and LBR stack.
75 };
76 
77 struct PerfInputFile {
78   std::string InputFile;
79   PerfFormat Format = PerfFormat::UnknownFormat;
80   PerfContent Content = PerfContent::UnknownContent;
81 };
82 
83 // The parsed LBR sample entry.
84 struct LBREntry {
85   uint64_t Source = 0;
86   uint64_t Target = 0;
LBREntryLBREntry87   LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
88 
89 #ifndef NDEBUG
printLBREntry90   void print() const {
91     dbgs() << "from " << format("%#010x", Source) << " to "
92            << format("%#010x", Target);
93   }
94 #endif
95 };
96 
97 #ifndef NDEBUG
printLBRStack(const SmallVectorImpl<LBREntry> & LBRStack)98 static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
99   for (size_t I = 0; I < LBRStack.size(); I++) {
100     dbgs() << "[" << I << "] ";
101     LBRStack[I].print();
102     dbgs() << "\n";
103   }
104 }
105 
printCallStack(const SmallVectorImpl<uint64_t> & CallStack)106 static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
107   for (size_t I = 0; I < CallStack.size(); I++) {
108     dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
109   }
110 }
111 #endif
112 
113 // Hash interface for generic data of type T
114 // Data should implement a \fn getHashCode and a \fn isEqual
115 // Currently getHashCode is non-virtual to avoid the overhead of calling vtable,
116 // i.e we explicitly calculate hash of derived class, assign to base class's
117 // HashCode. This also provides the flexibility for calculating the hash code
118 // incrementally(like rolling hash) during frame stack unwinding since unwinding
119 // only changes the leaf of frame stack. \fn isEqual is a virtual function,
120 // which will have perf overhead. In the future, if we redesign a better hash
121 // function, then we can just skip this or switch to non-virtual function(like
122 // just ignore comparison if hash conflicts probabilities is low)
123 template <class T> class Hashable {
124 public:
125   std::shared_ptr<T> Data;
Hashable(const std::shared_ptr<T> & D)126   Hashable(const std::shared_ptr<T> &D) : Data(D) {}
127 
128   // Hash code generation
129   struct Hash {
operatorHash130     uint64_t operator()(const Hashable<T> &Key) const {
131       // Don't make it virtual for getHashCode
132       uint64_t Hash = Key.Data->getHashCode();
133       assert(Hash && "Should generate HashCode for it!");
134       return Hash;
135     }
136   };
137 
138   // Hash equal
139   struct Equal {
operatorEqual140     bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
141       // Precisely compare the data, vtable will have overhead.
142       return LHS.Data->isEqual(RHS.Data.get());
143     }
144   };
145 
getPtr()146   T *getPtr() const { return Data.get(); }
147 };
148 
149 struct PerfSample {
150   // LBR stack recorded in FIFO order.
151   SmallVector<LBREntry, 16> LBRStack;
152   // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
153   // generation
154   SmallVector<uint64_t, 16> CallStack;
155 
156   virtual ~PerfSample() = default;
getHashCodePerfSample157   uint64_t getHashCode() const {
158     // Use simple DJB2 hash
159     auto HashCombine = [](uint64_t H, uint64_t V) {
160       return ((H << 5) + H) + V;
161     };
162     uint64_t Hash = 5381;
163     for (const auto &Value : CallStack) {
164       Hash = HashCombine(Hash, Value);
165     }
166     for (const auto &Entry : LBRStack) {
167       Hash = HashCombine(Hash, Entry.Source);
168       Hash = HashCombine(Hash, Entry.Target);
169     }
170     return Hash;
171   }
172 
isEqualPerfSample173   bool isEqual(const PerfSample *Other) const {
174     const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
175     const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
176 
177     if (CallStack.size() != OtherCallStack.size() ||
178         LBRStack.size() != OtherLBRStack.size())
179       return false;
180 
181     if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
182       return false;
183 
184     for (size_t I = 0; I < OtherLBRStack.size(); I++) {
185       if (LBRStack[I].Source != OtherLBRStack[I].Source ||
186           LBRStack[I].Target != OtherLBRStack[I].Target)
187         return false;
188     }
189     return true;
190   }
191 
192 #ifndef NDEBUG
193   uint64_t Linenum = 0;
194 
printPerfSample195   void print() const {
196     dbgs() << "Line " << Linenum << "\n";
197     dbgs() << "LBR stack\n";
198     printLBRStack(LBRStack);
199     dbgs() << "Call stack\n";
200     printCallStack(CallStack);
201   }
202 #endif
203 };
204 // After parsing the sample, we record the samples by aggregating them
205 // into this counter. The key stores the sample data and the value is
206 // the sample repeat times.
207 using AggregatedCounter =
208     std::unordered_map<Hashable<PerfSample>, uint64_t,
209                        Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
210 
211 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
212 
isValidFallThroughRange(uint64_t Start,uint64_t End,ProfiledBinary * Binary)213 inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
214                                     ProfiledBinary *Binary) {
215   // Start bigger than End is considered invalid.
216   // LBR ranges cross the unconditional jmp are also assumed invalid.
217   // It's found that perf data may contain duplicate LBR entries that could form
218   // a range that does not reflect real execution flow on some Intel targets,
219   // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
220   // cannot be a linear execution range that spans over unconditional jmp.
221   return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
222 }
223 
224 // The state for the unwinder, it doesn't hold the data but only keep the
225 // pointer/index of the data, While unwinding, the CallStack is changed
226 // dynamicially and will be recorded as the context of the sample
227 struct UnwindState {
228   // Profiled binary that current frame address belongs to
229   const ProfiledBinary *Binary;
230   // Call stack trie node
231   struct ProfiledFrame {
232     const uint64_t Address = DummyRoot;
233     ProfiledFrame *Parent;
234     SampleVector RangeSamples;
235     SampleVector BranchSamples;
236     std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
237 
238     ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
AddressUnwindState::ProfiledFrame239         : Address(Addr), Parent(P) {}
getOrCreateChildFrameUnwindState::ProfiledFrame240     ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
241       assert(Address && "Address can't be zero!");
242       auto Ret = Children.emplace(
243           Address, std::make_unique<ProfiledFrame>(Address, this));
244       return Ret.first->second.get();
245     }
recordRangeCountUnwindState::ProfiledFrame246     void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
247       RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
248     }
recordBranchCountUnwindState::ProfiledFrame249     void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
250       BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
251     }
isDummyRootUnwindState::ProfiledFrame252     bool isDummyRoot() { return Address == DummyRoot; }
isExternalFrameUnwindState::ProfiledFrame253     bool isExternalFrame() { return Address == ExternalAddr; }
isLeafFrameUnwindState::ProfiledFrame254     bool isLeafFrame() { return Children.empty(); }
255   };
256 
257   ProfiledFrame DummyTrieRoot;
258   ProfiledFrame *CurrentLeafFrame;
259   // Used to fall through the LBR stack
260   uint32_t LBRIndex = 0;
261   // Reference to PerfSample.LBRStack
262   const SmallVector<LBREntry, 16> &LBRStack;
263   // Used to iterate the address range
264   InstructionPointer InstPtr;
265   // Indicate whether unwinding is currently in a bad state which requires to
266   // skip all subsequent unwinding.
267   bool Invalid = false;
UnwindStateUnwindState268   UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
269       : Binary(Binary), LBRStack(Sample->LBRStack),
270         InstPtr(Binary, Sample->CallStack.front()) {
271     initFrameTrie(Sample->CallStack);
272   }
273 
validateInitialStateUnwindState274   bool validateInitialState() {
275     uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
276     uint64_t LeafAddr = CurrentLeafFrame->Address;
277     assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
278            "External leading LBR should match the leaf frame.");
279 
280     // When we take a stack sample, ideally the sampling distance between the
281     // leaf IP of stack and the last LBR target shouldn't be very large.
282     // Use a heuristic size (0x100) to filter out broken records.
283     if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
284       WithColor::warning() << "Bogus trace: stack tip = "
285                            << format("%#010x", LeafAddr)
286                            << ", LBR tip = " << format("%#010x\n", LBRLeaf);
287       return false;
288     }
289     return true;
290   }
291 
checkStateConsistencyUnwindState292   void checkStateConsistency() {
293     assert(InstPtr.Address == CurrentLeafFrame->Address &&
294            "IP should align with context leaf");
295   }
296 
setInvalidUnwindState297   void setInvalid() { Invalid = true; }
hasNextLBRUnwindState298   bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
getCurrentLBRSourceUnwindState299   uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
getCurrentLBRTargetUnwindState300   uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
getCurrentLBRUnwindState301   const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
IsLastLBRUnwindState302   bool IsLastLBR() const { return LBRIndex == 0; }
getLBRStackSizeUnwindState303   bool getLBRStackSize() const { return LBRStack.size(); }
advanceLBRUnwindState304   void advanceLBR() { LBRIndex++; }
getParentFrameUnwindState305   ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
306 
pushFrameUnwindState307   void pushFrame(uint64_t Address) {
308     CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
309   }
310 
switchToFrameUnwindState311   void switchToFrame(uint64_t Address) {
312     if (CurrentLeafFrame->Address == Address)
313       return;
314     CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
315   }
316 
popFrameUnwindState317   void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
318 
clearCallStackUnwindState319   void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
320 
initFrameTrieUnwindState321   void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
322     ProfiledFrame *Cur = &DummyTrieRoot;
323     for (auto Address : reverse(CallStack)) {
324       Cur = Cur->getOrCreateChildFrame(Address);
325     }
326     CurrentLeafFrame = Cur;
327   }
328 
getDummyRootPtrUnwindState329   ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
330 };
331 
332 // Base class for sample counter key with context
333 struct ContextKey {
334   uint64_t HashCode = 0;
335   virtual ~ContextKey() = default;
getHashCodeContextKey336   uint64_t getHashCode() {
337     if (HashCode == 0)
338       genHashCode();
339     return HashCode;
340   }
341   virtual void genHashCode() = 0;
isEqualContextKey342   virtual bool isEqual(const ContextKey *K) const {
343     return HashCode == K->HashCode;
344   };
345 
346   // Utilities for LLVM-style RTTI
347   enum ContextKind { CK_StringBased, CK_AddrBased };
348   const ContextKind Kind;
getKindContextKey349   ContextKind getKind() const { return Kind; }
ContextKeyContextKey350   ContextKey(ContextKind K) : Kind(K){};
351 };
352 
353 // String based context id
354 struct StringBasedCtxKey : public ContextKey {
355   SampleContextFrameVector Context;
356 
357   bool WasLeafInlined;
StringBasedCtxKeyStringBasedCtxKey358   StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
classofStringBasedCtxKey359   static bool classof(const ContextKey *K) {
360     return K->getKind() == CK_StringBased;
361   }
362 
isEqualStringBasedCtxKey363   bool isEqual(const ContextKey *K) const override {
364     const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
365     return Context == Other->Context;
366   }
367 
genHashCodeStringBasedCtxKey368   void genHashCode() override {
369     HashCode = hash_value(SampleContextFrames(Context));
370   }
371 };
372 
373 // Address-based context id
374 struct AddrBasedCtxKey : public ContextKey {
375   SmallVector<uint64_t, 16> Context;
376 
377   bool WasLeafInlined;
AddrBasedCtxKeyAddrBasedCtxKey378   AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
classofAddrBasedCtxKey379   static bool classof(const ContextKey *K) {
380     return K->getKind() == CK_AddrBased;
381   }
382 
isEqualAddrBasedCtxKey383   bool isEqual(const ContextKey *K) const override {
384     const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(K);
385     return Context == Other->Context;
386   }
387 
genHashCodeAddrBasedCtxKey388   void genHashCode() override {
389     HashCode = hash_combine_range(Context.begin(), Context.end());
390   }
391 };
392 
393 // The counter of branch samples for one function indexed by the branch,
394 // which is represented as the source and target offset pair.
395 using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
396 // The counter of range samples for one function indexed by the range,
397 // which is represented as the start and end offset pair.
398 using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
399 // Wrapper for sample counters including range counter and branch counter
400 struct SampleCounter {
401   RangeSample RangeCounter;
402   BranchSample BranchCounter;
403 
recordRangeCountSampleCounter404   void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
405     assert(Start <= End && "Invalid instruction range");
406     RangeCounter[{Start, End}] += Repeat;
407   }
recordBranchCountSampleCounter408   void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
409     BranchCounter[{Source, Target}] += Repeat;
410   }
411 };
412 
413 // Sample counter with context to support context-sensitive profile
414 using ContextSampleCounterMap =
415     std::unordered_map<Hashable<ContextKey>, SampleCounter,
416                        Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
417 
418 struct FrameStack {
419   SmallVector<uint64_t, 16> Stack;
420   ProfiledBinary *Binary;
FrameStackFrameStack421   FrameStack(ProfiledBinary *B) : Binary(B) {}
pushFrameFrameStack422   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
423     assert(!Cur->isExternalFrame() &&
424            "External frame's not expected for context stack.");
425     Stack.push_back(Cur->Address);
426     return true;
427   }
428 
popFrameFrameStack429   void popFrame() {
430     if (!Stack.empty())
431       Stack.pop_back();
432   }
433   std::shared_ptr<StringBasedCtxKey> getContextKey();
434 };
435 
436 struct AddressStack {
437   SmallVector<uint64_t, 16> Stack;
438   ProfiledBinary *Binary;
AddressStackAddressStack439   AddressStack(ProfiledBinary *B) : Binary(B) {}
pushFrameAddressStack440   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
441     assert(!Cur->isExternalFrame() &&
442            "External frame's not expected for context stack.");
443     Stack.push_back(Cur->Address);
444     return true;
445   }
446 
popFrameAddressStack447   void popFrame() {
448     if (!Stack.empty())
449       Stack.pop_back();
450   }
451   std::shared_ptr<AddrBasedCtxKey> getContextKey();
452 };
453 
454 /*
455 As in hybrid sample we have a group of LBRs and the most recent sampling call
456 stack, we can walk through those LBRs to infer more call stacks which would be
457 used as context for profile. VirtualUnwinder is the class to do the call stack
458 unwinding based on LBR state. Two types of unwinding are processd here:
459 1) LBR unwinding and 2) linear range unwinding.
460 Specifically, for each LBR entry(can be classified into call, return, regular
461 branch), LBR unwinding will replay the operation by pushing, popping or
462 switching leaf frame towards the call stack and since the initial call stack
463 is most recently sampled, the replay should be in anti-execution order, i.e. for
464 the regular case, pop the call stack when LBR is call, push frame on call stack
465 when LBR is return. After each LBR processed, it also needs to align with the
466 next LBR by going through instructions from previous LBR's target to current
467 LBR's source, which is the linear unwinding. As instruction from linear range
468 can come from different function by inlining, linear unwinding will do the range
469 splitting and record counters by the range with same inline context. Over those
470 unwinding process we will record each call stack as context id and LBR/linear
471 range as sample counter for further CS profile generation.
472 */
473 class VirtualUnwinder {
474 public:
VirtualUnwinder(ContextSampleCounterMap * Counter,ProfiledBinary * B)475   VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
476       : CtxCounterMap(Counter), Binary(B) {}
477   bool unwind(const PerfSample *Sample, uint64_t Repeat);
getUntrackedCallsites()478   std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
479 
480   uint64_t NumTotalBranches = 0;
481   uint64_t NumExtCallBranch = 0;
482   uint64_t NumMissingExternalFrame = 0;
483   uint64_t NumMismatchedProEpiBranch = 0;
484   uint64_t NumMismatchedExtCallBranch = 0;
485   uint64_t NumUnpairedExtAddr = 0;
486   uint64_t NumPairedExtAddr = 0;
487 
488 private:
isSourceExternal(UnwindState & State)489   bool isSourceExternal(UnwindState &State) const {
490     return State.getCurrentLBRSource() == ExternalAddr;
491   }
492 
isTargetExternal(UnwindState & State)493   bool isTargetExternal(UnwindState &State) const {
494     return State.getCurrentLBRTarget() == ExternalAddr;
495   }
496 
497   // Determine whether the return source is from external code by checking if
498   // the target's the next inst is a call inst.
isReturnFromExternal(UnwindState & State)499   bool isReturnFromExternal(UnwindState &State) const {
500     return isSourceExternal(State) &&
501            (Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) != 0);
502   }
503 
504   // If the source is external address but it's not the `return` case, treat it
505   // as a call from external.
isCallFromExternal(UnwindState & State)506   bool isCallFromExternal(UnwindState &State) const {
507     return isSourceExternal(State) &&
508            Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) == 0;
509   }
510 
isCallState(UnwindState & State)511   bool isCallState(UnwindState &State) const {
512     // The tail call frame is always missing here in stack sample, we will
513     // use a specific tail call tracker to infer it.
514     if (!isValidState(State))
515       return false;
516 
517     if (Binary->addressIsCall(State.getCurrentLBRSource()))
518       return true;
519 
520     return isCallFromExternal(State);
521   }
522 
isReturnState(UnwindState & State)523   bool isReturnState(UnwindState &State) const {
524     if (!isValidState(State))
525       return false;
526 
527     // Simply check addressIsReturn, as ret is always reliable, both for
528     // regular call and tail call.
529     if (Binary->addressIsReturn(State.getCurrentLBRSource()))
530       return true;
531 
532     return isReturnFromExternal(State);
533   }
534 
isValidState(UnwindState & State)535   bool isValidState(UnwindState &State) const { return !State.Invalid; }
536 
537   void unwindCall(UnwindState &State);
538   void unwindLinear(UnwindState &State, uint64_t Repeat);
539   void unwindReturn(UnwindState &State);
540   void unwindBranch(UnwindState &State);
541 
542   template <typename T>
543   void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
544   // Collect each samples on trie node by DFS traversal
545   template <typename T>
546   void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
547   void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
548 
549   void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
550                         uint64_t Repeat);
551   void recordBranchCount(const LBREntry &Branch, UnwindState &State,
552                          uint64_t Repeat);
553 
554   ContextSampleCounterMap *CtxCounterMap;
555   // Profiled binary that current frame address belongs to
556   ProfiledBinary *Binary;
557   // Keep track of all untracked callsites
558   std::set<uint64_t> UntrackedCallsites;
559 };
560 
561 // Read perf trace to parse the events and samples.
562 class PerfReaderBase {
563 public:
PerfReaderBase(ProfiledBinary * B,StringRef PerfTrace)564   PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
565       : Binary(B), PerfTraceFile(PerfTrace) {
566     // Initialize the base address to preferred address.
567     Binary->setBaseAddress(Binary->getPreferredBaseAddress());
568   };
569   virtual ~PerfReaderBase() = default;
570   static std::unique_ptr<PerfReaderBase>
571   create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
572          std::optional<uint32_t> PIDFilter);
573 
574   // Entry of the reader to parse multiple perf traces
575   virtual void parsePerfTraces() = 0;
getSampleCounters()576   const ContextSampleCounterMap &getSampleCounters() const {
577     return SampleCounters;
578   }
profileIsCS()579   bool profileIsCS() { return ProfileIsCS; }
580 
581 protected:
582   ProfiledBinary *Binary = nullptr;
583   StringRef PerfTraceFile;
584 
585   ContextSampleCounterMap SampleCounters;
586   bool ProfileIsCS = false;
587 
588   uint64_t NumTotalSample = 0;
589   uint64_t NumLeafExternalFrame = 0;
590   uint64_t NumLeadingOutgoingLBR = 0;
591 };
592 
593 // Read perf script to parse the events and samples.
594 class PerfScriptReader : public PerfReaderBase {
595 public:
PerfScriptReader(ProfiledBinary * B,StringRef PerfTrace,std::optional<uint32_t> PID)596   PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
597                    std::optional<uint32_t> PID)
598       : PerfReaderBase(B, PerfTrace), PIDFilter(PID){};
599 
600   // Entry of the reader to parse multiple perf traces
601   void parsePerfTraces() override;
602   // Generate perf script from perf data
603   static PerfInputFile
604   convertPerfDataToTrace(ProfiledBinary *Binary, PerfInputFile &File,
605                          std::optional<uint32_t> PIDFilter);
606   // Extract perf script type by peaking at the input
607   static PerfContent checkPerfScriptType(StringRef FileName);
608 
609 protected:
610   // The parsed MMap event
611   struct MMapEvent {
612     uint64_t PID = 0;
613     uint64_t Address = 0;
614     uint64_t Size = 0;
615     uint64_t Offset = 0;
616     StringRef BinaryPath;
617   };
618 
619   // Check whether a given line is LBR sample
620   static bool isLBRSample(StringRef Line);
621   // Check whether a given line is MMAP event
622   static bool isMMap2Event(StringRef Line);
623   // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
624   // mapping between the binary name and its memory layout.
625   static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
626                                          MMapEvent &MMap);
627   // Update base address based on mmap events
628   void updateBinaryAddress(const MMapEvent &Event);
629   // Parse mmap event and update binary address
630   void parseMMap2Event(TraceStream &TraceIt);
631   // Parse perf events/samples and do aggregation
632   void parseAndAggregateTrace();
633   // Parse either an MMAP event or a perf sample
634   void parseEventOrSample(TraceStream &TraceIt);
635   // Warn if the relevant mmap event is missing.
636   void warnIfMissingMMap();
637   // Emit accumulate warnings.
638   void warnTruncatedStack();
639   // Warn if range is invalid.
640   void warnInvalidRange();
641   // Extract call stack from the perf trace lines
642   bool extractCallstack(TraceStream &TraceIt,
643                         SmallVectorImpl<uint64_t> &CallStack);
644   // Extract LBR stack from one perf trace line
645   bool extractLBRStack(TraceStream &TraceIt,
646                        SmallVectorImpl<LBREntry> &LBRStack);
647   uint64_t parseAggregatedCount(TraceStream &TraceIt);
648   // Parse one sample from multiple perf lines, override this for different
649   // sample type
650   void parseSample(TraceStream &TraceIt);
651   // An aggregated count is given to indicate how many times the sample is
652   // repeated.
parseSample(TraceStream & TraceIt,uint64_t Count)653   virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
654   void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
655   // Post process the profile after trace aggregation, we will do simple range
656   // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
657   virtual void generateUnsymbolizedProfile();
658   void writeUnsymbolizedProfile(StringRef Filename);
659   void writeUnsymbolizedProfile(raw_fd_ostream &OS);
660 
661   // Samples with the repeating time generated by the perf reader
662   AggregatedCounter AggregatedSamples;
663   // Keep track of all invalid return addresses
664   std::set<uint64_t> InvalidReturnAddresses;
665   // PID for the process of interest
666   std::optional<uint32_t> PIDFilter;
667 };
668 
669 /*
670   The reader of LBR only perf script.
671   A typical LBR sample is like:
672     40062f 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
673           ... 0x4005c8/0x4005dc/P/-/-/0
674 */
675 class LBRPerfReader : public PerfScriptReader {
676 public:
LBRPerfReader(ProfiledBinary * Binary,StringRef PerfTrace,std::optional<uint32_t> PID)677   LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
678                 std::optional<uint32_t> PID)
679       : PerfScriptReader(Binary, PerfTrace, PID){};
680   // Parse the LBR only sample.
681   void parseSample(TraceStream &TraceIt, uint64_t Count) override;
682 };
683 
684 /*
685   Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
686   which is used to generate CS profile. An example of hybrid sample:
687     4005dc    # call stack leaf
688     400634
689     400684    # call stack root
690     0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
691           ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
692 */
693 class HybridPerfReader : public PerfScriptReader {
694 public:
HybridPerfReader(ProfiledBinary * Binary,StringRef PerfTrace,std::optional<uint32_t> PID)695   HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
696                    std::optional<uint32_t> PID)
697       : PerfScriptReader(Binary, PerfTrace, PID){};
698   // Parse the hybrid sample including the call and LBR line
699   void parseSample(TraceStream &TraceIt, uint64_t Count) override;
700   void generateUnsymbolizedProfile() override;
701 
702 private:
703   // Unwind the hybrid samples after aggregration
704   void unwindSamples();
705 };
706 
707 /*
708    Format of unsymbolized profile:
709 
710     [frame1 @ frame2 @ ...]  # If it's a CS profile
711       number of entries in RangeCounter
712       from_1-to_1:count_1
713       from_2-to_2:count_2
714       ......
715       from_n-to_n:count_n
716       number of entries in BranchCounter
717       src_1->dst_1:count_1
718       src_2->dst_2:count_2
719       ......
720       src_n->dst_n:count_n
721     [frame1 @ frame2 @ ...]  # Next context
722       ......
723 
724 Note that non-CS profile doesn't have the empty `[]` context.
725 */
726 class UnsymbolizedProfileReader : public PerfReaderBase {
727 public:
UnsymbolizedProfileReader(ProfiledBinary * Binary,StringRef PerfTrace)728   UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
729       : PerfReaderBase(Binary, PerfTrace){};
730   void parsePerfTraces() override;
731 
732 private:
733   void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
734   void readUnsymbolizedProfile(StringRef Filename);
735 
736   std::unordered_set<std::string> ContextStrSet;
737 };
738 
739 } // end namespace sampleprof
740 } // end namespace llvm
741 
742 #endif
743