1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
#include "ErrorHandling.h"
#include "ProfiledBinary.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <fstream>
#include <list>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
20 
21 using namespace llvm;
22 using namespace sampleprof;
23 
24 namespace llvm {
25 namespace sampleprof {
26 
27 // Stream based trace line iterator
28 class TraceStream {
29   std::string CurrentLine;
30   std::ifstream Fin;
31   bool IsAtEoF = false;
32   uint64_t LineNumber = 0;
33 
34 public:
TraceStream(StringRef Filename)35   TraceStream(StringRef Filename) : Fin(Filename.str()) {
36     if (!Fin.good())
37       exitWithError("Error read input perf script file", Filename);
38     advance();
39   }
40 
getCurrentLine()41   StringRef getCurrentLine() {
42     assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
43     return CurrentLine;
44   }
45 
getLineNumber()46   uint64_t getLineNumber() { return LineNumber; }
47 
isAtEoF()48   bool isAtEoF() { return IsAtEoF; }
49 
50   // Read the next line
advance()51   void advance() {
52     if (!std::getline(Fin, CurrentLine)) {
53       IsAtEoF = true;
54       return;
55     }
56     LineNumber++;
57   }
58 };
59 
// The type of perfscript.
enum PerfScriptType {
  PERF_UNKNOWN = 0,   // Not determined yet; initial value of PerfReader.
  PERF_INVALID = 1,   // No recognizable LBR sample found in the trace.
  PERF_LBR = 2,       // Only LBR sample
  PERF_LBR_STACK = 3, // Hybrid sample including call stack and LBR stack.
};
67 
// One parsed LBR record: a taken branch from Source to Target.
struct LBREntry {
  // Branch source address.
  uint64_t Source = 0;
  // Branch target address.
  uint64_t Target = 0;
  // An artificial branch stands for a series of consecutive branches starting
  // from the current binary with a transition through external code and
  // eventually landing back in the current binary.
  bool IsArtificial = false;

  LBREntry(uint64_t S, uint64_t T, bool I)
      : Source(S), Target(T), IsArtificial(I) {}
};
79 
// Hash wrapper for generic data of type T, usable as an unordered container
// key. T must provide \fn getHashCode and \fn isEqual.
// getHashCode is deliberately non-virtual to avoid vtable-call overhead: the
// derived class computes its hash explicitly and stores it in the base class
// HashCode field. This also leaves room for computing the hash incrementally
// (rolling-hash style) during frame stack unwinding, since unwinding only
// changes the leaf of the frame stack. \fn isEqual stays virtual and does
// carry dispatch overhead; with a stronger hash function in the future the
// precise comparison could be skipped or made non-virtual (e.g. tolerate a
// low collision probability).
template <class T> class Hashable {
public:
  std::shared_ptr<T> Data;
  Hashable(const std::shared_ptr<T> &D) : Data(D) {}

  // Hash functor: forwards to the precomputed, non-virtual hash code.
  struct Hash {
    uint64_t operator()(const Hashable<T> &Key) const {
      uint64_t HashCode = Key.Data->getHashCode();
      // Don't make it virtual for getHashCode
      assert(HashCode && "Should generate HashCode for it!");
      return HashCode;
    }
  };

  // Equality functor: precise comparison of the underlying data (virtual
  // call, so it has some overhead).
  struct Equal {
    bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
      return LHS.Data->isEqual(RHS.Data.get());
    }
  };

  T *getPtr() const { return Data.get(); }
};
114 
// Base class to extend for all types of perf sample.
struct PerfSample {
  // Precomputed hash; filled in by the derived class (see Hashable).
  uint64_t HashCode = 0;

  virtual ~PerfSample() = default;

  // Non-virtual on purpose; returns the cached hash.
  uint64_t getHashCode() const { return HashCode; }

  // Default equality is hash equality; derived classes override this with a
  // precise comparison of their payload.
  virtual bool isEqual(const PerfSample *K) const {
    return HashCode == K->HashCode;
  }

  // Utilities for LLVM-style RTTI.
  enum PerfKind { PK_HybridSample };
  const PerfKind Kind;
  PerfKind getKind() const { return Kind; }
  PerfSample(PerfKind K) : Kind(K) {}
};
131 
132 // The parsed hybrid sample including call stack and LBR stack.
133 struct HybridSample : public PerfSample {
134   // Profiled binary that current frame address belongs to
135   ProfiledBinary *Binary;
136   // Call stack recorded in FILO(leaf to root) order
137   SmallVector<uint64_t, 16> CallStack;
138   // LBR stack recorded in FIFO order
139   SmallVector<LBREntry, 16> LBRStack;
140 
HybridSampleHybridSample141   HybridSample() : PerfSample(PK_HybridSample){};
classofHybridSample142   static bool classof(const PerfSample *K) {
143     return K->getKind() == PK_HybridSample;
144   }
145 
146   // Used for sample aggregation
isEqualHybridSample147   bool isEqual(const PerfSample *K) const override {
148     const HybridSample *Other = dyn_cast<HybridSample>(K);
149     if (Other->Binary != Binary)
150       return false;
151     const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
152     const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
153 
154     if (CallStack.size() != OtherCallStack.size() ||
155         LBRStack.size() != OtherLBRStack.size())
156       return false;
157 
158     auto Iter = CallStack.begin();
159     for (auto Address : OtherCallStack) {
160       if (Address != *Iter++)
161         return false;
162     }
163 
164     for (size_t I = 0; I < OtherLBRStack.size(); I++) {
165       if (LBRStack[I].Source != OtherLBRStack[I].Source ||
166           LBRStack[I].Target != OtherLBRStack[I].Target)
167         return false;
168     }
169     return true;
170   }
171 
genHashCodeHybridSample172   void genHashCode() {
173     // Use simple DJB2 hash
174     auto HashCombine = [](uint64_t H, uint64_t V) {
175       return ((H << 5) + H) + V;
176     };
177     uint64_t Hash = 5381;
178     Hash = HashCombine(Hash, reinterpret_cast<uint64_t>(Binary));
179     for (const auto &Value : CallStack) {
180       Hash = HashCombine(Hash, Value);
181     }
182     for (const auto &Entry : LBRStack) {
183       Hash = HashCombine(Hash, Entry.Source);
184       Hash = HashCombine(Hash, Entry.Target);
185     }
186     HashCode = Hash;
187   }
188 };
189 
// After parsing the sample, we record the samples by aggregating them
// into this counter. The key stores the sample data and the value is
// the sample repeat times.
using AggregatedCounter =
    std::unordered_map<Hashable<PerfSample>, uint64_t,
                       Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;

// (Start/Source, End/Target, Count) triples recorded on a trie node during
// unwinding; see UnwindState::ProfiledFrame below.
using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
// The state for the unwinder. It doesn't hold the data but only keeps
// pointers/indices into the data. While unwinding, the call stack is changed
// dynamically and will be recorded as the context of the sample.
struct UnwindState {
  // Profiled binary that current frame address belongs to
  const ProfiledBinary *Binary;
  // Call stack trie node. The path from the dummy root to a node is one
  // calling context; the samples attributed to that context are stored on
  // the node itself.
  struct ProfiledFrame {
    // Frame address. Zero only for the dummy root.
    const uint64_t Address = 0;
    // Caller frame in the trie; null for the dummy root.
    ProfiledFrame *Parent;
    // (Start, End, Count) tuples for linear ranges executed in this context.
    SampleVector RangeSamples;
    // (Source, Target, Count) tuples for branches taken in this context.
    SampleVector BranchSamples;
    // Callee frames keyed by their frame address.
    std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;

    ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
        : Address(Addr), Parent(P) {}
    // Return the child frame for \p Address, creating it on first use.
    ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
      assert(Address && "Address can't be zero!");
      auto Ret = Children.emplace(
          Address, std::make_unique<ProfiledFrame>(Address, this));
      return Ret.first->second.get();
    }
    void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
      RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
    }
    void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
      BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
    }
    bool isDummyRoot() { return Address == 0; }
  };

  // Root of the calling-context trie; its children are the root frames.
  ProfiledFrame DummyTrieRoot;
  // Deepest (leaf) frame of the context currently being unwound.
  ProfiledFrame *CurrentLeafFrame;
  // Used to fall through the LBR stack
  uint32_t LBRIndex = 0;
  // Reference to HybridSample.LBRStack
  const SmallVector<LBREntry, 16> &LBRStack;
  // Used to iterate the address range
  InstructionPointer InstPtr;
  UnwindState(const HybridSample *Sample)
      : Binary(Sample->Binary), LBRStack(Sample->LBRStack),
        InstPtr(Sample->Binary, Sample->CallStack.front()) {
    initFrameTrie(Sample->CallStack);
  }

  // Sanity-check that the sampled call stack and the LBR stack plausibly
  // describe the same moment in time; bogus records are rejected.
  bool validateInitialState() {
    uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
    uint64_t LeafAddr = CurrentLeafFrame->Address;
    // When we take a stack sample, ideally the sampling distance between the
    // leaf IP of stack and the last LBR target shouldn't be very large.
    // Use a heuristic size (0x100) to filter out broken records.
    // NOTE(review): "%#010x" expects unsigned int while LeafAddr/LBRLeaf are
    // uint64_t; consider a 64-bit conversion specifier — confirm against
    // llvm::format's varargs handling.
    if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
      WithColor::warning() << "Bogus trace: stack tip = "
                           << format("%#010x", LeafAddr)
                           << ", LBR tip = " << format("%#010x\n", LBRLeaf);
      return false;
    }
    return true;
  }

  // The IP must stay in sync with the context leaf while alternating between
  // LBR unwinding and linear unwinding.
  void checkStateConsistency() {
    assert(InstPtr.Address == CurrentLeafFrame->Address &&
           "IP should align with context leaf");
  }

  const ProfiledBinary *getBinary() const { return Binary; }
  // True while there are unconsumed LBR entries left.
  bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
  uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
  uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
  const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
  void advanceLBR() { LBRIndex++; }

  ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }

  // Descend into (or create) the child frame at \p Address; used when the
  // unwinder replays a return in anti-execution order.
  void pushFrame(uint64_t Address) {
    CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
  }

  // Replace the leaf frame address with \p Address (a move within the same
  // caller); no-op when the address is unchanged.
  void switchToFrame(uint64_t Address) {
    if (CurrentLeafFrame->Address == Address)
      return;
    CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
  }

  // Drop the leaf frame; used when the unwinder replays a call in
  // anti-execution order.
  void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }

  // Seed the trie with the sampled call stack. The stack is recorded
  // leaf-first (FILO), hence the reverse iteration from root to leaf.
  void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
    ProfiledFrame *Cur = &DummyTrieRoot;
    for (auto Address : reverse(CallStack)) {
      Cur = Cur->getOrCreateChildFrame(Address);
    }
    CurrentLeafFrame = Cur;
  }

  ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
};
294 
// Base class for sample counter key with context.
struct ContextKey {
  // Precomputed hash; filled in by the derived class' genHashCode.
  uint64_t HashCode = 0;

  virtual ~ContextKey() = default;

  uint64_t getHashCode() const { return HashCode; }

  // Default equality is hash equality; derived keys override this with a
  // precise comparison of their payload.
  virtual bool isEqual(const ContextKey *K) const {
    return HashCode == K->HashCode;
  }

  // Utilities for LLVM-style RTTI.
  enum ContextKind { CK_StringBased, CK_ProbeBased };
  const ContextKind Kind;
  ContextKind getKind() const { return Kind; }
  ContextKey(ContextKind K) : Kind(K) {}
};
310 
311 // String based context id
312 struct StringBasedCtxKey : public ContextKey {
313   std::string Context;
StringBasedCtxKeyStringBasedCtxKey314   StringBasedCtxKey() : ContextKey(CK_StringBased){};
classofStringBasedCtxKey315   static bool classof(const ContextKey *K) {
316     return K->getKind() == CK_StringBased;
317   }
318 
isEqualStringBasedCtxKey319   bool isEqual(const ContextKey *K) const override {
320     const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
321     return Context == Other->Context;
322   }
323 
genHashCodeStringBasedCtxKey324   void genHashCode() { HashCode = hash_value(Context); }
325 };
326 
327 // Probe based context key as the intermediate key of context
328 // String based context key will introduce redundant string handling
329 // since the callee context is inferred from the context string which
330 // need to be splitted by '@' to get the last location frame, so we
331 // can just use probe instead and generate the string in the end.
332 struct ProbeBasedCtxKey : public ContextKey {
333   SmallVector<const PseudoProbe *, 16> Probes;
334 
ProbeBasedCtxKeyProbeBasedCtxKey335   ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {}
classofProbeBasedCtxKey336   static bool classof(const ContextKey *K) {
337     return K->getKind() == CK_ProbeBased;
338   }
339 
isEqualProbeBasedCtxKey340   bool isEqual(const ContextKey *K) const override {
341     const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K);
342     assert(O != nullptr && "Probe based key shouldn't be null in isEqual");
343     return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(),
344                       O->Probes.end());
345   }
346 
genHashCodeProbeBasedCtxKey347   void genHashCode() {
348     for (const auto *P : Probes) {
349       HashCode = hash_combine(HashCode, P);
350     }
351     if (HashCode == 0) {
352       // Avoid zero value of HashCode when it's an empty list
353       HashCode = 1;
354     }
355   }
356 };
357 
// The counter of branch samples for one function indexed by the branch,
// which is represented as the source and target offset pair.
using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// The counter of range samples for one function indexed by the range,
// which is represented as the start and end offset pair.
using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
// Wrapper for sample counters including range counter and branch counter.
struct SampleCounter {
  RangeSample RangeCounter;
  BranchSample BranchCounter;

  // Accumulate \p Repeat hits for the [Start, End] linear range.
  void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
    RangeCounter[std::make_pair(Start, End)] += Repeat;
  }

  // Accumulate \p Repeat hits for the Source -> Target branch.
  void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
    BranchCounter[std::make_pair(Source, Target)] += Repeat;
  }
};
376 
// Sample counter with context to support context-sensitive profile. Keyed by
// a context key (string or probe based) through the Hashable wrapper, which
// delegates hashing/equality to the key itself.
using ContextSampleCounterMap =
    std::unordered_map<Hashable<ContextKey>, SampleCounter,
                       Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
381 
382 struct FrameStack {
383   SmallVector<uint64_t, 16> Stack;
384   const ProfiledBinary *Binary;
FrameStackFrameStack385   FrameStack(const ProfiledBinary *B) : Binary(B) {}
pushFrameFrameStack386   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
387     Stack.push_back(Cur->Address);
388     return true;
389   }
390 
popFrameFrameStack391   void popFrame() {
392     if (!Stack.empty())
393       Stack.pop_back();
394   }
395   std::shared_ptr<StringBasedCtxKey> getContextKey();
396 };
397 
398 struct ProbeStack {
399   SmallVector<const PseudoProbe *, 16> Stack;
400   const ProfiledBinary *Binary;
ProbeStackProbeStack401   ProbeStack(const ProfiledBinary *B) : Binary(B) {}
pushFrameProbeStack402   bool pushFrame(UnwindState::ProfiledFrame *Cur) {
403     const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Cur->Address);
404     // We may not find a probe for a merged or external callsite.
405     // Callsite merging may cause the loss of original probe IDs.
406     // Cutting off the context from here since the inliner will
407     // not know how to consume a context with unknown callsites.
408     if (!CallProbe)
409       return false;
410     Stack.push_back(CallProbe);
411     return true;
412   }
413 
popFrameProbeStack414   void popFrame() {
415     if (!Stack.empty())
416       Stack.pop_back();
417   }
418   // Use pseudo probe based context key to get the sample counter
419   // A context stands for a call path from 'main' to an uninlined
420   // callee with all inline frames recovered on that path. The probes
421   // belonging to that call path is the probes either originated from
422   // the callee or from any functions inlined into the callee. Since
423   // pseudo probes are organized in a tri-tree style after decoded,
424   // the tree path from the tri-tree root (which is the uninlined
425   // callee) to the probe node forms an inline context.
426   // Here we use a list of probe(pointer) as the context key to speed up
427   // aggregation and the final context string will be generate in
428   // ProfileGenerator
429   std::shared_ptr<ProbeBasedCtxKey> getContextKey();
430 };
431 
432 /*
433 As in hybrid sample we have a group of LBRs and the most recent sampling call
434 stack, we can walk through those LBRs to infer more call stacks which would be
435 used as context for profile. VirtualUnwinder is the class to do the call stack
unwinding based on LBR state. Two types of unwinding are processed here:
437 1) LBR unwinding and 2) linear range unwinding.
438 Specifically, for each LBR entry(can be classified into call, return, regular
439 branch), LBR unwinding will replay the operation by pushing, popping or
440 switching leaf frame towards the call stack and since the initial call stack
441 is most recently sampled, the replay should be in anti-execution order, i.e. for
442 the regular case, pop the call stack when LBR is call, push frame on call stack
443 when LBR is return. After each LBR processed, it also needs to align with the
444 next LBR by going through instructions from previous LBR's target to current
445 LBR's source, which is the linear unwinding. As instruction from linear range
446 can come from different function by inlining, linear unwinding will do the range
447 splitting and record counters by the range with same inline context. Over those
448 unwinding process we will record each call stack as context id and LBR/linear
449 range as sample counter for further CS profile generation.
450 */
class VirtualUnwinder {
public:
  VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
      : CtxCounterMap(Counter), Binary(B) {}

  // Replay one aggregated hybrid sample (seen \p Repeat times) and record its
  // range/branch counters under the recovered contexts. See the file comment
  // above for the overall algorithm.
  bool unwind(const HybridSample *Sample, uint64_t Repeat);

private:
  // True if the current LBR entry branches from a call instruction.
  bool isCallState(UnwindState &State) const {
    // The tail call frame is always missing here in stack sample, we will
    // use a specific tail call tracker to infer it.
    return Binary->addressIsCall(State.getCurrentLBRSource());
  }

  // True if the current LBR entry branches from a return instruction.
  bool isReturnState(UnwindState &State) const {
    // Simply check addressIsReturn, as ret is always reliable, both for
    // regular call and tail call.
    return Binary->addressIsReturn(State.getCurrentLBRSource());
  }

  // Replay a call LBR (pops the leaf frame — anti-execution order).
  void unwindCall(UnwindState &State);
  // Walk instructions from the previous LBR target to the current LBR
  // source, splitting the range by inline context.
  void unwindLinear(UnwindState &State, uint64_t Repeat);
  // Replay a return LBR (pushes the callee frame — anti-execution order).
  void unwindReturn(UnwindState &State);
  // Handle a regular branch that stays within the current frame.
  void unwindBranchWithinFrame(UnwindState &State);

  // Flush the samples recorded on trie node \p Cur into the counter keyed by
  // the context held in \p Stack (FrameStack or ProbeStack).
  template <typename T>
  void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
  // Collect each samples on trie node by DFS traversal
  template <typename T>
  void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
  void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);

  void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
                        uint64_t Repeat);
  void recordBranchCount(const LBREntry &Branch, UnwindState &State,
                         uint64_t Repeat);

  ContextSampleCounterMap *CtxCounterMap;
  // Profiled binary that current frame address belongs to
  const ProfiledBinary *Binary;
};
491 
// Filename to binary map
using BinaryMap = StringMap<ProfiledBinary>;
// Address to binary map for fast look-up
using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>;
// Binary to ContextSampleCounters map to support multiple binaries. We may
// have the same binary loaded at different addresses; they should share the
// same sample counter.
using BinarySampleCounterMap =
    std::unordered_map<ProfiledBinary *, ContextSampleCounterMap>;
501 
// Load binaries and read perf trace to parse the events and samples.
class PerfReader {

public:
  PerfReader(cl::list<std::string> &BinaryFilenames,
             cl::list<std::string> &PerfTraceFilenames);

  // A LBR sample is like:
  // 0x5c6313f/0x5c63170/P/-/-/0  0x5c630e7/0x5c63130/P/-/-/0 ...
  // A heuristic for fast detection by checking whether a
  // leading "  0x" and the '/' exist.
  static bool isLBRSample(StringRef Line) {
    if (!Line.startswith(" 0x"))
      return false;
    if (Line.find('/') != StringRef::npos)
      return true;
    return false;
  }

  // The raw hybrid sample is like
  // e.g.
  //           4005dc    # call stack leaf
  //           400634
  //           400684    # call stack root
  // 0x4005c8/0x4005dc/P/-/-/0   0x40062f/0x4005b0/P/-/-/0 ...
  //          ... 0x4005c8/0x4005dc/P/-/-/0    # LBR Entries
  // Determine whether the perfscript contains hybrid samples (call stack +
  // LBRs) by checking whether there is a non-empty call stack immediately
  // followed by a LBR sample.
  static PerfScriptType checkPerfScriptType(StringRef FileName) {
    TraceStream TraceIt(FileName);
    uint64_t FrameAddr = 0;
    while (!TraceIt.isAtEoF()) {
      // Count consecutive lines that parse as hex frame addresses; they form
      // the call-stack part of a hybrid sample. getAsInteger returns true on
      // a parse failure, hence the negation.
      int32_t Count = 0;
      while (!TraceIt.isAtEoF() &&
             !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) {
        Count++;
        TraceIt.advance();
      }
      if (!TraceIt.isAtEoF()) {
        if (isLBRSample(TraceIt.getCurrentLine())) {
          // A non-empty preceding call stack makes this a hybrid sample.
          if (Count > 0)
            return PERF_LBR_STACK;
          else
            return PERF_LBR;
        }
        TraceIt.advance();
      }
    }
    // No LBR sample line found anywhere in the trace.
    return PERF_INVALID;
  }

  // The parsed MMap event.
  struct MMapEvent {
    uint64_t PID = 0;
    uint64_t BaseAddress = 0;
    uint64_t Size = 0;
    uint64_t Offset = 0;
    StringRef BinaryPath;
  };

  /// Load symbols and disassemble the code of a given binary.
  /// Also register the binary in the binary table.
  ///
  ProfiledBinary &loadBinary(const StringRef BinaryPath,
                             bool AllowNameConflict = true);
  // Update the pre-loaded binary's load address from an MMAP event.
  void updateBinaryAddress(const MMapEvent &Event);
  PerfScriptType getPerfScriptType() const { return PerfType; }
  // Entry of the reader to parse multiple perf traces.
  void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames);
  const BinarySampleCounterMap &getBinarySampleCounters() const {
    return BinarySampleCounters;
  }

private:
  /// Validate the command line input.
  void validateCommandLine(cl::list<std::string> &BinaryFilenames,
                           cl::list<std::string> &PerfTraceFilenames);
  /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
  /// mapping between the binary name and its memory layout.
  ///
  void parseMMap2Event(TraceStream &TraceIt);
  // Parse perf events/samples and do aggregation.
  void parseAndAggregateTrace(StringRef Filename);
  // Parse either an MMAP event or a perf sample.
  void parseEventOrSample(TraceStream &TraceIt);
  // Parse the hybrid sample including the call stack and LBR line.
  void parseHybridSample(TraceStream &TraceIt);
  // Extract call stack from the perf trace lines.
  bool extractCallstack(TraceStream &TraceIt,
                        SmallVectorImpl<uint64_t> &CallStack);
  // Extract LBR stack from one perf trace line.
  bool extractLBRStack(TraceStream &TraceIt,
                       SmallVectorImpl<LBREntry> &LBRStack,
                       ProfiledBinary *Binary);
  // Determine PerfType by probing each input trace file.
  void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames);
  // Post process the profile after trace aggregation, we will do simple range
  // overlap computation for AutoFDO, or unwind for CSSPGO (hybrid sample).
  void generateRawProfile();
  // Unwind the hybrid samples after aggregation.
  void unwindSamples();
  void printUnwinderOutput();
  // Helper function for looking up binary in AddressBinaryMap.
  ProfiledBinary *getBinary(uint64_t Address);

  BinaryMap BinaryTable;
  AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup.

private:
  BinarySampleCounters BinarySampleCounters;
  // Samples with the repeating time generated by the perf reader.
  AggregatedCounter AggregatedSamples;
  PerfScriptType PerfType = PERF_UNKNOWN;
};
616 
617 } // end namespace sampleprof
618 } // end namespace llvm
619 
620 #endif
621