1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H
11 #include "ErrorHandling.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/Support/Casting.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Regex.h"
16 #include <cstdint>
17 #include <fstream>
18 #include <list>
19 #include <map>
20 #include <vector>
21
22 using namespace llvm;
23 using namespace sampleprof;
24
25 namespace llvm {
26 namespace sampleprof {
27
28 // Stream based trace line iterator
29 class TraceStream {
30 std::string CurrentLine;
31 std::ifstream Fin;
32 bool IsAtEoF = false;
33 uint64_t LineNumber = 0;
34
35 public:
TraceStream(StringRef Filename)36 TraceStream(StringRef Filename) : Fin(Filename.str()) {
37 if (!Fin.good())
38 exitWithError("Error read input perf script file", Filename);
39 advance();
40 }
41
getCurrentLine()42 StringRef getCurrentLine() {
43 assert(!IsAtEoF && "Line iterator reaches the End-of-File!");
44 return CurrentLine;
45 }
46
getLineNumber()47 uint64_t getLineNumber() { return LineNumber; }
48
isAtEoF()49 bool isAtEoF() { return IsAtEoF; }
50
51 // Read the next line
advance()52 void advance() {
53 if (!std::getline(Fin, CurrentLine)) {
54 IsAtEoF = true;
55 return;
56 }
57 LineNumber++;
58 }
59 };
60
61 // The type of input format.
62 enum PerfFormat {
63 UnknownFormat = 0,
64 PerfData = 1, // Raw linux perf.data.
65 PerfScript = 2, // Perf script create by `perf script` command.
66 UnsymbolizedProfile = 3, // Unsymbolized profile generated by llvm-profgen.
67
68 };
69
70 // The type of perfscript content.
71 enum PerfContent {
72 UnknownContent = 0,
73 LBR = 1, // Only LBR sample.
74 LBRStack = 2, // Hybrid sample including call stack and LBR stack.
75 };
76
77 struct PerfInputFile {
78 std::string InputFile;
79 PerfFormat Format = PerfFormat::UnknownFormat;
80 PerfContent Content = PerfContent::UnknownContent;
81 };
82
83 // The parsed LBR sample entry.
84 struct LBREntry {
85 uint64_t Source = 0;
86 uint64_t Target = 0;
LBREntryLBREntry87 LBREntry(uint64_t S, uint64_t T) : Source(S), Target(T) {}
88
89 #ifndef NDEBUG
printLBREntry90 void print() const {
91 dbgs() << "from " << format("%#010x", Source) << " to "
92 << format("%#010x", Target);
93 }
94 #endif
95 };
96
97 #ifndef NDEBUG
printLBRStack(const SmallVectorImpl<LBREntry> & LBRStack)98 static inline void printLBRStack(const SmallVectorImpl<LBREntry> &LBRStack) {
99 for (size_t I = 0; I < LBRStack.size(); I++) {
100 dbgs() << "[" << I << "] ";
101 LBRStack[I].print();
102 dbgs() << "\n";
103 }
104 }
105
printCallStack(const SmallVectorImpl<uint64_t> & CallStack)106 static inline void printCallStack(const SmallVectorImpl<uint64_t> &CallStack) {
107 for (size_t I = 0; I < CallStack.size(); I++) {
108 dbgs() << "[" << I << "] " << format("%#010x", CallStack[I]) << "\n";
109 }
110 }
111 #endif
112
113 // Hash interface for generic data of type T
114 // Data should implement a \fn getHashCode and a \fn isEqual
115 // Currently getHashCode is non-virtual to avoid the overhead of calling vtable,
116 // i.e we explicitly calculate hash of derived class, assign to base class's
117 // HashCode. This also provides the flexibility for calculating the hash code
118 // incrementally(like rolling hash) during frame stack unwinding since unwinding
119 // only changes the leaf of frame stack. \fn isEqual is a virtual function,
120 // which will have perf overhead. In the future, if we redesign a better hash
121 // function, then we can just skip this or switch to non-virtual function(like
122 // just ignore comparison if hash conflicts probabilities is low)
123 template <class T> class Hashable {
124 public:
125 std::shared_ptr<T> Data;
Hashable(const std::shared_ptr<T> & D)126 Hashable(const std::shared_ptr<T> &D) : Data(D) {}
127
128 // Hash code generation
129 struct Hash {
operatorHash130 uint64_t operator()(const Hashable<T> &Key) const {
131 // Don't make it virtual for getHashCode
132 uint64_t Hash = Key.Data->getHashCode();
133 assert(Hash && "Should generate HashCode for it!");
134 return Hash;
135 }
136 };
137
138 // Hash equal
139 struct Equal {
operatorEqual140 bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const {
141 // Precisely compare the data, vtable will have overhead.
142 return LHS.Data->isEqual(RHS.Data.get());
143 }
144 };
145
getPtr()146 T *getPtr() const { return Data.get(); }
147 };
148
149 struct PerfSample {
150 // LBR stack recorded in FIFO order.
151 SmallVector<LBREntry, 16> LBRStack;
152 // Call stack recorded in FILO(leaf to root) order, it's used for CS-profile
153 // generation
154 SmallVector<uint64_t, 16> CallStack;
155
156 virtual ~PerfSample() = default;
getHashCodePerfSample157 uint64_t getHashCode() const {
158 // Use simple DJB2 hash
159 auto HashCombine = [](uint64_t H, uint64_t V) {
160 return ((H << 5) + H) + V;
161 };
162 uint64_t Hash = 5381;
163 for (const auto &Value : CallStack) {
164 Hash = HashCombine(Hash, Value);
165 }
166 for (const auto &Entry : LBRStack) {
167 Hash = HashCombine(Hash, Entry.Source);
168 Hash = HashCombine(Hash, Entry.Target);
169 }
170 return Hash;
171 }
172
isEqualPerfSample173 bool isEqual(const PerfSample *Other) const {
174 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
175 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
176
177 if (CallStack.size() != OtherCallStack.size() ||
178 LBRStack.size() != OtherLBRStack.size())
179 return false;
180
181 if (!std::equal(CallStack.begin(), CallStack.end(), OtherCallStack.begin()))
182 return false;
183
184 for (size_t I = 0; I < OtherLBRStack.size(); I++) {
185 if (LBRStack[I].Source != OtherLBRStack[I].Source ||
186 LBRStack[I].Target != OtherLBRStack[I].Target)
187 return false;
188 }
189 return true;
190 }
191
192 #ifndef NDEBUG
193 uint64_t Linenum = 0;
194
printPerfSample195 void print() const {
196 dbgs() << "Line " << Linenum << "\n";
197 dbgs() << "LBR stack\n";
198 printLBRStack(LBRStack);
199 dbgs() << "Call stack\n";
200 printCallStack(CallStack);
201 }
202 #endif
203 };
204 // After parsing the sample, we record the samples by aggregating them
205 // into this counter. The key stores the sample data and the value is
206 // the sample repeat times.
207 using AggregatedCounter =
208 std::unordered_map<Hashable<PerfSample>, uint64_t,
209 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
210
211 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
212
isValidFallThroughRange(uint64_t Start,uint64_t End,ProfiledBinary * Binary)213 inline bool isValidFallThroughRange(uint64_t Start, uint64_t End,
214 ProfiledBinary *Binary) {
215 // Start bigger than End is considered invalid.
216 // LBR ranges cross the unconditional jmp are also assumed invalid.
217 // It's found that perf data may contain duplicate LBR entries that could form
218 // a range that does not reflect real execution flow on some Intel targets,
219 // e.g. Skylake. Such ranges are ususally very long. Exclude them since there
220 // cannot be a linear execution range that spans over unconditional jmp.
221 return Start <= End && !Binary->rangeCrossUncondBranch(Start, End);
222 }
223
224 // The state for the unwinder, it doesn't hold the data but only keep the
225 // pointer/index of the data, While unwinding, the CallStack is changed
226 // dynamicially and will be recorded as the context of the sample
227 struct UnwindState {
228 // Profiled binary that current frame address belongs to
229 const ProfiledBinary *Binary;
230 // Call stack trie node
231 struct ProfiledFrame {
232 const uint64_t Address = DummyRoot;
233 ProfiledFrame *Parent;
234 SampleVector RangeSamples;
235 SampleVector BranchSamples;
236 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
237
238 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
AddressUnwindState::ProfiledFrame239 : Address(Addr), Parent(P) {}
getOrCreateChildFrameUnwindState::ProfiledFrame240 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
241 assert(Address && "Address can't be zero!");
242 auto Ret = Children.emplace(
243 Address, std::make_unique<ProfiledFrame>(Address, this));
244 return Ret.first->second.get();
245 }
recordRangeCountUnwindState::ProfiledFrame246 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
247 RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
248 }
recordBranchCountUnwindState::ProfiledFrame249 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
250 BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
251 }
isDummyRootUnwindState::ProfiledFrame252 bool isDummyRoot() { return Address == DummyRoot; }
isExternalFrameUnwindState::ProfiledFrame253 bool isExternalFrame() { return Address == ExternalAddr; }
isLeafFrameUnwindState::ProfiledFrame254 bool isLeafFrame() { return Children.empty(); }
255 };
256
257 ProfiledFrame DummyTrieRoot;
258 ProfiledFrame *CurrentLeafFrame;
259 // Used to fall through the LBR stack
260 uint32_t LBRIndex = 0;
261 // Reference to PerfSample.LBRStack
262 const SmallVector<LBREntry, 16> &LBRStack;
263 // Used to iterate the address range
264 InstructionPointer InstPtr;
265 // Indicate whether unwinding is currently in a bad state which requires to
266 // skip all subsequent unwinding.
267 bool Invalid = false;
UnwindStateUnwindState268 UnwindState(const PerfSample *Sample, const ProfiledBinary *Binary)
269 : Binary(Binary), LBRStack(Sample->LBRStack),
270 InstPtr(Binary, Sample->CallStack.front()) {
271 initFrameTrie(Sample->CallStack);
272 }
273
validateInitialStateUnwindState274 bool validateInitialState() {
275 uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
276 uint64_t LeafAddr = CurrentLeafFrame->Address;
277 assert((LBRLeaf != ExternalAddr || LBRLeaf == LeafAddr) &&
278 "External leading LBR should match the leaf frame.");
279
280 // When we take a stack sample, ideally the sampling distance between the
281 // leaf IP of stack and the last LBR target shouldn't be very large.
282 // Use a heuristic size (0x100) to filter out broken records.
283 if (LeafAddr < LBRLeaf || LeafAddr - LBRLeaf >= 0x100) {
284 WithColor::warning() << "Bogus trace: stack tip = "
285 << format("%#010x", LeafAddr)
286 << ", LBR tip = " << format("%#010x\n", LBRLeaf);
287 return false;
288 }
289 return true;
290 }
291
checkStateConsistencyUnwindState292 void checkStateConsistency() {
293 assert(InstPtr.Address == CurrentLeafFrame->Address &&
294 "IP should align with context leaf");
295 }
296
setInvalidUnwindState297 void setInvalid() { Invalid = true; }
hasNextLBRUnwindState298 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
getCurrentLBRSourceUnwindState299 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
getCurrentLBRTargetUnwindState300 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
getCurrentLBRUnwindState301 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
IsLastLBRUnwindState302 bool IsLastLBR() const { return LBRIndex == 0; }
getLBRStackSizeUnwindState303 bool getLBRStackSize() const { return LBRStack.size(); }
advanceLBRUnwindState304 void advanceLBR() { LBRIndex++; }
getParentFrameUnwindState305 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
306
pushFrameUnwindState307 void pushFrame(uint64_t Address) {
308 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
309 }
310
switchToFrameUnwindState311 void switchToFrame(uint64_t Address) {
312 if (CurrentLeafFrame->Address == Address)
313 return;
314 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
315 }
316
popFrameUnwindState317 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
318
clearCallStackUnwindState319 void clearCallStack() { CurrentLeafFrame = &DummyTrieRoot; }
320
initFrameTrieUnwindState321 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
322 ProfiledFrame *Cur = &DummyTrieRoot;
323 for (auto Address : reverse(CallStack)) {
324 Cur = Cur->getOrCreateChildFrame(Address);
325 }
326 CurrentLeafFrame = Cur;
327 }
328
getDummyRootPtrUnwindState329 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
330 };
331
332 // Base class for sample counter key with context
333 struct ContextKey {
334 uint64_t HashCode = 0;
335 virtual ~ContextKey() = default;
getHashCodeContextKey336 uint64_t getHashCode() {
337 if (HashCode == 0)
338 genHashCode();
339 return HashCode;
340 }
341 virtual void genHashCode() = 0;
isEqualContextKey342 virtual bool isEqual(const ContextKey *K) const {
343 return HashCode == K->HashCode;
344 };
345
346 // Utilities for LLVM-style RTTI
347 enum ContextKind { CK_StringBased, CK_AddrBased };
348 const ContextKind Kind;
getKindContextKey349 ContextKind getKind() const { return Kind; }
ContextKeyContextKey350 ContextKey(ContextKind K) : Kind(K){};
351 };
352
353 // String based context id
354 struct StringBasedCtxKey : public ContextKey {
355 SampleContextFrameVector Context;
356
357 bool WasLeafInlined;
StringBasedCtxKeyStringBasedCtxKey358 StringBasedCtxKey() : ContextKey(CK_StringBased), WasLeafInlined(false){};
classofStringBasedCtxKey359 static bool classof(const ContextKey *K) {
360 return K->getKind() == CK_StringBased;
361 }
362
isEqualStringBasedCtxKey363 bool isEqual(const ContextKey *K) const override {
364 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K);
365 return Context == Other->Context;
366 }
367
genHashCodeStringBasedCtxKey368 void genHashCode() override {
369 HashCode = hash_value(SampleContextFrames(Context));
370 }
371 };
372
373 // Address-based context id
374 struct AddrBasedCtxKey : public ContextKey {
375 SmallVector<uint64_t, 16> Context;
376
377 bool WasLeafInlined;
AddrBasedCtxKeyAddrBasedCtxKey378 AddrBasedCtxKey() : ContextKey(CK_AddrBased), WasLeafInlined(false){};
classofAddrBasedCtxKey379 static bool classof(const ContextKey *K) {
380 return K->getKind() == CK_AddrBased;
381 }
382
isEqualAddrBasedCtxKey383 bool isEqual(const ContextKey *K) const override {
384 const AddrBasedCtxKey *Other = dyn_cast<AddrBasedCtxKey>(K);
385 return Context == Other->Context;
386 }
387
genHashCodeAddrBasedCtxKey388 void genHashCode() override {
389 HashCode = hash_combine_range(Context.begin(), Context.end());
390 }
391 };
392
393 // The counter of branch samples for one function indexed by the branch,
394 // which is represented as the source and target offset pair.
395 using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
396 // The counter of range samples for one function indexed by the range,
397 // which is represented as the start and end offset pair.
398 using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;
399 // Wrapper for sample counters including range counter and branch counter
400 struct SampleCounter {
401 RangeSample RangeCounter;
402 BranchSample BranchCounter;
403
recordRangeCountSampleCounter404 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) {
405 assert(Start <= End && "Invalid instruction range");
406 RangeCounter[{Start, End}] += Repeat;
407 }
recordBranchCountSampleCounter408 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) {
409 BranchCounter[{Source, Target}] += Repeat;
410 }
411 };
412
413 // Sample counter with context to support context-sensitive profile
414 using ContextSampleCounterMap =
415 std::unordered_map<Hashable<ContextKey>, SampleCounter,
416 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
417
418 struct FrameStack {
419 SmallVector<uint64_t, 16> Stack;
420 ProfiledBinary *Binary;
FrameStackFrameStack421 FrameStack(ProfiledBinary *B) : Binary(B) {}
pushFrameFrameStack422 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
423 assert(!Cur->isExternalFrame() &&
424 "External frame's not expected for context stack.");
425 Stack.push_back(Cur->Address);
426 return true;
427 }
428
popFrameFrameStack429 void popFrame() {
430 if (!Stack.empty())
431 Stack.pop_back();
432 }
433 std::shared_ptr<StringBasedCtxKey> getContextKey();
434 };
435
436 struct AddressStack {
437 SmallVector<uint64_t, 16> Stack;
438 ProfiledBinary *Binary;
AddressStackAddressStack439 AddressStack(ProfiledBinary *B) : Binary(B) {}
pushFrameAddressStack440 bool pushFrame(UnwindState::ProfiledFrame *Cur) {
441 assert(!Cur->isExternalFrame() &&
442 "External frame's not expected for context stack.");
443 Stack.push_back(Cur->Address);
444 return true;
445 }
446
popFrameAddressStack447 void popFrame() {
448 if (!Stack.empty())
449 Stack.pop_back();
450 }
451 std::shared_ptr<AddrBasedCtxKey> getContextKey();
452 };
453
454 /*
455 As in hybrid sample we have a group of LBRs and the most recent sampling call
456 stack, we can walk through those LBRs to infer more call stacks which would be
457 used as context for profile. VirtualUnwinder is the class to do the call stack
458 unwinding based on LBR state. Two types of unwinding are processd here:
459 1) LBR unwinding and 2) linear range unwinding.
460 Specifically, for each LBR entry(can be classified into call, return, regular
461 branch), LBR unwinding will replay the operation by pushing, popping or
462 switching leaf frame towards the call stack and since the initial call stack
463 is most recently sampled, the replay should be in anti-execution order, i.e. for
464 the regular case, pop the call stack when LBR is call, push frame on call stack
465 when LBR is return. After each LBR processed, it also needs to align with the
466 next LBR by going through instructions from previous LBR's target to current
467 LBR's source, which is the linear unwinding. As instruction from linear range
468 can come from different function by inlining, linear unwinding will do the range
469 splitting and record counters by the range with same inline context. Over those
470 unwinding process we will record each call stack as context id and LBR/linear
471 range as sample counter for further CS profile generation.
472 */
473 class VirtualUnwinder {
474 public:
VirtualUnwinder(ContextSampleCounterMap * Counter,ProfiledBinary * B)475 VirtualUnwinder(ContextSampleCounterMap *Counter, ProfiledBinary *B)
476 : CtxCounterMap(Counter), Binary(B) {}
477 bool unwind(const PerfSample *Sample, uint64_t Repeat);
getUntrackedCallsites()478 std::set<uint64_t> &getUntrackedCallsites() { return UntrackedCallsites; }
479
480 uint64_t NumTotalBranches = 0;
481 uint64_t NumExtCallBranch = 0;
482 uint64_t NumMissingExternalFrame = 0;
483 uint64_t NumMismatchedProEpiBranch = 0;
484 uint64_t NumMismatchedExtCallBranch = 0;
485 uint64_t NumUnpairedExtAddr = 0;
486 uint64_t NumPairedExtAddr = 0;
487
488 private:
isSourceExternal(UnwindState & State)489 bool isSourceExternal(UnwindState &State) const {
490 return State.getCurrentLBRSource() == ExternalAddr;
491 }
492
isTargetExternal(UnwindState & State)493 bool isTargetExternal(UnwindState &State) const {
494 return State.getCurrentLBRTarget() == ExternalAddr;
495 }
496
497 // Determine whether the return source is from external code by checking if
498 // the target's the next inst is a call inst.
isReturnFromExternal(UnwindState & State)499 bool isReturnFromExternal(UnwindState &State) const {
500 return isSourceExternal(State) &&
501 (Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) != 0);
502 }
503
504 // If the source is external address but it's not the `return` case, treat it
505 // as a call from external.
isCallFromExternal(UnwindState & State)506 bool isCallFromExternal(UnwindState &State) const {
507 return isSourceExternal(State) &&
508 Binary->getCallAddrFromFrameAddr(State.getCurrentLBRTarget()) == 0;
509 }
510
isCallState(UnwindState & State)511 bool isCallState(UnwindState &State) const {
512 // The tail call frame is always missing here in stack sample, we will
513 // use a specific tail call tracker to infer it.
514 if (!isValidState(State))
515 return false;
516
517 if (Binary->addressIsCall(State.getCurrentLBRSource()))
518 return true;
519
520 return isCallFromExternal(State);
521 }
522
isReturnState(UnwindState & State)523 bool isReturnState(UnwindState &State) const {
524 if (!isValidState(State))
525 return false;
526
527 // Simply check addressIsReturn, as ret is always reliable, both for
528 // regular call and tail call.
529 if (Binary->addressIsReturn(State.getCurrentLBRSource()))
530 return true;
531
532 return isReturnFromExternal(State);
533 }
534
isValidState(UnwindState & State)535 bool isValidState(UnwindState &State) const { return !State.Invalid; }
536
537 void unwindCall(UnwindState &State);
538 void unwindLinear(UnwindState &State, uint64_t Repeat);
539 void unwindReturn(UnwindState &State);
540 void unwindBranch(UnwindState &State);
541
542 template <typename T>
543 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
544 // Collect each samples on trie node by DFS traversal
545 template <typename T>
546 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
547 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
548
549 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
550 uint64_t Repeat);
551 void recordBranchCount(const LBREntry &Branch, UnwindState &State,
552 uint64_t Repeat);
553
554 ContextSampleCounterMap *CtxCounterMap;
555 // Profiled binary that current frame address belongs to
556 ProfiledBinary *Binary;
557 // Keep track of all untracked callsites
558 std::set<uint64_t> UntrackedCallsites;
559 };
560
561 // Read perf trace to parse the events and samples.
562 class PerfReaderBase {
563 public:
PerfReaderBase(ProfiledBinary * B,StringRef PerfTrace)564 PerfReaderBase(ProfiledBinary *B, StringRef PerfTrace)
565 : Binary(B), PerfTraceFile(PerfTrace) {
566 // Initialize the base address to preferred address.
567 Binary->setBaseAddress(Binary->getPreferredBaseAddress());
568 };
569 virtual ~PerfReaderBase() = default;
570 static std::unique_ptr<PerfReaderBase>
571 create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
572 std::optional<uint32_t> PIDFilter);
573
574 // Entry of the reader to parse multiple perf traces
575 virtual void parsePerfTraces() = 0;
getSampleCounters()576 const ContextSampleCounterMap &getSampleCounters() const {
577 return SampleCounters;
578 }
profileIsCS()579 bool profileIsCS() { return ProfileIsCS; }
580
581 protected:
582 ProfiledBinary *Binary = nullptr;
583 StringRef PerfTraceFile;
584
585 ContextSampleCounterMap SampleCounters;
586 bool ProfileIsCS = false;
587
588 uint64_t NumTotalSample = 0;
589 uint64_t NumLeafExternalFrame = 0;
590 uint64_t NumLeadingOutgoingLBR = 0;
591 };
592
593 // Read perf script to parse the events and samples.
594 class PerfScriptReader : public PerfReaderBase {
595 public:
PerfScriptReader(ProfiledBinary * B,StringRef PerfTrace,std::optional<uint32_t> PID)596 PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
597 std::optional<uint32_t> PID)
598 : PerfReaderBase(B, PerfTrace), PIDFilter(PID){};
599
600 // Entry of the reader to parse multiple perf traces
601 void parsePerfTraces() override;
602 // Generate perf script from perf data
603 static PerfInputFile
604 convertPerfDataToTrace(ProfiledBinary *Binary, PerfInputFile &File,
605 std::optional<uint32_t> PIDFilter);
606 // Extract perf script type by peaking at the input
607 static PerfContent checkPerfScriptType(StringRef FileName);
608
609 protected:
610 // The parsed MMap event
611 struct MMapEvent {
612 uint64_t PID = 0;
613 uint64_t Address = 0;
614 uint64_t Size = 0;
615 uint64_t Offset = 0;
616 StringRef BinaryPath;
617 };
618
619 // Check whether a given line is LBR sample
620 static bool isLBRSample(StringRef Line);
621 // Check whether a given line is MMAP event
622 static bool isMMap2Event(StringRef Line);
623 // Parse a single line of a PERF_RECORD_MMAP2 event looking for a
624 // mapping between the binary name and its memory layout.
625 static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
626 MMapEvent &MMap);
627 // Update base address based on mmap events
628 void updateBinaryAddress(const MMapEvent &Event);
629 // Parse mmap event and update binary address
630 void parseMMap2Event(TraceStream &TraceIt);
631 // Parse perf events/samples and do aggregation
632 void parseAndAggregateTrace();
633 // Parse either an MMAP event or a perf sample
634 void parseEventOrSample(TraceStream &TraceIt);
635 // Warn if the relevant mmap event is missing.
636 void warnIfMissingMMap();
637 // Emit accumulate warnings.
638 void warnTruncatedStack();
639 // Warn if range is invalid.
640 void warnInvalidRange();
641 // Extract call stack from the perf trace lines
642 bool extractCallstack(TraceStream &TraceIt,
643 SmallVectorImpl<uint64_t> &CallStack);
644 // Extract LBR stack from one perf trace line
645 bool extractLBRStack(TraceStream &TraceIt,
646 SmallVectorImpl<LBREntry> &LBRStack);
647 uint64_t parseAggregatedCount(TraceStream &TraceIt);
648 // Parse one sample from multiple perf lines, override this for different
649 // sample type
650 void parseSample(TraceStream &TraceIt);
651 // An aggregated count is given to indicate how many times the sample is
652 // repeated.
parseSample(TraceStream & TraceIt,uint64_t Count)653 virtual void parseSample(TraceStream &TraceIt, uint64_t Count){};
654 void computeCounterFromLBR(const PerfSample *Sample, uint64_t Repeat);
655 // Post process the profile after trace aggregation, we will do simple range
656 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample).
657 virtual void generateUnsymbolizedProfile();
658 void writeUnsymbolizedProfile(StringRef Filename);
659 void writeUnsymbolizedProfile(raw_fd_ostream &OS);
660
661 // Samples with the repeating time generated by the perf reader
662 AggregatedCounter AggregatedSamples;
663 // Keep track of all invalid return addresses
664 std::set<uint64_t> InvalidReturnAddresses;
665 // PID for the process of interest
666 std::optional<uint32_t> PIDFilter;
667 };
668
669 /*
670 The reader of LBR only perf script.
671 A typical LBR sample is like:
672 40062f 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
673 ... 0x4005c8/0x4005dc/P/-/-/0
674 */
675 class LBRPerfReader : public PerfScriptReader {
676 public:
LBRPerfReader(ProfiledBinary * Binary,StringRef PerfTrace,std::optional<uint32_t> PID)677 LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
678 std::optional<uint32_t> PID)
679 : PerfScriptReader(Binary, PerfTrace, PID){};
680 // Parse the LBR only sample.
681 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
682 };
683
684 /*
685 Hybrid perf script includes a group of hybrid samples(LBRs + call stack),
686 which is used to generate CS profile. An example of hybrid sample:
687 4005dc # call stack leaf
688 400634
689 400684 # call stack root
690 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
691 ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
692 */
693 class HybridPerfReader : public PerfScriptReader {
694 public:
HybridPerfReader(ProfiledBinary * Binary,StringRef PerfTrace,std::optional<uint32_t> PID)695 HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
696 std::optional<uint32_t> PID)
697 : PerfScriptReader(Binary, PerfTrace, PID){};
698 // Parse the hybrid sample including the call and LBR line
699 void parseSample(TraceStream &TraceIt, uint64_t Count) override;
700 void generateUnsymbolizedProfile() override;
701
702 private:
703 // Unwind the hybrid samples after aggregration
704 void unwindSamples();
705 };
706
707 /*
708 Format of unsymbolized profile:
709
710 [frame1 @ frame2 @ ...] # If it's a CS profile
711 number of entries in RangeCounter
712 from_1-to_1:count_1
713 from_2-to_2:count_2
714 ......
715 from_n-to_n:count_n
716 number of entries in BranchCounter
717 src_1->dst_1:count_1
718 src_2->dst_2:count_2
719 ......
720 src_n->dst_n:count_n
721 [frame1 @ frame2 @ ...] # Next context
722 ......
723
724 Note that non-CS profile doesn't have the empty `[]` context.
725 */
726 class UnsymbolizedProfileReader : public PerfReaderBase {
727 public:
UnsymbolizedProfileReader(ProfiledBinary * Binary,StringRef PerfTrace)728 UnsymbolizedProfileReader(ProfiledBinary *Binary, StringRef PerfTrace)
729 : PerfReaderBase(Binary, PerfTrace){};
730 void parsePerfTraces() override;
731
732 private:
733 void readSampleCounters(TraceStream &TraceIt, SampleCounter &SCounters);
734 void readUnsymbolizedProfile(StringRef Filename);
735
736 std::unordered_set<std::string> ContextStrSet;
737 };
738
739 } // end namespace sampleprof
740 } // end namespace llvm
741
742 #endif
743