1 //===-- PerfReader.h - perfscript reader -----------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H 10 #define LLVM_TOOLS_LLVM_PROFGEN_PERFREADER_H 11 #include "ErrorHandling.h" 12 #include "ProfiledBinary.h" 13 #include "llvm/Support/Casting.h" 14 #include "llvm/Support/CommandLine.h" 15 #include "llvm/Support/Regex.h" 16 #include <fstream> 17 #include <list> 18 #include <map> 19 #include <vector> 20 21 using namespace llvm; 22 using namespace sampleprof; 23 24 namespace llvm { 25 namespace sampleprof { 26 27 // Stream based trace line iterator 28 class TraceStream { 29 std::string CurrentLine; 30 std::ifstream Fin; 31 bool IsAtEoF = false; 32 uint64_t LineNumber = 0; 33 34 public: TraceStream(StringRef Filename)35 TraceStream(StringRef Filename) : Fin(Filename.str()) { 36 if (!Fin.good()) 37 exitWithError("Error read input perf script file", Filename); 38 advance(); 39 } 40 getCurrentLine()41 StringRef getCurrentLine() { 42 assert(!IsAtEoF && "Line iterator reaches the End-of-File!"); 43 return CurrentLine; 44 } 45 getLineNumber()46 uint64_t getLineNumber() { return LineNumber; } 47 isAtEoF()48 bool isAtEoF() { return IsAtEoF; } 49 50 // Read the next line advance()51 void advance() { 52 if (!std::getline(Fin, CurrentLine)) { 53 IsAtEoF = true; 54 return; 55 } 56 LineNumber++; 57 } 58 }; 59 60 // The type of perfscript 61 enum PerfScriptType { 62 PERF_UNKNOWN = 0, 63 PERF_INVALID = 1, 64 PERF_LBR = 2, // Only LBR sample 65 PERF_LBR_STACK = 3, // Hybrid sample including call stack and LBR stack. 66 }; 67 68 // The parsed LBR sample entry. 69 struct LBREntry { 70 uint64_t Source = 0; 71 uint64_t Target = 0; 72 // An artificial branch stands for a series of consecutive branches starting 73 // from the current binary with a transition through external code and 74 // eventually landing back in the current binary. 75 bool IsArtificial = false; LBREntryLBREntry76 LBREntry(uint64_t S, uint64_t T, bool I) 77 : Source(S), Target(T), IsArtificial(I) {} 78 }; 79 80 // Hash interface for generic data of type T 81 // Data should implement a \fn getHashCode and a \fn isEqual 82 // Currently getHashCode is non-virtual to avoid the overhead of calling vtable, 83 // i.e we explicitly calculate hash of derived class, assign to base class's 84 // HashCode. This also provides the flexibility for calculating the hash code 85 // incrementally(like rolling hash) during frame stack unwinding since unwinding 86 // only changes the leaf of frame stack. \fn isEqual is a virtual function, 87 // which will have perf overhead. In the future, if we redesign a better hash 88 // function, then we can just skip this or switch to non-virtual function(like 89 // just ignore comparision if hash conflicts probabilities is low) 90 template <class T> class Hashable { 91 public: 92 std::shared_ptr<T> Data; Hashable(const std::shared_ptr<T> & D)93 Hashable(const std::shared_ptr<T> &D) : Data(D) {} 94 95 // Hash code generation 96 struct Hash { operatorHash97 uint64_t operator()(const Hashable<T> &Key) const { 98 // Don't make it virtual for getHashCode 99 assert(Key.Data->getHashCode() && "Should generate HashCode for it!"); 100 return Key.Data->getHashCode(); 101 } 102 }; 103 104 // Hash equal 105 struct Equal { operatorEqual106 bool operator()(const Hashable<T> &LHS, const Hashable<T> &RHS) const { 107 // Precisely compare the data, vtable will have overhead. 108 return LHS.Data->isEqual(RHS.Data.get()); 109 } 110 }; 111 getPtr()112 T *getPtr() const { return Data.get(); } 113 }; 114 115 // Base class to extend for all types of perf sample 116 struct PerfSample { 117 uint64_t HashCode = 0; 118 119 virtual ~PerfSample() = default; getHashCodePerfSample120 uint64_t getHashCode() const { return HashCode; } isEqualPerfSample121 virtual bool isEqual(const PerfSample *K) const { 122 return HashCode == K->HashCode; 123 }; 124 125 // Utilities for LLVM-style RTTI 126 enum PerfKind { PK_HybridSample }; 127 const PerfKind Kind; getKindPerfSample128 PerfKind getKind() const { return Kind; } PerfSamplePerfSample129 PerfSample(PerfKind K) : Kind(K){}; 130 }; 131 132 // The parsed hybrid sample including call stack and LBR stack. 133 struct HybridSample : public PerfSample { 134 // Profiled binary that current frame address belongs to 135 ProfiledBinary *Binary; 136 // Call stack recorded in FILO(leaf to root) order 137 SmallVector<uint64_t, 16> CallStack; 138 // LBR stack recorded in FIFO order 139 SmallVector<LBREntry, 16> LBRStack; 140 HybridSampleHybridSample141 HybridSample() : PerfSample(PK_HybridSample){}; classofHybridSample142 static bool classof(const PerfSample *K) { 143 return K->getKind() == PK_HybridSample; 144 } 145 146 // Used for sample aggregation isEqualHybridSample147 bool isEqual(const PerfSample *K) const override { 148 const HybridSample *Other = dyn_cast<HybridSample>(K); 149 if (Other->Binary != Binary) 150 return false; 151 const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack; 152 const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack; 153 154 if (CallStack.size() != OtherCallStack.size() || 155 LBRStack.size() != OtherLBRStack.size()) 156 return false; 157 158 auto Iter = CallStack.begin(); 159 for (auto Address : OtherCallStack) { 160 if (Address != *Iter++) 161 return false; 162 } 163 164 for (size_t I = 0; I < OtherLBRStack.size(); I++) { 165 if (LBRStack[I].Source != OtherLBRStack[I].Source || 166 LBRStack[I].Target != OtherLBRStack[I].Target) 167 return false; 168 } 169 return true; 170 } 171 genHashCodeHybridSample172 void genHashCode() { 173 // Use simple DJB2 hash 174 auto HashCombine = [](uint64_t H, uint64_t V) { 175 return ((H << 5) + H) + V; 176 }; 177 uint64_t Hash = 5381; 178 Hash = HashCombine(Hash, reinterpret_cast<uint64_t>(Binary)); 179 for (const auto &Value : CallStack) { 180 Hash = HashCombine(Hash, Value); 181 } 182 for (const auto &Entry : LBRStack) { 183 Hash = HashCombine(Hash, Entry.Source); 184 Hash = HashCombine(Hash, Entry.Target); 185 } 186 HashCode = Hash; 187 } 188 }; 189 190 // After parsing the sample, we record the samples by aggregating them 191 // into this counter. The key stores the sample data and the value is 192 // the sample repeat times. 193 using AggregatedCounter = 194 std::unordered_map<Hashable<PerfSample>, uint64_t, 195 Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>; 196 197 using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>; 198 // The state for the unwinder, it doesn't hold the data but only keep the 199 // pointer/index of the data, While unwinding, the CallStack is changed 200 // dynamicially and will be recorded as the context of the sample 201 struct UnwindState { 202 // Profiled binary that current frame address belongs to 203 const ProfiledBinary *Binary; 204 // Call stack trie node 205 struct ProfiledFrame { 206 const uint64_t Address = 0; 207 ProfiledFrame *Parent; 208 SampleVector RangeSamples; 209 SampleVector BranchSamples; 210 std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children; 211 212 ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr) AddressUnwindState::ProfiledFrame213 : Address(Addr), Parent(P) {} getOrCreateChildFrameUnwindState::ProfiledFrame214 ProfiledFrame *getOrCreateChildFrame(uint64_t Address) { 215 assert(Address && "Address can't be zero!"); 216 auto Ret = Children.emplace( 217 Address, std::make_unique<ProfiledFrame>(Address, this)); 218 return Ret.first->second.get(); 219 } recordRangeCountUnwindState::ProfiledFrame220 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) { 221 RangeSamples.emplace_back(std::make_tuple(Start, End, Count)); 222 } recordBranchCountUnwindState::ProfiledFrame223 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) { 224 BranchSamples.emplace_back(std::make_tuple(Source, Target, Count)); 225 } isDummyRootUnwindState::ProfiledFrame226 bool isDummyRoot() { return Address == 0; } 227 }; 228 229 ProfiledFrame DummyTrieRoot; 230 ProfiledFrame *CurrentLeafFrame; 231 // Used to fall through the LBR stack 232 uint32_t LBRIndex = 0; 233 // Reference to HybridSample.LBRStack 234 const SmallVector<LBREntry, 16> &LBRStack; 235 // Used to iterate the address range 236 InstructionPointer InstPtr; UnwindStateUnwindState237 UnwindState(const HybridSample *Sample) 238 : Binary(Sample->Binary), LBRStack(Sample->LBRStack), 239 InstPtr(Sample->Binary, Sample->CallStack.front()) { 240 initFrameTrie(Sample->CallStack); 241 } 242 validateInitialStateUnwindState243 bool validateInitialState() { 244 uint64_t LBRLeaf = LBRStack[LBRIndex].Target; 245 uint64_t LeafAddr = CurrentLeafFrame->Address; 246 // When we take a stack sample, ideally the sampling distance between the 247 // leaf IP of stack and the last LBR target shouldn't be very large. 248 // Use a heuristic size (0x100) to filter out broken records. 249 if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) { 250 WithColor::warning() << "Bogus trace: stack tip = " 251 << format("%#010x", LeafAddr) 252 << ", LBR tip = " << format("%#010x\n", LBRLeaf); 253 return false; 254 } 255 return true; 256 } 257 checkStateConsistencyUnwindState258 void checkStateConsistency() { 259 assert(InstPtr.Address == CurrentLeafFrame->Address && 260 "IP should align with context leaf"); 261 } 262 getBinaryUnwindState263 const ProfiledBinary *getBinary() const { return Binary; } hasNextLBRUnwindState264 bool hasNextLBR() const { return LBRIndex < LBRStack.size(); } getCurrentLBRSourceUnwindState265 uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; } getCurrentLBRTargetUnwindState266 uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; } getCurrentLBRUnwindState267 const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; } advanceLBRUnwindState268 void advanceLBR() { LBRIndex++; } 269 getParentFrameUnwindState270 ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; } 271 pushFrameUnwindState272 void pushFrame(uint64_t Address) { 273 CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address); 274 } 275 switchToFrameUnwindState276 void switchToFrame(uint64_t Address) { 277 if (CurrentLeafFrame->Address == Address) 278 return; 279 CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address); 280 } 281 popFrameUnwindState282 void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; } 283 initFrameTrieUnwindState284 void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) { 285 ProfiledFrame *Cur = &DummyTrieRoot; 286 for (auto Address : reverse(CallStack)) { 287 Cur = Cur->getOrCreateChildFrame(Address); 288 } 289 CurrentLeafFrame = Cur; 290 } 291 getDummyRootPtrUnwindState292 ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; } 293 }; 294 295 // Base class for sample counter key with context 296 struct ContextKey { 297 uint64_t HashCode = 0; 298 virtual ~ContextKey() = default; getHashCodeContextKey299 uint64_t getHashCode() const { return HashCode; } isEqualContextKey300 virtual bool isEqual(const ContextKey *K) const { 301 return HashCode == K->HashCode; 302 }; 303 304 // Utilities for LLVM-style RTTI 305 enum ContextKind { CK_StringBased, CK_ProbeBased }; 306 const ContextKind Kind; getKindContextKey307 ContextKind getKind() const { return Kind; } ContextKeyContextKey308 ContextKey(ContextKind K) : Kind(K){}; 309 }; 310 311 // String based context id 312 struct StringBasedCtxKey : public ContextKey { 313 std::string Context; StringBasedCtxKeyStringBasedCtxKey314 StringBasedCtxKey() : ContextKey(CK_StringBased){}; classofStringBasedCtxKey315 static bool classof(const ContextKey *K) { 316 return K->getKind() == CK_StringBased; 317 } 318 isEqualStringBasedCtxKey319 bool isEqual(const ContextKey *K) const override { 320 const StringBasedCtxKey *Other = dyn_cast<StringBasedCtxKey>(K); 321 return Context == Other->Context; 322 } 323 genHashCodeStringBasedCtxKey324 void genHashCode() { HashCode = hash_value(Context); } 325 }; 326 327 // Probe based context key as the intermediate key of context 328 // String based context key will introduce redundant string handling 329 // since the callee context is inferred from the context string which 330 // need to be splitted by '@' to get the last location frame, so we 331 // can just use probe instead and generate the string in the end. 332 struct ProbeBasedCtxKey : public ContextKey { 333 SmallVector<const PseudoProbe *, 16> Probes; 334 ProbeBasedCtxKeyProbeBasedCtxKey335 ProbeBasedCtxKey() : ContextKey(CK_ProbeBased) {} classofProbeBasedCtxKey336 static bool classof(const ContextKey *K) { 337 return K->getKind() == CK_ProbeBased; 338 } 339 isEqualProbeBasedCtxKey340 bool isEqual(const ContextKey *K) const override { 341 const ProbeBasedCtxKey *O = dyn_cast<ProbeBasedCtxKey>(K); 342 assert(O != nullptr && "Probe based key shouldn't be null in isEqual"); 343 return std::equal(Probes.begin(), Probes.end(), O->Probes.begin(), 344 O->Probes.end()); 345 } 346 genHashCodeProbeBasedCtxKey347 void genHashCode() { 348 for (const auto *P : Probes) { 349 HashCode = hash_combine(HashCode, P); 350 } 351 if (HashCode == 0) { 352 // Avoid zero value of HashCode when it's an empty list 353 HashCode = 1; 354 } 355 } 356 }; 357 358 // The counter of branch samples for one function indexed by the branch, 359 // which is represented as the source and target offset pair. 360 using BranchSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>; 361 // The counter of range samples for one function indexed by the range, 362 // which is represented as the start and end offset pair. 363 using RangeSample = std::map<std::pair<uint64_t, uint64_t>, uint64_t>; 364 // Wrapper for sample counters including range counter and branch counter 365 struct SampleCounter { 366 RangeSample RangeCounter; 367 BranchSample BranchCounter; 368 recordRangeCountSampleCounter369 void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Repeat) { 370 RangeCounter[{Start, End}] += Repeat; 371 } recordBranchCountSampleCounter372 void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Repeat) { 373 BranchCounter[{Source, Target}] += Repeat; 374 } 375 }; 376 377 // Sample counter with context to support context-sensitive profile 378 using ContextSampleCounterMap = 379 std::unordered_map<Hashable<ContextKey>, SampleCounter, 380 Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>; 381 382 struct FrameStack { 383 SmallVector<uint64_t, 16> Stack; 384 const ProfiledBinary *Binary; FrameStackFrameStack385 FrameStack(const ProfiledBinary *B) : Binary(B) {} pushFrameFrameStack386 bool pushFrame(UnwindState::ProfiledFrame *Cur) { 387 Stack.push_back(Cur->Address); 388 return true; 389 } 390 popFrameFrameStack391 void popFrame() { 392 if (!Stack.empty()) 393 Stack.pop_back(); 394 } 395 std::shared_ptr<StringBasedCtxKey> getContextKey(); 396 }; 397 398 struct ProbeStack { 399 SmallVector<const PseudoProbe *, 16> Stack; 400 const ProfiledBinary *Binary; ProbeStackProbeStack401 ProbeStack(const ProfiledBinary *B) : Binary(B) {} pushFrameProbeStack402 bool pushFrame(UnwindState::ProfiledFrame *Cur) { 403 const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Cur->Address); 404 // We may not find a probe for a merged or external callsite. 405 // Callsite merging may cause the loss of original probe IDs. 406 // Cutting off the context from here since the inliner will 407 // not know how to consume a context with unknown callsites. 408 if (!CallProbe) 409 return false; 410 Stack.push_back(CallProbe); 411 return true; 412 } 413 popFrameProbeStack414 void popFrame() { 415 if (!Stack.empty()) 416 Stack.pop_back(); 417 } 418 // Use pseudo probe based context key to get the sample counter 419 // A context stands for a call path from 'main' to an uninlined 420 // callee with all inline frames recovered on that path. The probes 421 // belonging to that call path is the probes either originated from 422 // the callee or from any functions inlined into the callee. Since 423 // pseudo probes are organized in a tri-tree style after decoded, 424 // the tree path from the tri-tree root (which is the uninlined 425 // callee) to the probe node forms an inline context. 426 // Here we use a list of probe(pointer) as the context key to speed up 427 // aggregation and the final context string will be generate in 428 // ProfileGenerator 429 std::shared_ptr<ProbeBasedCtxKey> getContextKey(); 430 }; 431 432 /* 433 As in hybrid sample we have a group of LBRs and the most recent sampling call 434 stack, we can walk through those LBRs to infer more call stacks which would be 435 used as context for profile. VirtualUnwinder is the class to do the call stack 436 unwinding based on LBR state. Two types of unwinding are processd here: 437 1) LBR unwinding and 2) linear range unwinding. 438 Specifically, for each LBR entry(can be classified into call, return, regular 439 branch), LBR unwinding will replay the operation by pushing, popping or 440 switching leaf frame towards the call stack and since the initial call stack 441 is most recently sampled, the replay should be in anti-execution order, i.e. for 442 the regular case, pop the call stack when LBR is call, push frame on call stack 443 when LBR is return. After each LBR processed, it also needs to align with the 444 next LBR by going through instructions from previous LBR's target to current 445 LBR's source, which is the linear unwinding. As instruction from linear range 446 can come from different function by inlining, linear unwinding will do the range 447 splitting and record counters by the range with same inline context. Over those 448 unwinding process we will record each call stack as context id and LBR/linear 449 range as sample counter for further CS profile generation. 450 */ 451 class VirtualUnwinder { 452 public: VirtualUnwinder(ContextSampleCounterMap * Counter,const ProfiledBinary * B)453 VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B) 454 : CtxCounterMap(Counter), Binary(B) {} 455 bool unwind(const HybridSample *Sample, uint64_t Repeat); 456 457 private: isCallState(UnwindState & State)458 bool isCallState(UnwindState &State) const { 459 // The tail call frame is always missing here in stack sample, we will 460 // use a specific tail call tracker to infer it. 461 return Binary->addressIsCall(State.getCurrentLBRSource()); 462 } 463 isReturnState(UnwindState & State)464 bool isReturnState(UnwindState &State) const { 465 // Simply check addressIsReturn, as ret is always reliable, both for 466 // regular call and tail call. 467 return Binary->addressIsReturn(State.getCurrentLBRSource()); 468 } 469 470 void unwindCall(UnwindState &State); 471 void unwindLinear(UnwindState &State, uint64_t Repeat); 472 void unwindReturn(UnwindState &State); 473 void unwindBranchWithinFrame(UnwindState &State); 474 475 template <typename T> 476 void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack); 477 // Collect each samples on trie node by DFS traversal 478 template <typename T> 479 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack); 480 void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur); 481 482 void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State, 483 uint64_t Repeat); 484 void recordBranchCount(const LBREntry &Branch, UnwindState &State, 485 uint64_t Repeat); 486 487 ContextSampleCounterMap *CtxCounterMap; 488 // Profiled binary that current frame address belongs to 489 const ProfiledBinary *Binary; 490 }; 491 492 // Filename to binary map 493 using BinaryMap = StringMap<ProfiledBinary>; 494 // Address to binary map for fast look-up 495 using AddressBinaryMap = std::map<uint64_t, ProfiledBinary *>; 496 // Binary to ContextSampleCounters Map to support multiple binary, we may have 497 // same binary loaded at different addresses, they should share the same sample 498 // counter 499 using BinarySampleCounterMap = 500 std::unordered_map<ProfiledBinary *, ContextSampleCounterMap>; 501 502 // Load binaries and read perf trace to parse the events and samples 503 class PerfReader { 504 505 public: 506 PerfReader(cl::list<std::string> &BinaryFilenames, 507 cl::list<std::string> &PerfTraceFilenames); 508 509 // A LBR sample is like: 510 // 0x5c6313f/0x5c63170/P/-/-/0 0x5c630e7/0x5c63130/P/-/-/0 ... 511 // A heuristic for fast detection by checking whether a 512 // leading " 0x" and the '/' exist. isLBRSample(StringRef Line)513 static bool isLBRSample(StringRef Line) { 514 if (!Line.startswith(" 0x")) 515 return false; 516 if (Line.find('/') != StringRef::npos) 517 return true; 518 return false; 519 } 520 521 // The raw hybird sample is like 522 // e.g. 523 // 4005dc # call stack leaf 524 // 400634 525 // 400684 # call stack root 526 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... 527 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries 528 // Determine the perfscript contains hybrid samples(call stack + LBRs) by 529 // checking whether there is a non-empty call stack immediately followed by 530 // a LBR sample checkPerfScriptType(StringRef FileName)531 static PerfScriptType checkPerfScriptType(StringRef FileName) { 532 TraceStream TraceIt(FileName); 533 uint64_t FrameAddr = 0; 534 while (!TraceIt.isAtEoF()) { 535 int32_t Count = 0; 536 while (!TraceIt.isAtEoF() && 537 !TraceIt.getCurrentLine().ltrim().getAsInteger(16, FrameAddr)) { 538 Count++; 539 TraceIt.advance(); 540 } 541 if (!TraceIt.isAtEoF()) { 542 if (isLBRSample(TraceIt.getCurrentLine())) { 543 if (Count > 0) 544 return PERF_LBR_STACK; 545 else 546 return PERF_LBR; 547 } 548 TraceIt.advance(); 549 } 550 } 551 return PERF_INVALID; 552 } 553 554 // The parsed MMap event 555 struct MMapEvent { 556 uint64_t PID = 0; 557 uint64_t BaseAddress = 0; 558 uint64_t Size = 0; 559 uint64_t Offset = 0; 560 StringRef BinaryPath; 561 }; 562 563 /// Load symbols and disassemble the code of a give binary. 564 /// Also register the binary in the binary table. 565 /// 566 ProfiledBinary &loadBinary(const StringRef BinaryPath, 567 bool AllowNameConflict = true); 568 void updateBinaryAddress(const MMapEvent &Event); getPerfScriptType()569 PerfScriptType getPerfScriptType() const { return PerfType; } 570 // Entry of the reader to parse multiple perf traces 571 void parsePerfTraces(cl::list<std::string> &PerfTraceFilenames); getBinarySampleCounters()572 const BinarySampleCounterMap &getBinarySampleCounters() const { 573 return BinarySampleCounters; 574 } 575 576 private: 577 /// Validate the command line input 578 void validateCommandLine(cl::list<std::string> &BinaryFilenames, 579 cl::list<std::string> &PerfTraceFilenames); 580 /// Parse a single line of a PERF_RECORD_MMAP2 event looking for a 581 /// mapping between the binary name and its memory layout. 582 /// 583 void parseMMap2Event(TraceStream &TraceIt); 584 // Parse perf events/samples and do aggregation 585 void parseAndAggregateTrace(StringRef Filename); 586 // Parse either an MMAP event or a perf sample 587 void parseEventOrSample(TraceStream &TraceIt); 588 // Parse the hybrid sample including the call and LBR line 589 void parseHybridSample(TraceStream &TraceIt); 590 // Extract call stack from the perf trace lines 591 bool extractCallstack(TraceStream &TraceIt, 592 SmallVectorImpl<uint64_t> &CallStack); 593 // Extract LBR stack from one perf trace line 594 bool extractLBRStack(TraceStream &TraceIt, 595 SmallVectorImpl<LBREntry> &LBRStack, 596 ProfiledBinary *Binary); 597 void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames); 598 // Post process the profile after trace aggregation, we will do simple range 599 // overlap computation for AutoFDO, or unwind for CSSPGO(hybrid sample). 600 void generateRawProfile(); 601 // Unwind the hybrid samples after aggregration 602 void unwindSamples(); 603 void printUnwinderOutput(); 604 // Helper function for looking up binary in AddressBinaryMap 605 ProfiledBinary *getBinary(uint64_t Address); 606 607 BinaryMap BinaryTable; 608 AddressBinaryMap AddrToBinaryMap; // Used by address-based lookup. 609 610 private: 611 BinarySampleCounterMap BinarySampleCounters; 612 // Samples with the repeating time generated by the perf reader 613 AggregatedCounter AggregatedSamples; 614 PerfScriptType PerfType = PERF_UNKNOWN; 615 }; 616 617 } // end namespace sampleprof 618 } // end namespace llvm 619 620 #endif 621