1 //===-- PerfReader.cpp - perfscript reader ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "PerfReader.h"
9 #include "ProfileGenerator.h"
10 #include "llvm/Support/FileSystem.h"
11
// Debugging aid: print each binary load (mmap) event as it is parsed.
static cl::opt<bool> ShowMmapEvents("show-mmap-events", cl::ReallyHidden,
                                    cl::init(false), cl::ZeroOrMore,
                                    cl::desc("Print binary load events."));

// Debugging aid: dump the virtual unwinder's range/branch counters after
// sample unwinding.
static cl::opt<bool> ShowUnwinderOutput("show-unwinder-output",
                                        cl::ReallyHidden, cl::init(false),
                                        cl::ZeroOrMore,
                                        cl::desc("Print unwinder output"));

// Options defined elsewhere in llvm-profgen that affect command line
// validation below.
extern cl::opt<bool> ShowDisassemblyOnly;
extern cl::opt<bool> ShowSourceLocations;
23
24 namespace llvm {
25 namespace sampleprof {
26
unwindCall(UnwindState & State)27 void VirtualUnwinder::unwindCall(UnwindState &State) {
28 // The 2nd frame after leaf could be missing if stack sample is
29 // taken when IP is within prolog/epilog, as frame chain isn't
30 // setup yet. Fill in the missing frame in that case.
31 // TODO: Currently we just assume all the addr that can't match the
32 // 2nd frame is in prolog/epilog. In the future, we will switch to
33 // pro/epi tracker(Dwarf CFI) for the precise check.
34 uint64_t Source = State.getCurrentLBRSource();
35 auto *ParentFrame = State.getParentFrame();
36 if (ParentFrame == State.getDummyRootPtr() ||
37 ParentFrame->Address != Source) {
38 State.switchToFrame(Source);
39 } else {
40 State.popFrame();
41 }
42 State.InstPtr.update(Source);
43 }
44
// Unwind the linearly-executed address range between two consecutive LBR
// entries, attributing sub-ranges to the inline frames they executed in.
// \p Repeat is the aggregated occurrence count of the enclosing sample.
void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
  InstructionPointer &IP = State.InstPtr;
  // The linear range runs from the current LBR's branch target up to the
  // current instruction pointer (source side of the previously unwound LBR).
  uint64_t Target = State.getCurrentLBRTarget();
  uint64_t End = IP.Address;
  if (Binary->usePseudoProbes()) {
    // We don't need to top frame probe since it should be extracted
    // from the range.
    // The outcome of the virtual unwinding with pseudo probes is a
    // map from a context key to the address range being unwound.
    // This means basically linear unwinding is not needed for pseudo
    // probes. The range will be simply recorded here and will be
    // converted to a list of pseudo probes to report in ProfileGenerator.
    State.getParentFrame()->recordRangeCount(Target, End, Repeat);
  } else {
    // Unwind linear execution part
    uint64_t LeafAddr = State.CurrentLeafFrame->Address;
    // Walk the IP backwards from End towards Target, splitting the range
    // whenever the inline context changes.
    while (IP.Address >= Target) {
      uint64_t PrevIP = IP.Address;
      IP.backward();
      // Break into segments for implicit call/return due to inlining
      bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address);
      if (!SameInlinee || PrevIP == Target) {
        // Flush sub-range [PrevIP, End] under the context of the frame it
        // ran in, then start a new sub-range.
        State.switchToFrame(LeafAddr);
        State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat);
        End = IP.Address;
      }
      LeafAddr = IP.Address;
    }
  }
}
75
unwindReturn(UnwindState & State)76 void VirtualUnwinder::unwindReturn(UnwindState &State) {
77 // Add extra frame as we unwind through the return
78 const LBREntry &LBR = State.getCurrentLBR();
79 uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target);
80 State.switchToFrame(CallAddr);
81 State.pushFrame(LBR.Source);
82 State.InstPtr.update(LBR.Source);
83 }
84
unwindBranchWithinFrame(UnwindState & State)85 void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) {
86 // TODO: Tolerate tail call for now, as we may see tail call from libraries.
87 // This is only for intra function branches, excluding tail calls.
88 uint64_t Source = State.getCurrentLBRSource();
89 State.switchToFrame(Source);
90 State.InstPtr.update(Source);
91 }
92
getContextKey()93 std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
94 std::shared_ptr<StringBasedCtxKey> KeyStr =
95 std::make_shared<StringBasedCtxKey>();
96 KeyStr->Context =
97 Binary->getExpandedContextStr(Stack, KeyStr->WasLeafInlined);
98 if (KeyStr->Context.empty())
99 return nullptr;
100 KeyStr->genHashCode();
101 return KeyStr;
102 }
103
getContextKey()104 std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() {
105 std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
106 std::make_shared<ProbeBasedCtxKey>();
107 for (auto CallProbe : Stack) {
108 ProbeBasedKey->Probes.emplace_back(CallProbe);
109 }
110 CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
111 ProbeBasedKey->Probes);
112 ProbeBasedKey->genHashCode();
113 return ProbeBasedKey;
114 }
115
116 template <typename T>
collectSamplesFromFrame(UnwindState::ProfiledFrame * Cur,T & Stack)117 void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
118 T &Stack) {
119 if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
120 return;
121
122 std::shared_ptr<ContextKey> Key = Stack.getContextKey();
123 if (Key == nullptr)
124 return;
125 auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
126 SampleCounter &SCounter = Ret.first->second;
127 for (auto &Item : Cur->RangeSamples) {
128 uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
129 uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
130 SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item));
131 }
132
133 for (auto &Item : Cur->BranchSamples) {
134 uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
135 uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
136 SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item));
137 }
138 }
139
// Pre-order DFS over the frame trie rooted at `Cur`, maintaining `Stack` as
// the calling context of the current frame and flushing the samples
// recorded on each frame into the counter map.
template <typename T>
void VirtualUnwinder::collectSamplesFromFrameTrie(
    UnwindState::ProfiledFrame *Cur, T &Stack) {
  if (!Cur->isDummyRoot()) {
    if (!Stack.pushFrame(Cur)) {
      // Process truncated context
      // Start a new traversal ignoring its bottom context
      T EmptyStack(Binary);
      collectSamplesFromFrame(Cur, EmptyStack);
      for (const auto &Item : Cur->Children) {
        collectSamplesFromFrameTrie(Item.second.get(), EmptyStack);
      }
      return;
    }
  }

  collectSamplesFromFrame(Cur, Stack);
  // Process children frame
  for (const auto &Item : Cur->Children) {
    collectSamplesFromFrameTrie(Item.second.get(), Stack);
  }
  // Recover the call stack
  // NOTE(review): this popFrame is also reached for the dummy root, which
  // never pushed a frame — presumably popFrame is a no-op on an empty
  // stack; confirm in the stack implementations.
  Stack.popFrame();
}
164
collectSamplesFromFrameTrie(UnwindState::ProfiledFrame * Cur)165 void VirtualUnwinder::collectSamplesFromFrameTrie(
166 UnwindState::ProfiledFrame *Cur) {
167 if (Binary->usePseudoProbes()) {
168 ProbeStack Stack(Binary);
169 collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
170 } else {
171 FrameStack Stack(Binary);
172 collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
173 }
174 }
175
recordBranchCount(const LBREntry & Branch,UnwindState & State,uint64_t Repeat)176 void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
177 UnwindState &State, uint64_t Repeat) {
178 if (Branch.IsArtificial)
179 return;
180
181 if (Binary->usePseudoProbes()) {
182 // Same as recordRangeCount, We don't need to top frame probe since we will
183 // extract it from branch's source address
184 State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,
185 Repeat);
186 } else {
187 State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,
188 Repeat);
189 }
190 }
191
// Virtually unwind one aggregated hybrid sample (stack sample + LBR stack),
// recording range and branch counters onto the frame trie. \p Repeat is the
// aggregated occurrence count of the sample. Returns false when the sample
// fails the initial sanity check and is skipped.
bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
  // Capture initial state as starting point for unwinding.
  UnwindState State(Sample);

  // Sanity check - making sure leaf of LBR aligns with leaf of stack sample
  // Stack sample sometimes can be unreliable, so filter out bogus ones.
  if (!State.validateInitialState())
    return false;

  // Also do not attempt linear unwind for the leaf range as it's incomplete.
  bool IsLeaf = true;

  // Now process the LBR samples in parallel with stack sample
  // Note that we do not reverse the LBR entry order so we can
  // unwind the sample stack as we walk through LBR entries.
  while (State.hasNextLBR()) {
    State.checkStateConsistency();

    // Unwind implicit calls/returns from inlining, along the linear path,
    // break into smaller sub section each with its own calling context.
    if (!IsLeaf) {
      unwindLinear(State, Repeat);
    }
    IsLeaf = false;

    // Save the LBR branch before it gets unwound.
    const LBREntry &Branch = State.getCurrentLBR();

    if (isCallState(State)) {
      // Unwind calls - we know we encountered call if LBR overlaps with
      // transition between leaf the 2nd frame. Note that for calls that
      // were not in the original stack sample, we should have added the
      // extra frame when processing the return paired with this call.
      unwindCall(State);
    } else if (isReturnState(State)) {
      // Unwind returns - check whether the IP is indeed at a return instruction
      unwindReturn(State);
    } else {
      // Unwind branches - for regular intra function branches, we only
      // need to record branch with context.
      unwindBranchWithinFrame(State);
    }
    State.advanceLBR();
    // Record `branch` with calling context after unwinding.
    recordBranchCount(Branch, State, Repeat);
  }
  // As samples are aggregated on trie, record them into counter map
  collectSamplesFromFrameTrie(State.getDummyRootPtr());

  return true;
}
243
validateCommandLine(cl::list<std::string> & BinaryFilenames,cl::list<std::string> & PerfTraceFilenames)244 void PerfReader::validateCommandLine(
245 cl::list<std::string> &BinaryFilenames,
246 cl::list<std::string> &PerfTraceFilenames) {
247 // Allow the invalid perfscript if we only use to show binary disassembly
248 if (!ShowDisassemblyOnly) {
249 for (auto &File : PerfTraceFilenames) {
250 if (!llvm::sys::fs::exists(File)) {
251 std::string Msg = "Input perf script(" + File + ") doesn't exist!";
252 exitWithError(Msg);
253 }
254 }
255 }
256 if (BinaryFilenames.size() > 1) {
257 // TODO: remove this if everything is ready to support multiple binaries.
258 exitWithError(
259 "Currently only support one input binary, multiple binaries' "
260 "profile will be merged in one profile and make profile "
261 "summary info inaccurate. Please use `llvm-perfdata` to merge "
262 "profiles from multiple binaries.");
263 }
264 for (auto &Binary : BinaryFilenames) {
265 if (!llvm::sys::fs::exists(Binary)) {
266 std::string Msg = "Input binary(" + Binary + ") doesn't exist!";
267 exitWithError(Msg);
268 }
269 }
270 if (CSProfileGenerator::MaxCompressionSize < -1) {
271 exitWithError("Value of --compress-recursion should >= -1");
272 }
273 if (ShowSourceLocations && !ShowDisassemblyOnly) {
274 exitWithError("--show-source-locations should work together with "
275 "--show-disassembly-only!");
276 }
277 }
278
PerfReader(cl::list<std::string> & BinaryFilenames,cl::list<std::string> & PerfTraceFilenames)279 PerfReader::PerfReader(cl::list<std::string> &BinaryFilenames,
280 cl::list<std::string> &PerfTraceFilenames) {
281 validateCommandLine(BinaryFilenames, PerfTraceFilenames);
282 // Load the binaries.
283 for (auto Filename : BinaryFilenames)
284 loadBinary(Filename, /*AllowNameConflict*/ false);
285 }
286
loadBinary(const StringRef BinaryPath,bool AllowNameConflict)287 ProfiledBinary &PerfReader::loadBinary(const StringRef BinaryPath,
288 bool AllowNameConflict) {
289 // The binary table is currently indexed by the binary name not the full
290 // binary path. This is because the user-given path may not match the one
291 // that was actually executed.
292 StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
293
294 // Call to load the binary in the ctor of ProfiledBinary.
295 auto Ret = BinaryTable.insert({BinaryName, ProfiledBinary(BinaryPath)});
296
297 if (!Ret.second && !AllowNameConflict) {
298 std::string ErrorMsg = "Binary name conflict: " + BinaryPath.str() +
299 " and " + Ret.first->second.getPath().str() + " \n";
300 exitWithError(ErrorMsg);
301 }
302
303 return Ret.first->second;
304 }
305
updateBinaryAddress(const MMapEvent & Event)306 void PerfReader::updateBinaryAddress(const MMapEvent &Event) {
307 // Load the binary.
308 StringRef BinaryPath = Event.BinaryPath;
309 StringRef BinaryName = llvm::sys::path::filename(BinaryPath);
310
311 auto I = BinaryTable.find(BinaryName);
312 // Drop the event which doesn't belong to user-provided binaries
313 // or if its image is loaded at the same address
314 if (I == BinaryTable.end() || Event.Address == I->second.getBaseAddress())
315 return;
316
317 ProfiledBinary &Binary = I->second;
318
319 if (Event.Offset == Binary.getTextSegmentOffset()) {
320 // A binary image could be unloaded and then reloaded at different
321 // place, so update the address map here.
322 // Only update for the first executable segment and assume all other
323 // segments are loaded at consecutive memory addresses, which is the case on
324 // X64.
325 AddrToBinaryMap.erase(Binary.getBaseAddress());
326 AddrToBinaryMap[Event.Address] = &Binary;
327
328 // Update binary load address.
329 Binary.setBaseAddress(Event.Address);
330 } else {
331 // Verify segments are loaded consecutively.
332 const auto &Offsets = Binary.getTextSegmentOffsets();
333 auto It = std::lower_bound(Offsets.begin(), Offsets.end(), Event.Offset);
334 if (It != Offsets.end() && *It == Event.Offset) {
335 // The event is for loading a separate executable segment.
336 auto I = std::distance(Offsets.begin(), It);
337 const auto &PreferredAddrs = Binary.getPreferredTextSegmentAddresses();
338 if (PreferredAddrs[I] - Binary.getPreferredBaseAddress() !=
339 Event.Address - Binary.getBaseAddress())
340 exitWithError("Executable segments not loaded consecutively");
341 } else {
342 if (It == Offsets.begin())
343 exitWithError("File offset not found");
344 else {
345 // Find the segment the event falls in. A large segment could be loaded
346 // via multiple mmap calls with consecutive memory addresses.
347 --It;
348 assert(*It < Event.Offset);
349 if (Event.Offset - *It != Event.Address - Binary.getBaseAddress())
350 exitWithError("Segment not loaded by consecutive mmaps");
351 }
352 }
353 }
354 }
355
getBinary(uint64_t Address)356 ProfiledBinary *PerfReader::getBinary(uint64_t Address) {
357 auto Iter = AddrToBinaryMap.lower_bound(Address);
358 if (Iter == AddrToBinaryMap.end() || Iter->first != Address) {
359 if (Iter == AddrToBinaryMap.begin())
360 return nullptr;
361 Iter--;
362 }
363 return Iter->second;
364 }
365
366 // Use ordered map to make the output deterministic
367 using OrderedCounterForPrint = std::map<std::string, RangeSample>;
368
printSampleCounter(OrderedCounterForPrint & OrderedCounter)369 static void printSampleCounter(OrderedCounterForPrint &OrderedCounter) {
370 for (auto Range : OrderedCounter) {
371 outs() << Range.first << "\n";
372 for (auto I : Range.second) {
373 outs() << " (" << format("%" PRIx64, I.first.first) << ", "
374 << format("%" PRIx64, I.first.second) << "): " << I.second << "\n";
375 }
376 }
377 }
378
getContextKeyStr(ContextKey * K,const ProfiledBinary * Binary)379 static std::string getContextKeyStr(ContextKey *K,
380 const ProfiledBinary *Binary) {
381 std::string ContextStr;
382 if (const auto *CtxKey = dyn_cast<StringBasedCtxKey>(K)) {
383 return CtxKey->Context;
384 } else if (const auto *CtxKey = dyn_cast<ProbeBasedCtxKey>(K)) {
385 SmallVector<std::string, 16> ContextStack;
386 for (const auto *Probe : CtxKey->Probes) {
387 Binary->getInlineContextForProbe(Probe, ContextStack, true);
388 }
389 for (const auto &Context : ContextStack) {
390 if (ContextStr.size())
391 ContextStr += " @ ";
392 ContextStr += Context;
393 }
394 }
395 return ContextStr;
396 }
397
printRangeCounter(ContextSampleCounterMap & Counter,const ProfiledBinary * Binary)398 static void printRangeCounter(ContextSampleCounterMap &Counter,
399 const ProfiledBinary *Binary) {
400 OrderedCounterForPrint OrderedCounter;
401 for (auto &CI : Counter) {
402 OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] =
403 CI.second.RangeCounter;
404 }
405 printSampleCounter(OrderedCounter);
406 }
407
printBranchCounter(ContextSampleCounterMap & Counter,const ProfiledBinary * Binary)408 static void printBranchCounter(ContextSampleCounterMap &Counter,
409 const ProfiledBinary *Binary) {
410 OrderedCounterForPrint OrderedCounter;
411 for (auto &CI : Counter) {
412 OrderedCounter[getContextKeyStr(CI.first.getPtr(), Binary)] =
413 CI.second.BranchCounter;
414 }
415 printSampleCounter(OrderedCounter);
416 }
417
printUnwinderOutput()418 void PerfReader::printUnwinderOutput() {
419 for (auto I : BinarySampleCounters) {
420 const ProfiledBinary *Binary = I.first;
421 outs() << "Binary(" << Binary->getName().str() << ")'s Range Counter:\n";
422 printRangeCounter(I.second, Binary);
423 outs() << "\nBinary(" << Binary->getName().str() << ")'s Branch Counter:\n";
424 printBranchCounter(I.second, Binary);
425 }
426 }
427
// Run the virtual unwinder over every aggregated hybrid sample, recording
// counters into the per-binary sample counter map.
void PerfReader::unwindSamples() {
  for (const auto &Item : AggregatedSamples) {
    // NOTE(review): dyn_cast returns null if the sample is not a
    // HybridSample; this is only reached for PERF_LBR_STACK input, so every
    // aggregated sample is presumably hybrid — confirm, otherwise this
    // dereferences null below.
    const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr());
    VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary],
                             Sample->Binary);
    Unwinder.unwind(Sample, Item.second);
  }

  // Optionally dump the resulting counters for debugging.
  if (ShowUnwinderOutput)
    printUnwinderOutput();
}
439
extractLBRStack(TraceStream & TraceIt,SmallVectorImpl<LBREntry> & LBRStack,ProfiledBinary * Binary)440 bool PerfReader::extractLBRStack(TraceStream &TraceIt,
441 SmallVectorImpl<LBREntry> &LBRStack,
442 ProfiledBinary *Binary) {
443 // The raw format of LBR stack is like:
444 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
445 // ... 0x4005c8/0x4005dc/P/-/-/0
446 // It's in FIFO order and seperated by whitespace.
447 SmallVector<StringRef, 32> Records;
448 TraceIt.getCurrentLine().split(Records, " ");
449
450 // Extract leading instruction pointer if present, use single
451 // list to pass out as reference.
452 size_t Index = 0;
453 if (!Records.empty() && Records[0].find('/') == StringRef::npos) {
454 Index = 1;
455 }
456 // Now extract LBR samples - note that we do not reverse the
457 // LBR entry order so we can unwind the sample stack as we walk
458 // through LBR entries.
459 uint64_t PrevTrDst = 0;
460
461 while (Index < Records.size()) {
462 auto &Token = Records[Index++];
463 if (Token.size() == 0)
464 continue;
465
466 SmallVector<StringRef, 8> Addresses;
467 Token.split(Addresses, "/");
468 uint64_t Src;
469 uint64_t Dst;
470 Addresses[0].substr(2).getAsInteger(16, Src);
471 Addresses[1].substr(2).getAsInteger(16, Dst);
472
473 bool SrcIsInternal = Binary->addressIsCode(Src);
474 bool DstIsInternal = Binary->addressIsCode(Dst);
475 bool IsExternal = !SrcIsInternal && !DstIsInternal;
476 bool IsIncoming = !SrcIsInternal && DstIsInternal;
477 bool IsOutgoing = SrcIsInternal && !DstIsInternal;
478 bool IsArtificial = false;
479
480 // Ignore branches outside the current binary.
481 if (IsExternal)
482 continue;
483
484 if (IsOutgoing) {
485 if (!PrevTrDst) {
486 // This is unpaired outgoing jump which is likely due to interrupt or
487 // incomplete LBR trace. Ignore current and subsequent entries since
488 // they are likely in different contexts.
489 break;
490 }
491
492 if (Binary->addressIsReturn(Src)) {
493 // In a callback case, a return from internal code, say A, to external
494 // runtime can happen. The external runtime can then call back to
495 // another internal routine, say B. Making an artificial branch that
496 // looks like a return from A to B can confuse the unwinder to treat
497 // the instruction before B as the call instruction.
498 break;
499 }
500
501 // For transition to external code, group the Source with the next
502 // availabe transition target.
503 Dst = PrevTrDst;
504 PrevTrDst = 0;
505 IsArtificial = true;
506 } else {
507 if (PrevTrDst) {
508 // If we have seen an incoming transition from external code to internal
509 // code, but not a following outgoing transition, the incoming
510 // transition is likely due to interrupt which is usually unpaired.
511 // Ignore current and subsequent entries since they are likely in
512 // different contexts.
513 break;
514 }
515
516 if (IsIncoming) {
517 // For transition from external code (such as dynamic libraries) to
518 // the current binary, keep track of the branch target which will be
519 // grouped with the Source of the last transition from the current
520 // binary.
521 PrevTrDst = Dst;
522 continue;
523 }
524 }
525
526 // TODO: filter out buggy duplicate branches on Skylake
527
528 LBRStack.emplace_back(LBREntry(Src, Dst, IsArtificial));
529 }
530 TraceIt.advance();
531 return !LBRStack.empty();
532 }
533
// Parse a bottom-up call stack from the trace into \p CallStack (leaf
// first), translating return addresses to call addresses for non-leaf
// frames. Consumes all lines up to the next LBR line. Returns false when
// the stack is empty, unmapped, or deemed broken.
bool PerfReader::extractCallstack(TraceStream &TraceIt,
                                  SmallVectorImpl<uint64_t> &CallStack) {
  // The raw format of call stack is like:
  //            4005dc      # leaf frame
  //            400634
  //            400684      # root frame
  // It's in bottom-up order with each frame in one line.

  // Extract stack frames from sample
  ProfiledBinary *Binary = nullptr;
  while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
    StringRef FrameStr = TraceIt.getCurrentLine().ltrim();
    uint64_t FrameAddr = 0;
    if (FrameStr.getAsInteger(16, FrameAddr)) {
      // We might parse a non-perf sample line like empty line and comments,
      // skip it
      TraceIt.advance();
      return false;
    }
    TraceIt.advance();
    if (!Binary) {
      // The first (leaf) frame determines which binary the stack belongs to.
      Binary = getBinary(FrameAddr);
      // we might have addr not match the MMAP, skip it
      if (!Binary) {
        if (AddrToBinaryMap.size() == 0)
          WithColor::warning() << "No MMAP event in the perfscript, create it "
                                  "with '--show-mmap-events'\n";
        break;
      }
    }
    // Currently intermixed frame from different binaries is not supported.
    // Ignore bottom frames not from binary of interest.
    if (!Binary->addressIsCode(FrameAddr))
      break;

    // We need to translate return address to call address
    // for non-leaf frames
    if (!CallStack.empty()) {
      FrameAddr = Binary->getCallAddrFromFrameAddr(FrameAddr);
    }

    CallStack.emplace_back(FrameAddr);
  }

  // Skip other unrelated line, find the next valid LBR line
  // Note that even for empty call stack, we should skip the address at the
  // bottom, otherwise the following pass may generate a truncated callstack
  while (!TraceIt.isAtEoF() && !TraceIt.getCurrentLine().startswith(" 0x")) {
    TraceIt.advance();
  }
  // Filter out broken stack sample. We may not have complete frame info
  // if sample end up in prolog/epilog, the result is dangling context not
  // connected to entry point. This should be relatively rare thus not much
  // impact on overall profile quality. However we do want to filter them
  // out to reduce the number of different calling contexts. One instance
  // of such case - when sample landed in prolog/epilog, somehow stack
  // walking will be broken in an unexpected way that higher frames will be
  // missing.
  return !CallStack.empty() &&
         !Binary->addressInPrologEpilog(CallStack.front());
}
595
parseHybridSample(TraceStream & TraceIt)596 void PerfReader::parseHybridSample(TraceStream &TraceIt) {
597 // The raw hybird sample started with call stack in FILO order and followed
598 // intermediately by LBR sample
599 // e.g.
600 // 4005dc # call stack leaf
601 // 400634
602 // 400684 # call stack root
603 // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
604 // ... 0x4005c8/0x4005dc/P/-/-/0 # LBR Entries
605 //
606 std::shared_ptr<HybridSample> Sample = std::make_shared<HybridSample>();
607
608 // Parsing call stack and populate into HybridSample.CallStack
609 if (!extractCallstack(TraceIt, Sample->CallStack)) {
610 // Skip the next LBR line matched current call stack
611 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x"))
612 TraceIt.advance();
613 return;
614 }
615 // Set the binary current sample belongs to
616 Sample->Binary = getBinary(Sample->CallStack.front());
617
618 if (!TraceIt.isAtEoF() && TraceIt.getCurrentLine().startswith(" 0x")) {
619 // Parsing LBR stack and populate into HybridSample.LBRStack
620 if (extractLBRStack(TraceIt, Sample->LBRStack, Sample->Binary)) {
621 // Canonicalize stack leaf to avoid 'random' IP from leaf frame skew LBR
622 // ranges
623 Sample->CallStack.front() = Sample->LBRStack[0].Target;
624 // Record samples by aggregation
625 Sample->genHashCode();
626 AggregatedSamples[Hashable<PerfSample>(Sample)]++;
627 }
628 } else {
629 // LBR sample is encoded in single line after stack sample
630 exitWithError("'Hybrid perf sample is corrupted, No LBR sample line");
631 }
632 }
633
// Parse a PERF_RECORD_MMAP2 event line, extract the load address/offset of
// the mapped binary and feed it to updateBinaryAddress. Consumes the line.
void PerfReader::parseMMap2Event(TraceStream &TraceIt) {
  // Parse a line like:
  //  PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
  //  08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
  constexpr static const char *const Pattern =
      "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
      "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
      "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
  // Field 0 - whole line
  // Field 1 - PID
  // Field 2 - base address
  // Field 3 - mmapped size
  // Field 4 - page offset
  // Field 5 - binary path
  enum EventIndex {
    WHOLE_LINE = 0,
    PID = 1,
    MMAPPED_ADDRESS = 2,
    MMAPPED_SIZE = 3,
    PAGE_OFFSET = 4,
    BINARY_PATH = 5
  };

  Regex RegMmap2(Pattern);
  SmallVector<StringRef, 6> Fields;
  bool R = RegMmap2.match(TraceIt.getCurrentLine(), &Fields);
  if (!R) {
    std::string ErrorMsg = "Cannot parse mmap event: Line" +
                           Twine(TraceIt.getLineNumber()).str() + ": " +
                           TraceIt.getCurrentLine().str() + " \n";
    exitWithError(ErrorMsg);
  }
  MMapEvent Event;
  // getAsInteger results are unchecked here; the regex above constrains each
  // field to decimal/hex digits, so parsing presumably cannot fail.
  Fields[PID].getAsInteger(10, Event.PID);
  Fields[MMAPPED_ADDRESS].getAsInteger(0, Event.Address);
  Fields[MMAPPED_SIZE].getAsInteger(0, Event.Size);
  Fields[PAGE_OFFSET].getAsInteger(0, Event.Offset);
  Event.BinaryPath = Fields[BINARY_PATH];
  updateBinaryAddress(Event);
  if (ShowMmapEvents) {
    outs() << "Mmap: Binary " << Event.BinaryPath << " loaded at "
           << format("0x%" PRIx64 ":", Event.Address) << " \n";
  }
  TraceIt.advance();
}
679
parseEventOrSample(TraceStream & TraceIt)680 void PerfReader::parseEventOrSample(TraceStream &TraceIt) {
681 if (TraceIt.getCurrentLine().startswith("PERF_RECORD_MMAP2"))
682 parseMMap2Event(TraceIt);
683 else if (getPerfScriptType() == PERF_LBR_STACK)
684 parseHybridSample(TraceIt);
685 else {
686 // TODO: parse other type sample
687 TraceIt.advance();
688 }
689 }
690
parseAndAggregateTrace(StringRef Filename)691 void PerfReader::parseAndAggregateTrace(StringRef Filename) {
692 // Trace line iterator
693 TraceStream TraceIt(Filename);
694 while (!TraceIt.isAtEoF())
695 parseEventOrSample(TraceIt);
696 }
697
checkAndSetPerfType(cl::list<std::string> & PerfTraceFilenames)698 void PerfReader::checkAndSetPerfType(
699 cl::list<std::string> &PerfTraceFilenames) {
700 for (auto FileName : PerfTraceFilenames) {
701 PerfScriptType Type = checkPerfScriptType(FileName);
702 if (Type == PERF_INVALID)
703 exitWithError("Invalid perf script input!");
704 if (PerfType != PERF_UNKNOWN && PerfType != Type)
705 exitWithError("Inconsistent sample among different perf scripts");
706 PerfType = Type;
707 }
708 }
709
generateRawProfile()710 void PerfReader::generateRawProfile() {
711 if (getPerfScriptType() == PERF_LBR_STACK) {
712 // Unwind samples if it's hybird sample
713 unwindSamples();
714 } else if (getPerfScriptType() == PERF_LBR) {
715 // TODO: range overlap computation for regular AutoFDO
716 }
717 }
718
parsePerfTraces(cl::list<std::string> & PerfTraceFilenames)719 void PerfReader::parsePerfTraces(cl::list<std::string> &PerfTraceFilenames) {
720 // Check and set current perfscript type
721 checkAndSetPerfType(PerfTraceFilenames);
722 // Parse perf traces and do aggregation.
723 for (auto Filename : PerfTraceFilenames)
724 parseAndAggregateTrace(Filename);
725
726 generateRawProfile();
727 }
728
729 } // end namespace sampleprof
730 } // end namespace llvm
731