1 //===-- ProfileGenerator.cpp - Profile Generator ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ProfileGenerator.h"
10 #include "llvm/ProfileData/ProfileCommon.h"
11
12 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
13 cl::Required,
14 cl::desc("Output profile file"));
15 static cl::alias OutputA("o", cl::desc("Alias for --output"),
16 cl::aliasopt(OutputFilename));
17
18 static cl::opt<SampleProfileFormat> OutputFormat(
19 "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
20 cl::values(
21 clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
22 clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
23 clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
24 clEnumValN(SPF_Text, "text", "Text encoding"),
25 clEnumValN(SPF_GCC, "gcc",
26 "GCC encoding (only meaningful for -sample)")));
27
28 static cl::opt<int32_t, true> RecursionCompression(
29 "compress-recursion",
30 cl::desc("Compressing recursion by deduplicating adjacent frame "
31 "sequences up to the specified size. -1 means no size limit."),
32 cl::Hidden,
33 cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
34
35 static cl::opt<bool> CSProfMergeColdContext(
36 "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
37 cl::desc("If the total count of context profile is smaller than "
38 "the threshold, it will be merged into context-less base "
39 "profile."));
40
41 static cl::opt<bool> CSProfTrimColdContext(
42 "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
43 cl::desc("If the total count of the profile after all merge is done "
44 "is still smaller than threshold, it will be trimmed."));
45
46 static cl::opt<uint32_t> CSProfColdContextFrameDepth(
47 "csprof-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
48 cl::desc("Keep the last K frames while merging cold profile. 1 means the "
49 "context-less base profile"));
50
51 static cl::opt<bool> EnableCSPreInliner(
52 "csspgo-preinliner", cl::Hidden, cl::init(false),
53 cl::desc("Run a global pre-inliner to merge context profile based on "
54 "estimated global top-down inline decisions"));
55
56 extern cl::opt<int> ProfileSummaryCutoffCold;
57
58 using namespace llvm;
59 using namespace sampleprof;
60
61 namespace llvm {
62 namespace sampleprof {
63
64 // Initialize the MaxCompressionSize to -1 which means no size limit
65 int32_t CSProfileGenerator::MaxCompressionSize = -1;
66
67 static bool
usePseudoProbes(const BinarySampleCounterMap & BinarySampleCounters)68 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
69 return BinarySampleCounters.size() &&
70 BinarySampleCounters.begin()->first->usePseudoProbes();
71 }
72
73 std::unique_ptr<ProfileGenerator>
create(const BinarySampleCounterMap & BinarySampleCounters,enum PerfScriptType SampleType)74 ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
75 enum PerfScriptType SampleType) {
76 std::unique_ptr<ProfileGenerator> ProfileGenerator;
77 if (SampleType == PERF_LBR_STACK) {
78 if (usePseudoProbes(BinarySampleCounters)) {
79 ProfileGenerator.reset(
80 new PseudoProbeCSProfileGenerator(BinarySampleCounters));
81 } else {
82 ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters));
83 }
84 } else {
85 // TODO:
86 llvm_unreachable("Unsupported perfscript!");
87 }
88
89 return ProfileGenerator;
90 }
91
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)92 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
93 StringMap<FunctionSamples> &ProfileMap) {
94 if (std::error_code EC = Writer->write(ProfileMap))
95 exitWithError(std::move(EC));
96 }
97
write()98 void ProfileGenerator::write() {
99 auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
100 if (std::error_code EC = WriterOrErr.getError())
101 exitWithError(EC, OutputFilename);
102 write(std::move(WriterOrErr.get()), ProfileMap);
103 }
104
findDisjointRanges(RangeSample & DisjointRanges,const RangeSample & Ranges)105 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
106 const RangeSample &Ranges) {
107
108 /*
109 Regions may overlap with each other. Using the boundary info, find all
110 disjoint ranges and their sample count. BoundaryPoint contains the count
111 multiple samples begin/end at this points.
112
113 |<--100-->| Sample1
114 |<------200------>| Sample2
115 A B C
116
117 In the example above,
118 Sample1 begins at A, ends at B, its value is 100.
119 Sample2 beings at A, ends at C, its value is 200.
120 For A, BeginCount is the sum of sample begins at A, which is 300 and no
121 samples ends at A, so EndCount is 0.
122 Then boundary points A, B, and C with begin/end counts are:
123 A: (300, 0)
124 B: (0, 100)
125 C: (0, 200)
126 */
127 struct BoundaryPoint {
128 // Sum of sample counts beginning at this point
129 uint64_t BeginCount;
130 // Sum of sample counts ending at this point
131 uint64_t EndCount;
132
133 BoundaryPoint() : BeginCount(0), EndCount(0){};
134
135 void addBeginCount(uint64_t Count) { BeginCount += Count; }
136
137 void addEndCount(uint64_t Count) { EndCount += Count; }
138 };
139
140 /*
141 For the above example. With boundary points, follwing logic finds two
142 disjoint region of
143
144 [A,B]: 300
145 [B+1,C]: 200
146
147 If there is a boundary point that both begin and end, the point itself
148 becomes a separate disjoint region. For example, if we have original
149 ranges of
150
151 |<--- 100 --->|
152 |<--- 200 --->|
153 A B C
154
155 there are three boundary points with their begin/end counts of
156
157 A: (100, 0)
158 B: (200, 100)
159 C: (0, 200)
160
161 the disjoint ranges would be
162
163 [A, B-1]: 100
164 [B, B]: 300
165 [B+1, C]: 200.
166 */
167 std::map<uint64_t, BoundaryPoint> Boundaries;
168
169 for (auto Item : Ranges) {
170 uint64_t Begin = Item.first.first;
171 uint64_t End = Item.first.second;
172 uint64_t Count = Item.second;
173 if (Boundaries.find(Begin) == Boundaries.end())
174 Boundaries[Begin] = BoundaryPoint();
175 Boundaries[Begin].addBeginCount(Count);
176
177 if (Boundaries.find(End) == Boundaries.end())
178 Boundaries[End] = BoundaryPoint();
179 Boundaries[End].addEndCount(Count);
180 }
181
182 uint64_t BeginAddress = UINT64_MAX;
183 int Count = 0;
184 for (auto Item : Boundaries) {
185 uint64_t Address = Item.first;
186 BoundaryPoint &Point = Item.second;
187 if (Point.BeginCount) {
188 if (BeginAddress != UINT64_MAX)
189 DisjointRanges[{BeginAddress, Address - 1}] = Count;
190 Count += Point.BeginCount;
191 BeginAddress = Address;
192 }
193 if (Point.EndCount) {
194 assert((BeginAddress != UINT64_MAX) &&
195 "First boundary point cannot be 'end' point");
196 DisjointRanges[{BeginAddress, Address}] = Count;
197 Count -= Point.EndCount;
198 BeginAddress = Address + 1;
199 }
200 }
201 }
202
203 FunctionSamples &
getFunctionProfileForContext(StringRef ContextStr,bool WasLeafInlined)204 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
205 bool WasLeafInlined) {
206 auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples());
207 if (Ret.second) {
208 // Make a copy of the underlying context string in string table
209 // before StringRef wrapper is used for context.
210 auto It = ContextStrings.insert(ContextStr.str());
211 SampleContext FContext(*It.first, RawContext);
212 if (WasLeafInlined)
213 FContext.setAttribute(ContextWasInlined);
214 FunctionSamples &FProfile = Ret.first->second;
215 FProfile.setContext(FContext);
216 FProfile.setName(FContext.getNameWithoutContext());
217 }
218 return Ret.first->second;
219 }
220
generateProfile()221 void CSProfileGenerator::generateProfile() {
222 FunctionSamples::ProfileIsCS = true;
223 for (const auto &BI : BinarySampleCounters) {
224 ProfiledBinary *Binary = BI.first;
225 for (const auto &CI : BI.second) {
226 const StringBasedCtxKey *CtxKey =
227 dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
228 StringRef ContextId(CtxKey->Context);
229 // Get or create function profile for the range
230 FunctionSamples &FunctionProfile =
231 getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
232
233 // Fill in function body samples
234 populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
235 Binary);
236 // Fill in boundary sample counts as well as call site samples for calls
237 populateFunctionBoundarySamples(ContextId, FunctionProfile,
238 CI.second.BranchCounter, Binary);
239 }
240 }
241 // Fill in call site value sample for inlined calls and also use context to
242 // infer missing samples. Since we don't have call count for inlined
243 // functions, we estimate it from inlinee's profile using the entry of the
244 // body sample.
245 populateInferredFunctionSamples();
246
247 postProcessProfiles();
248 }
249
updateBodySamplesforFunctionProfile(FunctionSamples & FunctionProfile,const FrameLocation & LeafLoc,uint64_t Count)250 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
251 FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
252 uint64_t Count) {
253 // Filter out invalid negative(int type) lineOffset
254 if (LeafLoc.second.LineOffset & 0x80000000)
255 return;
256 // Use the maximum count of samples with same line location
257 ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
258 LeafLoc.second.LineOffset, LeafLoc.second.Discriminator);
259 uint64_t PreviousCount = R ? R.get() : 0;
260 if (PreviousCount < Count) {
261 FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
262 LeafLoc.second.Discriminator,
263 Count - PreviousCount);
264 }
265 }
266
populateFunctionBodySamples(FunctionSamples & FunctionProfile,const RangeSample & RangeCounter,ProfiledBinary * Binary)267 void CSProfileGenerator::populateFunctionBodySamples(
268 FunctionSamples &FunctionProfile, const RangeSample &RangeCounter,
269 ProfiledBinary *Binary) {
270 // Compute disjoint ranges first, so we can use MAX
271 // for calculating count for each location.
272 RangeSample Ranges;
273 findDisjointRanges(Ranges, RangeCounter);
274 for (auto Range : Ranges) {
275 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
276 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
277 uint64_t Count = Range.second;
278 // Disjoint ranges have introduce zero-filled gap that
279 // doesn't belong to current context, filter them out.
280 if (Count == 0)
281 continue;
282
283 InstructionPointer IP(Binary, RangeBegin, true);
284
285 // Disjoint ranges may have range in the middle of two instr,
286 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
287 // can be Addr1+1 to Addr2-1. We should ignore such range.
288 if (IP.Address > RangeEnd)
289 continue;
290
291 while (IP.Address <= RangeEnd) {
292 uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
293 auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
294 if (LeafLoc.hasValue()) {
295 // Recording body sample for this specific context
296 updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
297 }
298 // Accumulate total sample count even it's a line with invalid debug info
299 FunctionProfile.addTotalSamples(Count);
300 // Move to next IP within the range
301 IP.advance();
302 }
303 }
304 }
305
populateFunctionBoundarySamples(StringRef ContextId,FunctionSamples & FunctionProfile,const BranchSample & BranchCounters,ProfiledBinary * Binary)306 void CSProfileGenerator::populateFunctionBoundarySamples(
307 StringRef ContextId, FunctionSamples &FunctionProfile,
308 const BranchSample &BranchCounters, ProfiledBinary *Binary) {
309
310 for (auto Entry : BranchCounters) {
311 uint64_t SourceOffset = Entry.first.first;
312 uint64_t TargetOffset = Entry.first.second;
313 uint64_t Count = Entry.second;
314 // Get the callee name by branch target if it's a call branch
315 StringRef CalleeName = FunctionSamples::getCanonicalFnName(
316 Binary->getFuncFromStartOffset(TargetOffset));
317 if (CalleeName.size() == 0)
318 continue;
319
320 // Record called target sample and its count
321 auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
322 if (!LeafLoc.hasValue())
323 continue;
324 FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
325 LeafLoc->second.Discriminator,
326 CalleeName, Count);
327
328 // Record head sample for called target(callee)
329 std::ostringstream OCalleeCtxStr;
330 if (ContextId.find(" @ ") != StringRef::npos) {
331 OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
332 OCalleeCtxStr << " @ ";
333 }
334 OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
335
336 FunctionSamples &CalleeProfile =
337 getFunctionProfileForContext(OCalleeCtxStr.str());
338 assert(Count != 0 && "Unexpected zero weight branch");
339 CalleeProfile.addHeadSamples(Count);
340 }
341 }
342
getCallerContext(StringRef CalleeContext,StringRef & CallerNameWithContext)343 static FrameLocation getCallerContext(StringRef CalleeContext,
344 StringRef &CallerNameWithContext) {
345 StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
346 CallerNameWithContext = CallerContext.rsplit(':').first;
347 auto ContextSplit = CallerContext.rsplit(" @ ");
348 StringRef CallerFrameStr = ContextSplit.second.size() == 0
349 ? ContextSplit.first
350 : ContextSplit.second;
351 FrameLocation LeafFrameLoc = {"", {0, 0}};
352 StringRef Funcname;
353 SampleContext::decodeContextString(CallerFrameStr, Funcname,
354 LeafFrameLoc.second);
355 LeafFrameLoc.first = Funcname.str();
356 return LeafFrameLoc;
357 }
358
populateInferredFunctionSamples()359 void CSProfileGenerator::populateInferredFunctionSamples() {
360 for (const auto &Item : ProfileMap) {
361 const StringRef CalleeContext = Item.first();
362 const FunctionSamples &CalleeProfile = Item.second;
363
364 // If we already have head sample counts, we must have value profile
365 // for call sites added already. Skip to avoid double counting.
366 if (CalleeProfile.getHeadSamples())
367 continue;
368 // If we don't have context, nothing to do for caller's call site.
369 // This could happen for entry point function.
370 if (CalleeContext.find(" @ ") == StringRef::npos)
371 continue;
372
373 // Infer Caller's frame loc and context ID through string splitting
374 StringRef CallerContextId;
375 FrameLocation &&CallerLeafFrameLoc =
376 getCallerContext(CalleeContext, CallerContextId);
377
378 // It's possible that we haven't seen any sample directly in the caller,
379 // in which case CallerProfile will not exist. But we can't modify
380 // ProfileMap while iterating it.
381 // TODO: created function profile for those callers too
382 if (ProfileMap.find(CallerContextId) == ProfileMap.end())
383 continue;
384 FunctionSamples &CallerProfile = ProfileMap[CallerContextId];
385
386 // Since we don't have call count for inlined functions, we
387 // estimate it from inlinee's profile using entry body sample.
388 uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
389 // If we don't have samples with location, use 1 to indicate live.
390 if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
391 EstimatedCallCount = 1;
392 CallerProfile.addCalledTargetSamples(
393 CallerLeafFrameLoc.second.LineOffset,
394 CallerLeafFrameLoc.second.Discriminator,
395 CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
396 CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
397 CallerLeafFrameLoc.second.Discriminator,
398 EstimatedCallCount);
399 CallerProfile.addTotalSamples(EstimatedCallCount);
400 }
401 }
402
postProcessProfiles()403 void CSProfileGenerator::postProcessProfiles() {
404 // Compute hot/cold threshold based on profile. This will be used for cold
405 // context profile merging/trimming.
406 computeSummaryAndThreshold();
407
408 // Run global pre-inliner to adjust/merge context profile based on estimated
409 // inline decisions.
410 if (EnableCSPreInliner)
411 CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
412
413 // Trim and merge cold context profile using cold threshold above;
414 SampleContextTrimmer(ProfileMap)
415 .trimAndMergeColdContextProfiles(
416 ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
417 CSProfColdContextFrameDepth);
418 }
419
computeSummaryAndThreshold()420 void CSProfileGenerator::computeSummaryAndThreshold() {
421 // Update the default value of cold cutoff for llvm-profgen.
422 // Do it here because we don't want to change the global default,
423 // which would lead CS profile size too large.
424 if (!ProfileSummaryCutoffCold.getNumOccurrences())
425 ProfileSummaryCutoffCold = 999000;
426
427 SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
428 auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
429 HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
430 (Summary->getDetailedSummary()));
431 ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
432 (Summary->getDetailedSummary()));
433 }
434
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)435 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
436 StringMap<FunctionSamples> &ProfileMap) {
437 if (std::error_code EC = Writer->write(ProfileMap))
438 exitWithError(std::move(EC));
439 }
440
441 // Helper function to extract context prefix string stack
442 // Extract context stack for reusing, leaf context stack will
443 // be added compressed while looking up function profile
444 static void
extractPrefixContextStack(SmallVectorImpl<std::string> & ContextStrStack,const SmallVectorImpl<const PseudoProbe * > & Probes,ProfiledBinary * Binary)445 extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
446 const SmallVectorImpl<const PseudoProbe *> &Probes,
447 ProfiledBinary *Binary) {
448 for (const auto *P : Probes) {
449 Binary->getInlineContextForProbe(P, ContextStrStack, true);
450 }
451 }
452
generateProfile()453 void PseudoProbeCSProfileGenerator::generateProfile() {
454 // Enable pseudo probe functionalities in SampleProf
455 FunctionSamples::ProfileIsProbeBased = true;
456 FunctionSamples::ProfileIsCS = true;
457 for (const auto &BI : BinarySampleCounters) {
458 ProfiledBinary *Binary = BI.first;
459 for (const auto &CI : BI.second) {
460 const ProbeBasedCtxKey *CtxKey =
461 dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
462 SmallVector<std::string, 16> ContextStrStack;
463 extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
464 // Fill in function body samples from probes, also infer caller's samples
465 // from callee's probe
466 populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
467 Binary);
468 // Fill in boundary samples for a call probe
469 populateBoundarySamplesWithProbes(CI.second.BranchCounter,
470 ContextStrStack, Binary);
471 }
472 }
473
474 postProcessProfiles();
475 }
476
extractProbesFromRange(const RangeSample & RangeCounter,ProbeCounterMap & ProbeCounter,ProfiledBinary * Binary)477 void PseudoProbeCSProfileGenerator::extractProbesFromRange(
478 const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
479 ProfiledBinary *Binary) {
480 RangeSample Ranges;
481 findDisjointRanges(Ranges, RangeCounter);
482 for (const auto &Range : Ranges) {
483 uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
484 uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
485 uint64_t Count = Range.second;
486 // Disjoint ranges have introduce zero-filled gap that
487 // doesn't belong to current context, filter them out.
488 if (Count == 0)
489 continue;
490
491 InstructionPointer IP(Binary, RangeBegin, true);
492
493 // Disjoint ranges may have range in the middle of two instr,
494 // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
495 // can be Addr1+1 to Addr2-1. We should ignore such range.
496 if (IP.Address > RangeEnd)
497 continue;
498
499 while (IP.Address <= RangeEnd) {
500 const AddressProbesMap &Address2ProbesMap =
501 Binary->getAddress2ProbesMap();
502 auto It = Address2ProbesMap.find(IP.Address);
503 if (It != Address2ProbesMap.end()) {
504 for (const auto &Probe : It->second) {
505 if (!Probe.isBlock())
506 continue;
507 ProbeCounter[&Probe] += Count;
508 }
509 }
510
511 IP.advance();
512 }
513 }
514 }
515
populateBodySamplesWithProbes(const RangeSample & RangeCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)516 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
517 const RangeSample &RangeCounter,
518 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
519 ProbeCounterMap ProbeCounter;
520 // Extract the top frame probes by looking up each address among the range in
521 // the Address2ProbeMap
522 extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
523 std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples;
524 for (auto PI : ProbeCounter) {
525 const PseudoProbe *Probe = PI.first;
526 uint64_t Count = PI.second;
527 FunctionSamples &FunctionProfile =
528 getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
529 // Record the current frame and FunctionProfile whenever samples are
530 // collected for non-danglie probes. This is for reporting all of the
531 // zero count probes of the frame later.
532 FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
533 FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
534 FunctionProfile.addTotalSamples(Count);
535 if (Probe->isEntry()) {
536 FunctionProfile.addHeadSamples(Count);
537 // Look up for the caller's function profile
538 const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
539 if (InlinerDesc != nullptr) {
540 // Since the context id will be compressed, we have to use callee's
541 // context id to infer caller's context id to ensure they share the
542 // same context prefix.
543 StringRef CalleeContextId =
544 FunctionProfile.getContext().getNameWithContext();
545 StringRef CallerContextId;
546 FrameLocation &&CallerLeafFrameLoc =
547 getCallerContext(CalleeContextId, CallerContextId);
548 uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
549 assert(CallerIndex &&
550 "Inferred caller's location index shouldn't be zero!");
551 FunctionSamples &CallerProfile =
552 getFunctionProfileForContext(CallerContextId);
553 CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
554 CallerProfile.addBodySamples(CallerIndex, 0, Count);
555 CallerProfile.addTotalSamples(Count);
556 CallerProfile.addCalledTargetSamples(
557 CallerIndex, 0,
558 FunctionProfile.getContext().getNameWithoutContext(), Count);
559 }
560 }
561
562 // Assign zero count for remaining probes without sample hits to
563 // differentiate from probes optimized away, of which the counts are unknown
564 // and will be inferred by the compiler.
565 for (auto &I : FrameSamples) {
566 auto *FunctionProfile = I.second;
567 for (auto *Probe : I.first->getProbes()) {
568 FunctionProfile->addBodySamplesForProbe(Probe->Index, 0);
569 }
570 }
571 }
572 }
573
populateBoundarySamplesWithProbes(const BranchSample & BranchCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)574 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
575 const BranchSample &BranchCounter,
576 SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
577 for (auto BI : BranchCounter) {
578 uint64_t SourceOffset = BI.first.first;
579 uint64_t TargetOffset = BI.first.second;
580 uint64_t Count = BI.second;
581 uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
582 const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
583 if (CallProbe == nullptr)
584 continue;
585 FunctionSamples &FunctionProfile =
586 getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
587 FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
588 FunctionProfile.addTotalSamples(Count);
589 StringRef CalleeName = FunctionSamples::getCanonicalFnName(
590 Binary->getFuncFromStartOffset(TargetOffset));
591 if (CalleeName.size() == 0)
592 continue;
593 FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
594 Count);
595 }
596 }
597
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbeFuncDesc * LeafFuncDesc,bool WasLeafInlined)598 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
599 SmallVectorImpl<std::string> &ContextStrStack,
600 const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) {
601 assert(ContextStrStack.size() && "Profile context must have the leaf frame");
602 // Compress the context string except for the leaf frame
603 std::string LeafFrame = ContextStrStack.back();
604 ContextStrStack.pop_back();
605 CSProfileGenerator::compressRecursionContext(ContextStrStack);
606
607 std::ostringstream OContextStr;
608 for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
609 if (OContextStr.str().size())
610 OContextStr << " @ ";
611 OContextStr << ContextStrStack[I];
612 }
613 // For leaf inlined context with the top frame, we should strip off the top
614 // frame's probe id, like:
615 // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
616 if (OContextStr.str().size())
617 OContextStr << " @ ";
618 OContextStr << StringRef(LeafFrame).split(":").first.str();
619
620 FunctionSamples &FunctionProile =
621 getFunctionProfileForContext(OContextStr.str(), WasLeafInlined);
622 FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
623 return FunctionProile;
624 }
625
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbe * LeafProbe,ProfiledBinary * Binary)626 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
627 SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
628 ProfiledBinary *Binary) {
629 // Explicitly copy the context for appending the leaf context
630 SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
631 ContextStrStack.end());
632 Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true);
633 const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
634 bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite();
635 return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc,
636 WasLeafInlined);
637 }
638
639 } // end namespace sampleprof
640 } // end namespace llvm
641