1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ProfileGenerator.h"
10 #include "llvm/ProfileData/ProfileCommon.h"
11 
12 static cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
13                                            cl::Required,
14                                            cl::desc("Output profile file"));
15 static cl::alias OutputA("o", cl::desc("Alias for --output"),
16                          cl::aliasopt(OutputFilename));
17 
18 static cl::opt<SampleProfileFormat> OutputFormat(
19     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
20     cl::values(
21         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
22         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
23         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
24         clEnumValN(SPF_Text, "text", "Text encoding"),
25         clEnumValN(SPF_GCC, "gcc",
26                    "GCC encoding (only meaningful for -sample)")));
27 
28 static cl::opt<int32_t, true> RecursionCompression(
29     "compress-recursion",
30     cl::desc("Compressing recursion by deduplicating adjacent frame "
31              "sequences up to the specified size. -1 means no size limit."),
32     cl::Hidden,
33     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
34 
35 static cl::opt<bool> CSProfMergeColdContext(
36     "csprof-merge-cold-context", cl::init(true), cl::ZeroOrMore,
37     cl::desc("If the total count of context profile is smaller than "
38              "the threshold, it will be merged into context-less base "
39              "profile."));
40 
41 static cl::opt<bool> CSProfTrimColdContext(
42     "csprof-trim-cold-context", cl::init(true), cl::ZeroOrMore,
43     cl::desc("If the total count of the profile after all merge is done "
44              "is still smaller than threshold, it will be trimmed."));
45 
46 static cl::opt<uint32_t> CSProfColdContextFrameDepth(
47     "csprof-frame-depth-for-cold-context", cl::init(1), cl::ZeroOrMore,
48     cl::desc("Keep the last K frames while merging cold profile. 1 means the "
49              "context-less base profile"));
50 
51 static cl::opt<bool> EnableCSPreInliner(
52     "csspgo-preinliner", cl::Hidden, cl::init(false),
53     cl::desc("Run a global pre-inliner to merge context profile based on "
54              "estimated global top-down inline decisions"));
55 
56 extern cl::opt<int> ProfileSummaryCutoffCold;
57 
58 using namespace llvm;
59 using namespace sampleprof;
60 
61 namespace llvm {
62 namespace sampleprof {
63 
64 // Initialize the MaxCompressionSize to -1 which means no size limit
65 int32_t CSProfileGenerator::MaxCompressionSize = -1;
66 
67 static bool
usePseudoProbes(const BinarySampleCounterMap & BinarySampleCounters)68 usePseudoProbes(const BinarySampleCounterMap &BinarySampleCounters) {
69   return BinarySampleCounters.size() &&
70          BinarySampleCounters.begin()->first->usePseudoProbes();
71 }
72 
73 std::unique_ptr<ProfileGenerator>
create(const BinarySampleCounterMap & BinarySampleCounters,enum PerfScriptType SampleType)74 ProfileGenerator::create(const BinarySampleCounterMap &BinarySampleCounters,
75                          enum PerfScriptType SampleType) {
76   std::unique_ptr<ProfileGenerator> ProfileGenerator;
77   if (SampleType == PERF_LBR_STACK) {
78     if (usePseudoProbes(BinarySampleCounters)) {
79       ProfileGenerator.reset(
80           new PseudoProbeCSProfileGenerator(BinarySampleCounters));
81     } else {
82       ProfileGenerator.reset(new CSProfileGenerator(BinarySampleCounters));
83     }
84   } else {
85     // TODO:
86     llvm_unreachable("Unsupported perfscript!");
87   }
88 
89   return ProfileGenerator;
90 }
91 
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)92 void ProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
93                              StringMap<FunctionSamples> &ProfileMap) {
94   if (std::error_code EC = Writer->write(ProfileMap))
95     exitWithError(std::move(EC));
96 }
97 
write()98 void ProfileGenerator::write() {
99   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
100   if (std::error_code EC = WriterOrErr.getError())
101     exitWithError(EC, OutputFilename);
102   write(std::move(WriterOrErr.get()), ProfileMap);
103 }
104 
findDisjointRanges(RangeSample & DisjointRanges,const RangeSample & Ranges)105 void ProfileGenerator::findDisjointRanges(RangeSample &DisjointRanges,
106                                           const RangeSample &Ranges) {
107 
108   /*
109   Regions may overlap with each other. Using the boundary info, find all
110   disjoint ranges and their sample count. BoundaryPoint contains the count
111   multiple samples begin/end at this points.
112 
113   |<--100-->|           Sample1
114   |<------200------>|   Sample2
115   A         B       C
116 
117   In the example above,
118   Sample1 begins at A, ends at B, its value is 100.
119   Sample2 beings at A, ends at C, its value is 200.
120   For A, BeginCount is the sum of sample begins at A, which is 300 and no
121   samples ends at A, so EndCount is 0.
122   Then boundary points A, B, and C with begin/end counts are:
123   A: (300, 0)
124   B: (0, 100)
125   C: (0, 200)
126   */
127   struct BoundaryPoint {
128     // Sum of sample counts beginning at this point
129     uint64_t BeginCount;
130     // Sum of sample counts ending at this point
131     uint64_t EndCount;
132 
133     BoundaryPoint() : BeginCount(0), EndCount(0){};
134 
135     void addBeginCount(uint64_t Count) { BeginCount += Count; }
136 
137     void addEndCount(uint64_t Count) { EndCount += Count; }
138   };
139 
140   /*
141   For the above example. With boundary points, follwing logic finds two
142   disjoint region of
143 
144   [A,B]:   300
145   [B+1,C]: 200
146 
147   If there is a boundary point that both begin and end, the point itself
148   becomes a separate disjoint region. For example, if we have original
149   ranges of
150 
151   |<--- 100 --->|
152                 |<--- 200 --->|
153   A             B             C
154 
155   there are three boundary points with their begin/end counts of
156 
157   A: (100, 0)
158   B: (200, 100)
159   C: (0, 200)
160 
161   the disjoint ranges would be
162 
163   [A, B-1]: 100
164   [B, B]:   300
165   [B+1, C]: 200.
166   */
167   std::map<uint64_t, BoundaryPoint> Boundaries;
168 
169   for (auto Item : Ranges) {
170     uint64_t Begin = Item.first.first;
171     uint64_t End = Item.first.second;
172     uint64_t Count = Item.second;
173     if (Boundaries.find(Begin) == Boundaries.end())
174       Boundaries[Begin] = BoundaryPoint();
175     Boundaries[Begin].addBeginCount(Count);
176 
177     if (Boundaries.find(End) == Boundaries.end())
178       Boundaries[End] = BoundaryPoint();
179     Boundaries[End].addEndCount(Count);
180   }
181 
182   uint64_t BeginAddress = UINT64_MAX;
183   int Count = 0;
184   for (auto Item : Boundaries) {
185     uint64_t Address = Item.first;
186     BoundaryPoint &Point = Item.second;
187     if (Point.BeginCount) {
188       if (BeginAddress != UINT64_MAX)
189         DisjointRanges[{BeginAddress, Address - 1}] = Count;
190       Count += Point.BeginCount;
191       BeginAddress = Address;
192     }
193     if (Point.EndCount) {
194       assert((BeginAddress != UINT64_MAX) &&
195              "First boundary point cannot be 'end' point");
196       DisjointRanges[{BeginAddress, Address}] = Count;
197       Count -= Point.EndCount;
198       BeginAddress = Address + 1;
199     }
200   }
201 }
202 
203 FunctionSamples &
getFunctionProfileForContext(StringRef ContextStr,bool WasLeafInlined)204 CSProfileGenerator::getFunctionProfileForContext(StringRef ContextStr,
205                                                  bool WasLeafInlined) {
206   auto Ret = ProfileMap.try_emplace(ContextStr, FunctionSamples());
207   if (Ret.second) {
208     // Make a copy of the underlying context string in string table
209     // before StringRef wrapper is used for context.
210     auto It = ContextStrings.insert(ContextStr.str());
211     SampleContext FContext(*It.first, RawContext);
212     if (WasLeafInlined)
213       FContext.setAttribute(ContextWasInlined);
214     FunctionSamples &FProfile = Ret.first->second;
215     FProfile.setContext(FContext);
216     FProfile.setName(FContext.getNameWithoutContext());
217   }
218   return Ret.first->second;
219 }
220 
generateProfile()221 void CSProfileGenerator::generateProfile() {
222   FunctionSamples::ProfileIsCS = true;
223   for (const auto &BI : BinarySampleCounters) {
224     ProfiledBinary *Binary = BI.first;
225     for (const auto &CI : BI.second) {
226       const StringBasedCtxKey *CtxKey =
227           dyn_cast<StringBasedCtxKey>(CI.first.getPtr());
228       StringRef ContextId(CtxKey->Context);
229       // Get or create function profile for the range
230       FunctionSamples &FunctionProfile =
231           getFunctionProfileForContext(ContextId, CtxKey->WasLeafInlined);
232 
233       // Fill in function body samples
234       populateFunctionBodySamples(FunctionProfile, CI.second.RangeCounter,
235                                   Binary);
236       // Fill in boundary sample counts as well as call site samples for calls
237       populateFunctionBoundarySamples(ContextId, FunctionProfile,
238                                       CI.second.BranchCounter, Binary);
239     }
240   }
241   // Fill in call site value sample for inlined calls and also use context to
242   // infer missing samples. Since we don't have call count for inlined
243   // functions, we estimate it from inlinee's profile using the entry of the
244   // body sample.
245   populateInferredFunctionSamples();
246 
247   postProcessProfiles();
248 }
249 
updateBodySamplesforFunctionProfile(FunctionSamples & FunctionProfile,const FrameLocation & LeafLoc,uint64_t Count)250 void CSProfileGenerator::updateBodySamplesforFunctionProfile(
251     FunctionSamples &FunctionProfile, const FrameLocation &LeafLoc,
252     uint64_t Count) {
253   // Filter out invalid negative(int type) lineOffset
254   if (LeafLoc.second.LineOffset & 0x80000000)
255     return;
256   // Use the maximum count of samples with same line location
257   ErrorOr<uint64_t> R = FunctionProfile.findSamplesAt(
258       LeafLoc.second.LineOffset, LeafLoc.second.Discriminator);
259   uint64_t PreviousCount = R ? R.get() : 0;
260   if (PreviousCount < Count) {
261     FunctionProfile.addBodySamples(LeafLoc.second.LineOffset,
262                                    LeafLoc.second.Discriminator,
263                                    Count - PreviousCount);
264   }
265 }
266 
populateFunctionBodySamples(FunctionSamples & FunctionProfile,const RangeSample & RangeCounter,ProfiledBinary * Binary)267 void CSProfileGenerator::populateFunctionBodySamples(
268     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter,
269     ProfiledBinary *Binary) {
270   // Compute disjoint ranges first, so we can use MAX
271   // for calculating count for each location.
272   RangeSample Ranges;
273   findDisjointRanges(Ranges, RangeCounter);
274   for (auto Range : Ranges) {
275     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
276     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
277     uint64_t Count = Range.second;
278     // Disjoint ranges have introduce zero-filled gap that
279     // doesn't belong to current context, filter them out.
280     if (Count == 0)
281       continue;
282 
283     InstructionPointer IP(Binary, RangeBegin, true);
284 
285     // Disjoint ranges may have range in the middle of two instr,
286     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
287     // can be Addr1+1 to Addr2-1. We should ignore such range.
288     if (IP.Address > RangeEnd)
289       continue;
290 
291     while (IP.Address <= RangeEnd) {
292       uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
293       auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
294       if (LeafLoc.hasValue()) {
295         // Recording body sample for this specific context
296         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
297       }
298       // Accumulate total sample count even it's a line with invalid debug info
299       FunctionProfile.addTotalSamples(Count);
300       // Move to next IP within the range
301       IP.advance();
302     }
303   }
304 }
305 
populateFunctionBoundarySamples(StringRef ContextId,FunctionSamples & FunctionProfile,const BranchSample & BranchCounters,ProfiledBinary * Binary)306 void CSProfileGenerator::populateFunctionBoundarySamples(
307     StringRef ContextId, FunctionSamples &FunctionProfile,
308     const BranchSample &BranchCounters, ProfiledBinary *Binary) {
309 
310   for (auto Entry : BranchCounters) {
311     uint64_t SourceOffset = Entry.first.first;
312     uint64_t TargetOffset = Entry.first.second;
313     uint64_t Count = Entry.second;
314     // Get the callee name by branch target if it's a call branch
315     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
316         Binary->getFuncFromStartOffset(TargetOffset));
317     if (CalleeName.size() == 0)
318       continue;
319 
320     // Record called target sample and its count
321     auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
322     if (!LeafLoc.hasValue())
323       continue;
324     FunctionProfile.addCalledTargetSamples(LeafLoc->second.LineOffset,
325                                            LeafLoc->second.Discriminator,
326                                            CalleeName, Count);
327 
328     // Record head sample for called target(callee)
329     std::ostringstream OCalleeCtxStr;
330     if (ContextId.find(" @ ") != StringRef::npos) {
331       OCalleeCtxStr << ContextId.rsplit(" @ ").first.str();
332       OCalleeCtxStr << " @ ";
333     }
334     OCalleeCtxStr << getCallSite(*LeafLoc) << " @ " << CalleeName.str();
335 
336     FunctionSamples &CalleeProfile =
337         getFunctionProfileForContext(OCalleeCtxStr.str());
338     assert(Count != 0 && "Unexpected zero weight branch");
339     CalleeProfile.addHeadSamples(Count);
340   }
341 }
342 
getCallerContext(StringRef CalleeContext,StringRef & CallerNameWithContext)343 static FrameLocation getCallerContext(StringRef CalleeContext,
344                                       StringRef &CallerNameWithContext) {
345   StringRef CallerContext = CalleeContext.rsplit(" @ ").first;
346   CallerNameWithContext = CallerContext.rsplit(':').first;
347   auto ContextSplit = CallerContext.rsplit(" @ ");
348   StringRef CallerFrameStr = ContextSplit.second.size() == 0
349                                  ? ContextSplit.first
350                                  : ContextSplit.second;
351   FrameLocation LeafFrameLoc = {"", {0, 0}};
352   StringRef Funcname;
353   SampleContext::decodeContextString(CallerFrameStr, Funcname,
354                                      LeafFrameLoc.second);
355   LeafFrameLoc.first = Funcname.str();
356   return LeafFrameLoc;
357 }
358 
populateInferredFunctionSamples()359 void CSProfileGenerator::populateInferredFunctionSamples() {
360   for (const auto &Item : ProfileMap) {
361     const StringRef CalleeContext = Item.first();
362     const FunctionSamples &CalleeProfile = Item.second;
363 
364     // If we already have head sample counts, we must have value profile
365     // for call sites added already. Skip to avoid double counting.
366     if (CalleeProfile.getHeadSamples())
367       continue;
368     // If we don't have context, nothing to do for caller's call site.
369     // This could happen for entry point function.
370     if (CalleeContext.find(" @ ") == StringRef::npos)
371       continue;
372 
373     // Infer Caller's frame loc and context ID through string splitting
374     StringRef CallerContextId;
375     FrameLocation &&CallerLeafFrameLoc =
376         getCallerContext(CalleeContext, CallerContextId);
377 
378     // It's possible that we haven't seen any sample directly in the caller,
379     // in which case CallerProfile will not exist. But we can't modify
380     // ProfileMap while iterating it.
381     // TODO: created function profile for those callers too
382     if (ProfileMap.find(CallerContextId) == ProfileMap.end())
383       continue;
384     FunctionSamples &CallerProfile = ProfileMap[CallerContextId];
385 
386     // Since we don't have call count for inlined functions, we
387     // estimate it from inlinee's profile using entry body sample.
388     uint64_t EstimatedCallCount = CalleeProfile.getEntrySamples();
389     // If we don't have samples with location, use 1 to indicate live.
390     if (!EstimatedCallCount && !CalleeProfile.getBodySamples().size())
391       EstimatedCallCount = 1;
392     CallerProfile.addCalledTargetSamples(
393         CallerLeafFrameLoc.second.LineOffset,
394         CallerLeafFrameLoc.second.Discriminator,
395         CalleeProfile.getContext().getNameWithoutContext(), EstimatedCallCount);
396     CallerProfile.addBodySamples(CallerLeafFrameLoc.second.LineOffset,
397                                  CallerLeafFrameLoc.second.Discriminator,
398                                  EstimatedCallCount);
399     CallerProfile.addTotalSamples(EstimatedCallCount);
400   }
401 }
402 
postProcessProfiles()403 void CSProfileGenerator::postProcessProfiles() {
404   // Compute hot/cold threshold based on profile. This will be used for cold
405   // context profile merging/trimming.
406   computeSummaryAndThreshold();
407 
408   // Run global pre-inliner to adjust/merge context profile based on estimated
409   // inline decisions.
410   if (EnableCSPreInliner)
411     CSPreInliner(ProfileMap, HotCountThreshold, ColdCountThreshold).run();
412 
413   // Trim and merge cold context profile using cold threshold above;
414   SampleContextTrimmer(ProfileMap)
415       .trimAndMergeColdContextProfiles(
416           ColdCountThreshold, CSProfTrimColdContext, CSProfMergeColdContext,
417           CSProfColdContextFrameDepth);
418 }
419 
computeSummaryAndThreshold()420 void CSProfileGenerator::computeSummaryAndThreshold() {
421   // Update the default value of cold cutoff for llvm-profgen.
422   // Do it here because we don't want to change the global default,
423   // which would lead CS profile size too large.
424   if (!ProfileSummaryCutoffCold.getNumOccurrences())
425     ProfileSummaryCutoffCold = 999000;
426 
427   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
428   auto Summary = Builder.computeSummaryForProfiles(ProfileMap);
429   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
430       (Summary->getDetailedSummary()));
431   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
432       (Summary->getDetailedSummary()));
433 }
434 
write(std::unique_ptr<SampleProfileWriter> Writer,StringMap<FunctionSamples> & ProfileMap)435 void CSProfileGenerator::write(std::unique_ptr<SampleProfileWriter> Writer,
436                                StringMap<FunctionSamples> &ProfileMap) {
437   if (std::error_code EC = Writer->write(ProfileMap))
438     exitWithError(std::move(EC));
439 }
440 
441 // Helper function to extract context prefix string stack
442 // Extract context stack for reusing, leaf context stack will
443 // be added compressed while looking up function profile
444 static void
extractPrefixContextStack(SmallVectorImpl<std::string> & ContextStrStack,const SmallVectorImpl<const PseudoProbe * > & Probes,ProfiledBinary * Binary)445 extractPrefixContextStack(SmallVectorImpl<std::string> &ContextStrStack,
446                           const SmallVectorImpl<const PseudoProbe *> &Probes,
447                           ProfiledBinary *Binary) {
448   for (const auto *P : Probes) {
449     Binary->getInlineContextForProbe(P, ContextStrStack, true);
450   }
451 }
452 
generateProfile()453 void PseudoProbeCSProfileGenerator::generateProfile() {
454   // Enable pseudo probe functionalities in SampleProf
455   FunctionSamples::ProfileIsProbeBased = true;
456   FunctionSamples::ProfileIsCS = true;
457   for (const auto &BI : BinarySampleCounters) {
458     ProfiledBinary *Binary = BI.first;
459     for (const auto &CI : BI.second) {
460       const ProbeBasedCtxKey *CtxKey =
461           dyn_cast<ProbeBasedCtxKey>(CI.first.getPtr());
462       SmallVector<std::string, 16> ContextStrStack;
463       extractPrefixContextStack(ContextStrStack, CtxKey->Probes, Binary);
464       // Fill in function body samples from probes, also infer caller's samples
465       // from callee's probe
466       populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStrStack,
467                                     Binary);
468       // Fill in boundary samples for a call probe
469       populateBoundarySamplesWithProbes(CI.second.BranchCounter,
470                                         ContextStrStack, Binary);
471     }
472   }
473 
474   postProcessProfiles();
475 }
476 
extractProbesFromRange(const RangeSample & RangeCounter,ProbeCounterMap & ProbeCounter,ProfiledBinary * Binary)477 void PseudoProbeCSProfileGenerator::extractProbesFromRange(
478     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
479     ProfiledBinary *Binary) {
480   RangeSample Ranges;
481   findDisjointRanges(Ranges, RangeCounter);
482   for (const auto &Range : Ranges) {
483     uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
484     uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
485     uint64_t Count = Range.second;
486     // Disjoint ranges have introduce zero-filled gap that
487     // doesn't belong to current context, filter them out.
488     if (Count == 0)
489       continue;
490 
491     InstructionPointer IP(Binary, RangeBegin, true);
492 
493     // Disjoint ranges may have range in the middle of two instr,
494     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
495     // can be Addr1+1 to Addr2-1. We should ignore such range.
496     if (IP.Address > RangeEnd)
497       continue;
498 
499     while (IP.Address <= RangeEnd) {
500       const AddressProbesMap &Address2ProbesMap =
501           Binary->getAddress2ProbesMap();
502       auto It = Address2ProbesMap.find(IP.Address);
503       if (It != Address2ProbesMap.end()) {
504         for (const auto &Probe : It->second) {
505           if (!Probe.isBlock())
506             continue;
507           ProbeCounter[&Probe] += Count;
508         }
509       }
510 
511       IP.advance();
512     }
513   }
514 }
515 
populateBodySamplesWithProbes(const RangeSample & RangeCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)516 void PseudoProbeCSProfileGenerator::populateBodySamplesWithProbes(
517     const RangeSample &RangeCounter,
518     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
519   ProbeCounterMap ProbeCounter;
520   // Extract the top frame probes by looking up each address among the range in
521   // the Address2ProbeMap
522   extractProbesFromRange(RangeCounter, ProbeCounter, Binary);
523   std::unordered_map<PseudoProbeInlineTree *, FunctionSamples *> FrameSamples;
524   for (auto PI : ProbeCounter) {
525     const PseudoProbe *Probe = PI.first;
526     uint64_t Count = PI.second;
527     FunctionSamples &FunctionProfile =
528         getFunctionProfileForLeafProbe(ContextStrStack, Probe, Binary);
529     // Record the current frame and FunctionProfile whenever samples are
530     // collected for non-danglie probes. This is for reporting all of the
531     // zero count probes of the frame later.
532     FrameSamples[Probe->getInlineTreeNode()] = &FunctionProfile;
533     FunctionProfile.addBodySamplesForProbe(Probe->Index, Count);
534     FunctionProfile.addTotalSamples(Count);
535     if (Probe->isEntry()) {
536       FunctionProfile.addHeadSamples(Count);
537       // Look up for the caller's function profile
538       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
539       if (InlinerDesc != nullptr) {
540         // Since the context id will be compressed, we have to use callee's
541         // context id to infer caller's context id to ensure they share the
542         // same context prefix.
543         StringRef CalleeContextId =
544             FunctionProfile.getContext().getNameWithContext();
545         StringRef CallerContextId;
546         FrameLocation &&CallerLeafFrameLoc =
547             getCallerContext(CalleeContextId, CallerContextId);
548         uint64_t CallerIndex = CallerLeafFrameLoc.second.LineOffset;
549         assert(CallerIndex &&
550                "Inferred caller's location index shouldn't be zero!");
551         FunctionSamples &CallerProfile =
552             getFunctionProfileForContext(CallerContextId);
553         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
554         CallerProfile.addBodySamples(CallerIndex, 0, Count);
555         CallerProfile.addTotalSamples(Count);
556         CallerProfile.addCalledTargetSamples(
557             CallerIndex, 0,
558             FunctionProfile.getContext().getNameWithoutContext(), Count);
559       }
560     }
561 
562     // Assign zero count for remaining probes without sample hits to
563     // differentiate from probes optimized away, of which the counts are unknown
564     // and will be inferred by the compiler.
565     for (auto &I : FrameSamples) {
566       auto *FunctionProfile = I.second;
567       for (auto *Probe : I.first->getProbes()) {
568           FunctionProfile->addBodySamplesForProbe(Probe->Index, 0);
569       }
570     }
571   }
572 }
573 
populateBoundarySamplesWithProbes(const BranchSample & BranchCounter,SmallVectorImpl<std::string> & ContextStrStack,ProfiledBinary * Binary)574 void PseudoProbeCSProfileGenerator::populateBoundarySamplesWithProbes(
575     const BranchSample &BranchCounter,
576     SmallVectorImpl<std::string> &ContextStrStack, ProfiledBinary *Binary) {
577   for (auto BI : BranchCounter) {
578     uint64_t SourceOffset = BI.first.first;
579     uint64_t TargetOffset = BI.first.second;
580     uint64_t Count = BI.second;
581     uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
582     const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(SourceAddress);
583     if (CallProbe == nullptr)
584       continue;
585     FunctionSamples &FunctionProfile =
586         getFunctionProfileForLeafProbe(ContextStrStack, CallProbe, Binary);
587     FunctionProfile.addBodySamples(CallProbe->Index, 0, Count);
588     FunctionProfile.addTotalSamples(Count);
589     StringRef CalleeName = FunctionSamples::getCanonicalFnName(
590         Binary->getFuncFromStartOffset(TargetOffset));
591     if (CalleeName.size() == 0)
592       continue;
593     FunctionProfile.addCalledTargetSamples(CallProbe->Index, 0, CalleeName,
594                                            Count);
595   }
596 }
597 
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbeFuncDesc * LeafFuncDesc,bool WasLeafInlined)598 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
599     SmallVectorImpl<std::string> &ContextStrStack,
600     const PseudoProbeFuncDesc *LeafFuncDesc, bool WasLeafInlined) {
601   assert(ContextStrStack.size() && "Profile context must have the leaf frame");
602   // Compress the context string except for the leaf frame
603   std::string LeafFrame = ContextStrStack.back();
604   ContextStrStack.pop_back();
605   CSProfileGenerator::compressRecursionContext(ContextStrStack);
606 
607   std::ostringstream OContextStr;
608   for (uint32_t I = 0; I < ContextStrStack.size(); I++) {
609     if (OContextStr.str().size())
610       OContextStr << " @ ";
611     OContextStr << ContextStrStack[I];
612   }
613   // For leaf inlined context with the top frame, we should strip off the top
614   // frame's probe id, like:
615   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
616   if (OContextStr.str().size())
617     OContextStr << " @ ";
618   OContextStr << StringRef(LeafFrame).split(":").first.str();
619 
620   FunctionSamples &FunctionProile =
621       getFunctionProfileForContext(OContextStr.str(), WasLeafInlined);
622   FunctionProile.setFunctionHash(LeafFuncDesc->FuncHash);
623   return FunctionProile;
624 }
625 
getFunctionProfileForLeafProbe(SmallVectorImpl<std::string> & ContextStrStack,const PseudoProbe * LeafProbe,ProfiledBinary * Binary)626 FunctionSamples &PseudoProbeCSProfileGenerator::getFunctionProfileForLeafProbe(
627     SmallVectorImpl<std::string> &ContextStrStack, const PseudoProbe *LeafProbe,
628     ProfiledBinary *Binary) {
629   // Explicitly copy the context for appending the leaf context
630   SmallVector<std::string, 16> ContextStrStackCopy(ContextStrStack.begin(),
631                                                    ContextStrStack.end());
632   Binary->getInlineContextForProbe(LeafProbe, ContextStrStackCopy, true);
633   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->GUID);
634   bool WasLeafInlined = LeafProbe->InlineTree->hasInlineSite();
635   return getFunctionProfileForLeafProbe(ContextStrStackCopy, FuncDesc,
636                                         WasLeafInlined);
637 }
638 
639 } // end namespace sampleprof
640 } // end namespace llvm
641