1 //===-- ProfileGenerator.cpp - Profile Generator  ---------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ProfileGenerator.h"
9 #include "ErrorHandling.h"
10 #include "MissingFrameInferrer.h"
11 #include "PerfReader.h"
12 #include "ProfiledBinary.h"
13 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14 #include "llvm/ProfileData/ProfileCommon.h"
15 #include <algorithm>
16 #include <float.h>
17 #include <unordered_set>
18 #include <utility>
19 
20 cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
21                                     cl::Required,
22                                     cl::desc("Output profile file"));
23 static cl::alias OutputA("o", cl::desc("Alias for --output"),
24                          cl::aliasopt(OutputFilename));
25 
26 static cl::opt<SampleProfileFormat> OutputFormat(
27     "format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
28     cl::values(
29         clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
30         clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
31         clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
32         clEnumValN(SPF_Text, "text", "Text encoding"),
33         clEnumValN(SPF_GCC, "gcc",
34                    "GCC encoding (only meaningful for -sample)")));
35 
36 static cl::opt<bool> UseMD5(
37     "use-md5", cl::Hidden,
38     cl::desc("Use md5 to represent function names in the output profile (only "
39              "meaningful for -extbinary)"));
40 
41 static cl::opt<bool> PopulateProfileSymbolList(
42     "populate-profile-symbol-list", cl::init(false), cl::Hidden,
43     cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
44 
45 static cl::opt<bool> FillZeroForAllFuncs(
46     "fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
47     cl::desc("Attribute all functions' range with zero count "
48              "even it's not hit by any samples."));
49 
50 static cl::opt<int32_t, true> RecursionCompression(
51     "compress-recursion",
52     cl::desc("Compressing recursion by deduplicating adjacent frame "
53              "sequences up to the specified size. -1 means no size limit."),
54     cl::Hidden,
55     cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
56 
57 static cl::opt<bool>
58     TrimColdProfile("trim-cold-profile",
59                     cl::desc("If the total count of the profile is smaller "
60                              "than threshold, it will be trimmed."));
61 
62 static cl::opt<bool> CSProfMergeColdContext(
63     "csprof-merge-cold-context", cl::init(true),
64     cl::desc("If the total count of context profile is smaller than "
65              "the threshold, it will be merged into context-less base "
66              "profile."));
67 
68 static cl::opt<uint32_t> CSProfMaxColdContextDepth(
69     "csprof-max-cold-context-depth", cl::init(1),
70     cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
71              "context-less base profile"));
72 
73 static cl::opt<int, true> CSProfMaxContextDepth(
74     "csprof-max-context-depth",
75     cl::desc("Keep the last K contexts while merging profile. -1 means no "
76              "depth limit."),
77     cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
78 
79 static cl::opt<double> HotFunctionDensityThreshold(
80     "hot-function-density-threshold", llvm::cl::init(1000),
81     llvm::cl::desc(
82         "specify density threshold for hot functions (default: 1000)"),
83     llvm::cl::Optional);
84 static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
85                                  llvm::cl::desc("show profile density details"),
86                                  llvm::cl::Optional);
87 
88 static cl::opt<bool> UpdateTotalSamples(
89     "update-total-samples", llvm::cl::init(false),
90     llvm::cl::desc(
91         "Update total samples by accumulating all its body samples."),
92     llvm::cl::Optional);
93 
94 static cl::opt<bool> GenCSNestedProfile(
95     "gen-cs-nested-profile", cl::Hidden, cl::init(true),
96     cl::desc("Generate nested function profiles for CSSPGO"));
97 
98 cl::opt<bool> InferMissingFrames(
99     "infer-missing-frames", llvm::cl::init(true),
100     llvm::cl::desc(
101         "Infer missing call frames due to compiler tail call elimination."),
102     llvm::cl::Optional);
103 
104 using namespace llvm;
105 using namespace sampleprof;
106 
107 namespace llvm {
108 extern cl::opt<int> ProfileSummaryCutoffHot;
109 extern cl::opt<bool> UseContextLessSummary;
110 
111 namespace sampleprof {
112 
113 // Initialize the MaxCompressionSize to -1 which means no size limit
114 int32_t CSProfileGenerator::MaxCompressionSize = -1;
115 
116 int CSProfileGenerator::MaxContextDepth = -1;
117 
118 bool ProfileGeneratorBase::UseFSDiscriminator = false;
119 
120 std::unique_ptr<ProfileGeneratorBase>
create(ProfiledBinary * Binary,const ContextSampleCounterMap * SampleCounters,bool ProfileIsCS)121 ProfileGeneratorBase::create(ProfiledBinary *Binary,
122                              const ContextSampleCounterMap *SampleCounters,
123                              bool ProfileIsCS) {
124   std::unique_ptr<ProfileGeneratorBase> Generator;
125   if (ProfileIsCS) {
126     if (Binary->useFSDiscriminator())
127       exitWithError("FS discriminator is not supported in CS profile.");
128     Generator.reset(new CSProfileGenerator(Binary, SampleCounters));
129   } else {
130     Generator.reset(new ProfileGenerator(Binary, SampleCounters));
131   }
132   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
133   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
134 
135   return Generator;
136 }
137 
138 std::unique_ptr<ProfileGeneratorBase>
create(ProfiledBinary * Binary,SampleProfileMap & Profiles,bool ProfileIsCS)139 ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
140                              bool ProfileIsCS) {
141   std::unique_ptr<ProfileGeneratorBase> Generator;
142   if (ProfileIsCS) {
143     if (Binary->useFSDiscriminator())
144       exitWithError("FS discriminator is not supported in CS profile.");
145     Generator.reset(new CSProfileGenerator(Binary, Profiles));
146   } else {
147     Generator.reset(new ProfileGenerator(Binary, std::move(Profiles)));
148   }
149   ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
150   FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
151 
152   return Generator;
153 }
154 
write(std::unique_ptr<SampleProfileWriter> Writer,SampleProfileMap & ProfileMap)155 void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
156                                  SampleProfileMap &ProfileMap) {
157   // Populate profile symbol list if extended binary format is used.
158   ProfileSymbolList SymbolList;
159 
160   if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
161     Binary->populateSymbolListFromDWARF(SymbolList);
162     Writer->setProfileSymbolList(&SymbolList);
163   }
164 
165   if (std::error_code EC = Writer->write(ProfileMap))
166     exitWithError(std::move(EC));
167 }
168 
write()169 void ProfileGeneratorBase::write() {
170   auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
171   if (std::error_code EC = WriterOrErr.getError())
172     exitWithError(EC, OutputFilename);
173 
174   if (UseMD5) {
175     if (OutputFormat != SPF_Ext_Binary)
176       WithColor::warning() << "-use-md5 is ignored. Specify "
177                               "--format=extbinary to enable it\n";
178     else
179       WriterOrErr.get()->setUseMD5();
180   }
181 
182   write(std::move(WriterOrErr.get()), ProfileMap);
183 }
184 
showDensitySuggestion(double Density)185 void ProfileGeneratorBase::showDensitySuggestion(double Density) {
186   if (Density == 0.0)
187     WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
188                             "set too low. Please check your command.\n";
189   else if (Density < HotFunctionDensityThreshold)
190     WithColor::warning()
191         << "AutoFDO is estimated to optimize better with "
192         << format("%.1f", HotFunctionDensityThreshold / Density)
193         << "x more samples. Please consider increasing sampling rate or "
194            "profiling for longer duration to get more samples.\n";
195 
196   if (ShowDensity)
197     outs() << "Minimum profile density for hot functions with top "
198            << format("%.2f",
199                      static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
200                          10000)
201            << "% total samples: " << format("%.1f", Density) << "\n";
202 }
203 
calculateDensity(const SampleProfileMap & Profiles,uint64_t HotCntThreshold)204 double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
205                                               uint64_t HotCntThreshold) {
206   double Density = DBL_MAX;
207   std::vector<const FunctionSamples *> HotFuncs;
208   for (auto &I : Profiles) {
209     auto &FuncSamples = I.second;
210     if (FuncSamples.getTotalSamples() < HotCntThreshold)
211       continue;
212     HotFuncs.emplace_back(&FuncSamples);
213   }
214 
215   for (auto *FuncSamples : HotFuncs) {
216     auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
217     if (!Func)
218       continue;
219     uint64_t FuncSize = Func->getFuncSize();
220     if (FuncSize == 0)
221       continue;
222     Density =
223         std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
224                               FuncSize);
225   }
226 
227   return Density == DBL_MAX ? 0.0 : Density;
228 }
229 
findDisjointRanges(RangeSample & DisjointRanges,const RangeSample & Ranges)230 void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
231                                               const RangeSample &Ranges) {
232 
233   /*
234   Regions may overlap with each other. Using the boundary info, find all
235   disjoint ranges and their sample count. BoundaryPoint contains the count
236   multiple samples begin/end at this points.
237 
238   |<--100-->|           Sample1
239   |<------200------>|   Sample2
240   A         B       C
241 
242   In the example above,
243   Sample1 begins at A, ends at B, its value is 100.
244   Sample2 beings at A, ends at C, its value is 200.
245   For A, BeginCount is the sum of sample begins at A, which is 300 and no
246   samples ends at A, so EndCount is 0.
247   Then boundary points A, B, and C with begin/end counts are:
248   A: (300, 0)
249   B: (0, 100)
250   C: (0, 200)
251   */
252   struct BoundaryPoint {
253     // Sum of sample counts beginning at this point
254     uint64_t BeginCount = UINT64_MAX;
255     // Sum of sample counts ending at this point
256     uint64_t EndCount = UINT64_MAX;
257     // Is the begin point of a zero range.
258     bool IsZeroRangeBegin = false;
259     // Is the end point of a zero range.
260     bool IsZeroRangeEnd = false;
261 
262     void addBeginCount(uint64_t Count) {
263       if (BeginCount == UINT64_MAX)
264         BeginCount = 0;
265       BeginCount += Count;
266     }
267 
268     void addEndCount(uint64_t Count) {
269       if (EndCount == UINT64_MAX)
270         EndCount = 0;
271       EndCount += Count;
272     }
273   };
274 
275   /*
276   For the above example. With boundary points, follwing logic finds two
277   disjoint region of
278 
279   [A,B]:   300
280   [B+1,C]: 200
281 
282   If there is a boundary point that both begin and end, the point itself
283   becomes a separate disjoint region. For example, if we have original
284   ranges of
285 
286   |<--- 100 --->|
287                 |<--- 200 --->|
288   A             B             C
289 
290   there are three boundary points with their begin/end counts of
291 
292   A: (100, 0)
293   B: (200, 100)
294   C: (0, 200)
295 
296   the disjoint ranges would be
297 
298   [A, B-1]: 100
299   [B, B]:   300
300   [B+1, C]: 200.
301 
302   Example for zero value range:
303 
304     |<--- 100 --->|
305                        |<--- 200 --->|
306   |<---------------  0 ----------------->|
307   A  B            C    D             E   F
308 
309   [A, B-1]  : 0
310   [B, C]    : 100
311   [C+1, D-1]: 0
312   [D, E]    : 200
313   [E+1, F]  : 0
314   */
315   std::map<uint64_t, BoundaryPoint> Boundaries;
316 
317   for (const auto &Item : Ranges) {
318     assert(Item.first.first <= Item.first.second &&
319            "Invalid instruction range");
320     auto &BeginPoint = Boundaries[Item.first.first];
321     auto &EndPoint = Boundaries[Item.first.second];
322     uint64_t Count = Item.second;
323 
324     BeginPoint.addBeginCount(Count);
325     EndPoint.addEndCount(Count);
326     if (Count == 0) {
327       BeginPoint.IsZeroRangeBegin = true;
328       EndPoint.IsZeroRangeEnd = true;
329     }
330   }
331 
332   // Use UINT64_MAX to indicate there is no existing range between BeginAddress
333   // and the next valid address
334   uint64_t BeginAddress = UINT64_MAX;
335   int ZeroRangeDepth = 0;
336   uint64_t Count = 0;
337   for (const auto &Item : Boundaries) {
338     uint64_t Address = Item.first;
339     const BoundaryPoint &Point = Item.second;
340     if (Point.BeginCount != UINT64_MAX) {
341       if (BeginAddress != UINT64_MAX)
342         DisjointRanges[{BeginAddress, Address - 1}] = Count;
343       Count += Point.BeginCount;
344       BeginAddress = Address;
345       ZeroRangeDepth += Point.IsZeroRangeBegin;
346     }
347     if (Point.EndCount != UINT64_MAX) {
348       assert((BeginAddress != UINT64_MAX) &&
349              "First boundary point cannot be 'end' point");
350       DisjointRanges[{BeginAddress, Address}] = Count;
351       assert(Count >= Point.EndCount && "Mismatched live ranges");
352       Count -= Point.EndCount;
353       BeginAddress = Address + 1;
354       ZeroRangeDepth -= Point.IsZeroRangeEnd;
355       // If the remaining count is zero and it's no longer in a zero range, this
356       // means we consume all the ranges before, thus mark BeginAddress as
357       // UINT64_MAX. e.g. supposing we have two non-overlapping ranges:
358       //  [<---- 10 ---->]
359       //                       [<---- 20 ---->]
360       //   A             B     C              D
361       // The BeginAddress(B+1) will reset to invalid(UINT64_MAX), so we won't
362       // have the [B+1, C-1] zero range.
363       if (Count == 0 && ZeroRangeDepth == 0)
364         BeginAddress = UINT64_MAX;
365     }
366   }
367 }
368 
updateBodySamplesforFunctionProfile(FunctionSamples & FunctionProfile,const SampleContextFrame & LeafLoc,uint64_t Count)369 void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
370     FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
371     uint64_t Count) {
372   // Use the maximum count of samples with same line location
373   uint32_t Discriminator = getBaseDiscriminator(LeafLoc.Location.Discriminator);
374 
375   // Use duplication factor to compensated for loop unroll/vectorization.
376   // Note that this is only needed when we're taking MAX of the counts at
377   // the location instead of SUM.
378   Count *= getDuplicationFactor(LeafLoc.Location.Discriminator);
379 
380   ErrorOr<uint64_t> R =
381       FunctionProfile.findSamplesAt(LeafLoc.Location.LineOffset, Discriminator);
382 
383   uint64_t PreviousCount = R ? R.get() : 0;
384   if (PreviousCount <= Count) {
385     FunctionProfile.addBodySamples(LeafLoc.Location.LineOffset, Discriminator,
386                                    Count - PreviousCount);
387   }
388 }
389 
updateTotalSamples()390 void ProfileGeneratorBase::updateTotalSamples() {
391   for (auto &Item : ProfileMap) {
392     FunctionSamples &FunctionProfile = Item.second;
393     FunctionProfile.updateTotalSamples();
394   }
395 }
396 
updateCallsiteSamples()397 void ProfileGeneratorBase::updateCallsiteSamples() {
398   for (auto &Item : ProfileMap) {
399     FunctionSamples &FunctionProfile = Item.second;
400     FunctionProfile.updateCallsiteSamples();
401   }
402 }
403 
updateFunctionSamples()404 void ProfileGeneratorBase::updateFunctionSamples() {
405   updateCallsiteSamples();
406 
407   if (UpdateTotalSamples)
408     updateTotalSamples();
409 }
410 
collectProfiledFunctions()411 void ProfileGeneratorBase::collectProfiledFunctions() {
412   std::unordered_set<const BinaryFunction *> ProfiledFunctions;
413   if (collectFunctionsFromRawProfile(ProfiledFunctions))
414     Binary->setProfiledFunctions(ProfiledFunctions);
415   else if (collectFunctionsFromLLVMProfile(ProfiledFunctions))
416     Binary->setProfiledFunctions(ProfiledFunctions);
417   else
418     llvm_unreachable("Unsupported input profile");
419 }
420 
collectFunctionsFromRawProfile(std::unordered_set<const BinaryFunction * > & ProfiledFunctions)421 bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
422     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
423   if (!SampleCounters)
424     return false;
425   // Go through all the stacks, ranges and branches in sample counters, use
426   // the start of the range to look up the function it belongs and record the
427   // function.
428   for (const auto &CI : *SampleCounters) {
429     if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
430       for (auto StackAddr : CtxKey->Context) {
431         if (FuncRange *FRange = Binary->findFuncRange(StackAddr))
432           ProfiledFunctions.insert(FRange->Func);
433       }
434     }
435 
436     for (auto Item : CI.second.RangeCounter) {
437       uint64_t StartAddress = Item.first.first;
438       if (FuncRange *FRange = Binary->findFuncRange(StartAddress))
439         ProfiledFunctions.insert(FRange->Func);
440     }
441 
442     for (auto Item : CI.second.BranchCounter) {
443       uint64_t SourceAddress = Item.first.first;
444       uint64_t TargetAddress = Item.first.second;
445       if (FuncRange *FRange = Binary->findFuncRange(SourceAddress))
446         ProfiledFunctions.insert(FRange->Func);
447       if (FuncRange *FRange = Binary->findFuncRange(TargetAddress))
448         ProfiledFunctions.insert(FRange->Func);
449     }
450   }
451   return true;
452 }
453 
collectFunctionsFromLLVMProfile(std::unordered_set<const BinaryFunction * > & ProfiledFunctions)454 bool ProfileGenerator::collectFunctionsFromLLVMProfile(
455     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
456   for (const auto &FS : ProfileMap) {
457     if (auto *Func = Binary->getBinaryFunction(FS.first.getName()))
458       ProfiledFunctions.insert(Func);
459   }
460   return true;
461 }
462 
collectFunctionsFromLLVMProfile(std::unordered_set<const BinaryFunction * > & ProfiledFunctions)463 bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
464     std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
465   for (auto *Node : ContextTracker) {
466     if (!Node->getFuncName().empty())
467       if (auto *Func = Binary->getBinaryFunction(Node->getFuncName()))
468         ProfiledFunctions.insert(Func);
469   }
470   return true;
471 }
472 
473 FunctionSamples &
getTopLevelFunctionProfile(StringRef FuncName)474 ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
475   SampleContext Context(FuncName);
476   auto Ret = ProfileMap.emplace(Context, FunctionSamples());
477   if (Ret.second) {
478     FunctionSamples &FProfile = Ret.first->second;
479     FProfile.setContext(Context);
480   }
481   return Ret.first->second;
482 }
483 
generateProfile()484 void ProfileGenerator::generateProfile() {
485   collectProfiledFunctions();
486 
487   if (Binary->usePseudoProbes())
488     Binary->decodePseudoProbe();
489 
490   if (SampleCounters) {
491     if (Binary->usePseudoProbes()) {
492       generateProbeBasedProfile();
493     } else {
494       generateLineNumBasedProfile();
495     }
496   }
497 
498   postProcessProfiles();
499 }
500 
postProcessProfiles()501 void ProfileGenerator::postProcessProfiles() {
502   computeSummaryAndThreshold(ProfileMap);
503   trimColdProfiles(ProfileMap, ColdCountThreshold);
504   calculateAndShowDensity(ProfileMap);
505 }
506 
trimColdProfiles(const SampleProfileMap & Profiles,uint64_t ColdCntThreshold)507 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
508                                         uint64_t ColdCntThreshold) {
509   if (!TrimColdProfile)
510     return;
511 
512   // Move cold profiles into a tmp container.
513   std::vector<SampleContext> ColdProfiles;
514   for (const auto &I : ProfileMap) {
515     if (I.second.getTotalSamples() < ColdCntThreshold)
516       ColdProfiles.emplace_back(I.first);
517   }
518 
519   // Remove the cold profile from ProfileMap.
520   for (const auto &I : ColdProfiles)
521     ProfileMap.erase(I);
522 }
523 
generateLineNumBasedProfile()524 void ProfileGenerator::generateLineNumBasedProfile() {
525   assert(SampleCounters->size() == 1 &&
526          "Must have one entry for profile generation.");
527   const SampleCounter &SC = SampleCounters->begin()->second;
528   // Fill in function body samples
529   populateBodySamplesForAllFunctions(SC.RangeCounter);
530   // Fill in boundary sample counts as well as call site samples for calls
531   populateBoundarySamplesForAllFunctions(SC.BranchCounter);
532 
533   updateFunctionSamples();
534 }
535 
generateProbeBasedProfile()536 void ProfileGenerator::generateProbeBasedProfile() {
537   assert(SampleCounters->size() == 1 &&
538          "Must have one entry for profile generation.");
539   // Enable pseudo probe functionalities in SampleProf
540   FunctionSamples::ProfileIsProbeBased = true;
541   const SampleCounter &SC = SampleCounters->begin()->second;
542   // Fill in function body samples
543   populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
544   // Fill in boundary sample counts as well as call site samples for calls
545   populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
546 
547   updateFunctionSamples();
548 }
549 
populateBodySamplesWithProbesForAllFunctions(const RangeSample & RangeCounter)550 void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
551     const RangeSample &RangeCounter) {
552   ProbeCounterMap ProbeCounter;
553   // preprocessRangeCounter returns disjoint ranges, so no longer to redo it
554   // inside extractProbesFromRange.
555   extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
556                          false);
557 
558   for (const auto &PI : ProbeCounter) {
559     const MCDecodedPseudoProbe *Probe = PI.first;
560     uint64_t Count = PI.second;
561     SampleContextFrameVector FrameVec;
562     Binary->getInlineContextForProbe(Probe, FrameVec, true);
563     FunctionSamples &FunctionProfile =
564         getLeafProfileAndAddTotalSamples(FrameVec, Count);
565     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
566     if (Probe->isEntry())
567       FunctionProfile.addHeadSamples(Count);
568   }
569 }
570 
populateBoundarySamplesWithProbesForAllFunctions(const BranchSample & BranchCounters)571 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
572     const BranchSample &BranchCounters) {
573   for (const auto &Entry : BranchCounters) {
574     uint64_t SourceAddress = Entry.first.first;
575     uint64_t TargetAddress = Entry.first.second;
576     uint64_t Count = Entry.second;
577     assert(Count != 0 && "Unexpected zero weight branch");
578 
579     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
580     if (CalleeName.size() == 0)
581       continue;
582 
583     const MCDecodedPseudoProbe *CallProbe =
584         Binary->getCallProbeForAddr(SourceAddress);
585     if (CallProbe == nullptr)
586       continue;
587 
588     // Record called target sample and its count.
589     SampleContextFrameVector FrameVec;
590     Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
591 
592     if (!FrameVec.empty()) {
593       FunctionSamples &FunctionProfile =
594           getLeafProfileAndAddTotalSamples(FrameVec, 0);
595       FunctionProfile.addCalledTargetSamples(
596           FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
597     }
598   }
599 }
600 
getLeafProfileAndAddTotalSamples(const SampleContextFrameVector & FrameVec,uint64_t Count)601 FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
602     const SampleContextFrameVector &FrameVec, uint64_t Count) {
603   // Get top level profile
604   FunctionSamples *FunctionProfile =
605       &getTopLevelFunctionProfile(FrameVec[0].FuncName);
606   FunctionProfile->addTotalSamples(Count);
607   if (Binary->usePseudoProbes()) {
608     const auto *FuncDesc = Binary->getFuncDescForGUID(
609         Function::getGUID(FunctionProfile->getName()));
610     FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
611   }
612 
613   for (size_t I = 1; I < FrameVec.size(); I++) {
614     LineLocation Callsite(
615         FrameVec[I - 1].Location.LineOffset,
616         getBaseDiscriminator(FrameVec[I - 1].Location.Discriminator));
617     FunctionSamplesMap &SamplesMap =
618         FunctionProfile->functionSamplesAt(Callsite);
619     auto Ret =
620         SamplesMap.emplace(FrameVec[I].FuncName.str(), FunctionSamples());
621     if (Ret.second) {
622       SampleContext Context(FrameVec[I].FuncName);
623       Ret.first->second.setContext(Context);
624     }
625     FunctionProfile = &Ret.first->second;
626     FunctionProfile->addTotalSamples(Count);
627     if (Binary->usePseudoProbes()) {
628       const auto *FuncDesc = Binary->getFuncDescForGUID(
629           Function::getGUID(FunctionProfile->getName()));
630       FunctionProfile->setFunctionHash(FuncDesc->FuncHash);
631     }
632   }
633 
634   return *FunctionProfile;
635 }
636 
637 RangeSample
preprocessRangeCounter(const RangeSample & RangeCounter)638 ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
639   RangeSample Ranges(RangeCounter.begin(), RangeCounter.end());
640   if (FillZeroForAllFuncs) {
641     for (auto &FuncI : Binary->getAllBinaryFunctions()) {
642       for (auto &R : FuncI.second.Ranges) {
643         Ranges[{R.first, R.second - 1}] += 0;
644       }
645     }
646   } else {
647     // For each range, we search for all ranges of the function it belongs to
648     // and initialize it with zero count, so it remains zero if doesn't hit any
649     // samples. This is to be consistent with compiler that interpret zero count
650     // as unexecuted(cold).
651     for (const auto &I : RangeCounter) {
652       uint64_t StartAddress = I.first.first;
653       for (const auto &Range : Binary->getRanges(StartAddress))
654         Ranges[{Range.first, Range.second - 1}] += 0;
655     }
656   }
657   RangeSample DisjointRanges;
658   findDisjointRanges(DisjointRanges, Ranges);
659   return DisjointRanges;
660 }
661 
populateBodySamplesForAllFunctions(const RangeSample & RangeCounter)662 void ProfileGenerator::populateBodySamplesForAllFunctions(
663     const RangeSample &RangeCounter) {
664   for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
665     uint64_t RangeBegin = Range.first.first;
666     uint64_t RangeEnd = Range.first.second;
667     uint64_t Count = Range.second;
668 
669     InstructionPointer IP(Binary, RangeBegin, true);
670     // Disjoint ranges may have range in the middle of two instr,
671     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
672     // can be Addr1+1 to Addr2-1. We should ignore such range.
673     if (IP.Address > RangeEnd)
674       continue;
675 
676     do {
677       const SampleContextFrameVector FrameVec =
678           Binary->getFrameLocationStack(IP.Address);
679       if (!FrameVec.empty()) {
680         // FIXME: As accumulating total count per instruction caused some
681         // regression, we changed to accumulate total count per byte as a
682         // workaround. Tuning hotness threshold on the compiler side might be
683         // necessary in the future.
684         FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
685             FrameVec, Count * Binary->getInstSize(IP.Address));
686         updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
687                                             Count);
688       }
689     } while (IP.advance() && IP.Address <= RangeEnd);
690   }
691 }
692 
693 StringRef
getCalleeNameForAddress(uint64_t TargetAddress)694 ProfileGeneratorBase::getCalleeNameForAddress(uint64_t TargetAddress) {
695   // Get the function range by branch target if it's a call branch.
696   auto *FRange = Binary->findFuncRangeForStartAddr(TargetAddress);
697 
698   // We won't accumulate sample count for a range whose start is not the real
699   // function entry such as outlined function or inner labels.
700   if (!FRange || !FRange->IsFuncEntry)
701     return StringRef();
702 
703   return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
704 }
705 
populateBoundarySamplesForAllFunctions(const BranchSample & BranchCounters)706 void ProfileGenerator::populateBoundarySamplesForAllFunctions(
707     const BranchSample &BranchCounters) {
708   for (const auto &Entry : BranchCounters) {
709     uint64_t SourceAddress = Entry.first.first;
710     uint64_t TargetAddress = Entry.first.second;
711     uint64_t Count = Entry.second;
712     assert(Count != 0 && "Unexpected zero weight branch");
713 
714     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
715     if (CalleeName.size() == 0)
716       continue;
717     // Record called target sample and its count.
718     const SampleContextFrameVector &FrameVec =
719         Binary->getCachedFrameLocationStack(SourceAddress);
720     if (!FrameVec.empty()) {
721       FunctionSamples &FunctionProfile =
722           getLeafProfileAndAddTotalSamples(FrameVec, 0);
723       FunctionProfile.addCalledTargetSamples(
724           FrameVec.back().Location.LineOffset,
725           getBaseDiscriminator(FrameVec.back().Location.Discriminator),
726           CalleeName, Count);
727     }
728     // Add head samples for callee.
729     FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
730     CalleeProfile.addHeadSamples(Count);
731   }
732 }
733 
calculateAndShowDensity(const SampleProfileMap & Profiles)734 void ProfileGeneratorBase::calculateAndShowDensity(
735     const SampleProfileMap &Profiles) {
736   double Density = calculateDensity(Profiles, HotCountThreshold);
737   showDensitySuggestion(Density);
738 }
739 
740 FunctionSamples *
getOrCreateFunctionSamples(ContextTrieNode * ContextNode,bool WasLeafInlined)741 CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
742                                                bool WasLeafInlined) {
743   FunctionSamples *FProfile = ContextNode->getFunctionSamples();
744   if (!FProfile) {
745     FSamplesList.emplace_back();
746     FProfile = &FSamplesList.back();
747     FProfile->setName(ContextNode->getFuncName());
748     ContextNode->setFunctionSamples(FProfile);
749   }
750   // Update ContextWasInlined attribute for existing contexts.
751   // The current function can be called in two ways:
752   //  - when processing a probe of the current frame
753   //  - when processing the entry probe of an inlinee's frame, which
754   //    is then used to update the callsite count of the current frame.
755   // The two can happen in any order, hence here we are making sure
756   // `ContextWasInlined` is always set as expected.
757   // TODO: Note that the former does not always happen if no probes of the
758   // current frame has samples, and if the latter happens, we could lose the
759   // attribute. This should be fixed.
760   if (WasLeafInlined)
761     FProfile->getContext().setAttribute(ContextWasInlined);
762   return FProfile;
763 }
764 
765 ContextTrieNode *
getOrCreateContextNode(const SampleContextFrames Context,bool WasLeafInlined)766 CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
767                                            bool WasLeafInlined) {
768   ContextTrieNode *ContextNode =
769       ContextTracker.getOrCreateContextPath(Context, true);
770   getOrCreateFunctionSamples(ContextNode, WasLeafInlined);
771   return ContextNode;
772 }
773 
generateProfile()774 void CSProfileGenerator::generateProfile() {
775   FunctionSamples::ProfileIsCS = true;
776 
777   collectProfiledFunctions();
778 
779   if (Binary->usePseudoProbes()) {
780     Binary->decodePseudoProbe();
781     if (InferMissingFrames)
782       initializeMissingFrameInferrer();
783   }
784 
785   if (SampleCounters) {
786     if (Binary->usePseudoProbes()) {
787       generateProbeBasedProfile();
788     } else {
789       generateLineNumBasedProfile();
790     }
791   }
792 
793   if (Binary->getTrackFuncContextSize())
794     computeSizeForProfiledFunctions();
795 
796   postProcessProfiles();
797 }
798 
initializeMissingFrameInferrer()799 void CSProfileGenerator::initializeMissingFrameInferrer() {
800   Binary->getMissingContextInferrer()->initialize(SampleCounters);
801 }
802 
inferMissingFrames(const SmallVectorImpl<uint64_t> & Context,SmallVectorImpl<uint64_t> & NewContext)803 void CSProfileGenerator::inferMissingFrames(
804     const SmallVectorImpl<uint64_t> &Context,
805     SmallVectorImpl<uint64_t> &NewContext) {
806   Binary->inferMissingFrames(Context, NewContext);
807 }
808 
computeSizeForProfiledFunctions()809 void CSProfileGenerator::computeSizeForProfiledFunctions() {
810   for (auto *Func : Binary->getProfiledFunctions())
811     Binary->computeInlinedContextSizeForFunc(Func);
812 
813   // Flush the symbolizer to save memory.
814   Binary->flushSymbolizer();
815 }
816 
updateFunctionSamples()817 void CSProfileGenerator::updateFunctionSamples() {
818   for (auto *Node : ContextTracker) {
819     FunctionSamples *FSamples = Node->getFunctionSamples();
820     if (FSamples) {
821       if (UpdateTotalSamples)
822         FSamples->updateTotalSamples();
823       FSamples->updateCallsiteSamples();
824     }
825   }
826 }
827 
generateLineNumBasedProfile()828 void CSProfileGenerator::generateLineNumBasedProfile() {
829   for (const auto &CI : *SampleCounters) {
830     const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
831 
832     ContextTrieNode *ContextNode = &getRootContext();
833     // Sample context will be empty if the jump is an external-to-internal call
834     // pattern, the head samples should be added for the internal function.
835     if (!CtxKey->Context.empty()) {
836       // Get or create function profile for the range
837       ContextNode =
838           getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
839       // Fill in function body samples
840       populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
841                                      CI.second.RangeCounter);
842     }
843     // Fill in boundary sample counts as well as call site samples for calls
844     populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
845   }
846   // Fill in call site value sample for inlined calls and also use context to
847   // infer missing samples. Since we don't have call count for inlined
848   // functions, we estimate it from inlinee's profile using the entry of the
849   // body sample.
850   populateInferredFunctionSamples(getRootContext());
851 
852   updateFunctionSamples();
853 }
854 
populateBodySamplesForFunction(FunctionSamples & FunctionProfile,const RangeSample & RangeCounter)855 void CSProfileGenerator::populateBodySamplesForFunction(
856     FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
857   // Compute disjoint ranges first, so we can use MAX
858   // for calculating count for each location.
859   RangeSample Ranges;
860   findDisjointRanges(Ranges, RangeCounter);
861   for (const auto &Range : Ranges) {
862     uint64_t RangeBegin = Range.first.first;
863     uint64_t RangeEnd = Range.first.second;
864     uint64_t Count = Range.second;
865     // Disjoint ranges have introduce zero-filled gap that
866     // doesn't belong to current context, filter them out.
867     if (Count == 0)
868       continue;
869 
870     InstructionPointer IP(Binary, RangeBegin, true);
871     // Disjoint ranges may have range in the middle of two instr,
872     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
873     // can be Addr1+1 to Addr2-1. We should ignore such range.
874     if (IP.Address > RangeEnd)
875       continue;
876 
877     do {
878       auto LeafLoc = Binary->getInlineLeafFrameLoc(IP.Address);
879       if (LeafLoc) {
880         // Recording body sample for this specific context
881         updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
882         FunctionProfile.addTotalSamples(Count);
883       }
884     } while (IP.advance() && IP.Address <= RangeEnd);
885   }
886 }
887 
populateBoundarySamplesForFunction(ContextTrieNode * Node,const BranchSample & BranchCounters)888 void CSProfileGenerator::populateBoundarySamplesForFunction(
889     ContextTrieNode *Node, const BranchSample &BranchCounters) {
890 
891   for (const auto &Entry : BranchCounters) {
892     uint64_t SourceAddress = Entry.first.first;
893     uint64_t TargetAddress = Entry.first.second;
894     uint64_t Count = Entry.second;
895     assert(Count != 0 && "Unexpected zero weight branch");
896 
897     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
898     if (CalleeName.size() == 0)
899       continue;
900 
901     ContextTrieNode *CallerNode = Node;
902     LineLocation CalleeCallSite(0, 0);
903     if (CallerNode != &getRootContext()) {
904       // Record called target sample and its count
905       auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceAddress);
906       if (LeafLoc) {
907         CallerNode->getFunctionSamples()->addCalledTargetSamples(
908             LeafLoc->Location.LineOffset,
909             getBaseDiscriminator(LeafLoc->Location.Discriminator), CalleeName,
910             Count);
911         // Record head sample for called target(callee)
912         CalleeCallSite = LeafLoc->Location;
913       }
914     }
915 
916     ContextTrieNode *CalleeNode =
917         CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName);
918     FunctionSamples *CalleeProfile = getOrCreateFunctionSamples(CalleeNode);
919     CalleeProfile->addHeadSamples(Count);
920   }
921 }
922 
populateInferredFunctionSamples(ContextTrieNode & Node)923 void CSProfileGenerator::populateInferredFunctionSamples(
924     ContextTrieNode &Node) {
925   // There is no call jmp sample between the inliner and inlinee, we need to use
926   // the inlinee's context to infer inliner's context, i.e. parent(inliner)'s
927   // sample depends on child(inlinee)'s sample, so traverse the tree in
928   // post-order.
929   for (auto &It : Node.getAllChildContext())
930     populateInferredFunctionSamples(It.second);
931 
932   FunctionSamples *CalleeProfile = Node.getFunctionSamples();
933   if (!CalleeProfile)
934     return;
935   // If we already have head sample counts, we must have value profile
936   // for call sites added already. Skip to avoid double counting.
937   if (CalleeProfile->getHeadSamples())
938     return;
939   ContextTrieNode *CallerNode = Node.getParentContext();
940   // If we don't have context, nothing to do for caller's call site.
941   // This could happen for entry point function.
942   if (CallerNode == &getRootContext())
943     return;
944 
945   LineLocation CallerLeafFrameLoc = Node.getCallSiteLoc();
946   FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);
947   // Since we don't have call count for inlined functions, we
948   // estimate it from inlinee's profile using entry body sample.
949   uint64_t EstimatedCallCount = CalleeProfile->getHeadSamplesEstimate();
950   // If we don't have samples with location, use 1 to indicate live.
951   if (!EstimatedCallCount && !CalleeProfile->getBodySamples().size())
952     EstimatedCallCount = 1;
953   CallerProfile.addCalledTargetSamples(CallerLeafFrameLoc.LineOffset,
954                                        CallerLeafFrameLoc.Discriminator,
955                                        Node.getFuncName(), EstimatedCallCount);
956   CallerProfile.addBodySamples(CallerLeafFrameLoc.LineOffset,
957                                CallerLeafFrameLoc.Discriminator,
958                                EstimatedCallCount);
959   CallerProfile.addTotalSamples(EstimatedCallCount);
960 }
961 
convertToProfileMap(ContextTrieNode & Node,SampleContextFrameVector & Context)962 void CSProfileGenerator::convertToProfileMap(
963     ContextTrieNode &Node, SampleContextFrameVector &Context) {
964   FunctionSamples *FProfile = Node.getFunctionSamples();
965   if (FProfile) {
966     Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
967     // Save the new context for future references.
968     SampleContextFrames NewContext = *Contexts.insert(Context).first;
969     auto Ret = ProfileMap.emplace(NewContext, std::move(*FProfile));
970     FunctionSamples &NewProfile = Ret.first->second;
971     NewProfile.getContext().setContext(NewContext);
972     Context.pop_back();
973   }
974 
975   for (auto &It : Node.getAllChildContext()) {
976     ContextTrieNode &ChildNode = It.second;
977     Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
978     convertToProfileMap(ChildNode, Context);
979     Context.pop_back();
980   }
981 }
982 
convertToProfileMap()983 void CSProfileGenerator::convertToProfileMap() {
984   assert(ProfileMap.empty() &&
985          "ProfileMap should be empty before converting from the trie");
986   assert(IsProfileValidOnTrie &&
987          "Do not convert the trie twice, it's already destroyed");
988 
989   SampleContextFrameVector Context;
990   for (auto &It : getRootContext().getAllChildContext())
991     convertToProfileMap(It.second, Context);
992 
993   IsProfileValidOnTrie = false;
994 }
995 
postProcessProfiles()996 void CSProfileGenerator::postProcessProfiles() {
997   // Compute hot/cold threshold based on profile. This will be used for cold
998   // context profile merging/trimming.
999   computeSummaryAndThreshold();
1000 
1001   // Run global pre-inliner to adjust/merge context profile based on estimated
1002   // inline decisions.
1003   if (EnableCSPreInliner) {
1004     ContextTracker.populateFuncToCtxtMap();
1005     CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
1006     // Turn off the profile merger by default unless it is explicitly enabled.
1007     if (!CSProfMergeColdContext.getNumOccurrences())
1008       CSProfMergeColdContext = false;
1009   }
1010 
1011   convertToProfileMap();
1012 
1013   // Trim and merge cold context profile using cold threshold above.
1014   if (TrimColdProfile || CSProfMergeColdContext) {
1015     SampleContextTrimmer(ProfileMap)
1016         .trimAndMergeColdContextProfiles(
1017             HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
1018             CSProfMaxColdContextDepth, EnableCSPreInliner);
1019   }
1020 
1021   // Merge function samples of CS profile to calculate profile density.
1022   sampleprof::SampleProfileMap ContextLessProfiles;
1023   for (const auto &I : ProfileMap) {
1024     ContextLessProfiles[I.second.getName()].merge(I.second);
1025   }
1026 
1027   calculateAndShowDensity(ContextLessProfiles);
1028   if (GenCSNestedProfile) {
1029     CSProfileConverter CSConverter(ProfileMap);
1030     CSConverter.convertProfiles();
1031     FunctionSamples::ProfileIsCS = false;
1032   }
1033 }
1034 
computeSummaryAndThreshold(SampleProfileMap & Profiles)1035 void ProfileGeneratorBase::computeSummaryAndThreshold(
1036     SampleProfileMap &Profiles) {
1037   SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
1038   Summary = Builder.computeSummaryForProfiles(Profiles);
1039   HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(
1040       (Summary->getDetailedSummary()));
1041   ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
1042       (Summary->getDetailedSummary()));
1043 }
1044 
computeSummaryAndThreshold()1045 void CSProfileGenerator::computeSummaryAndThreshold() {
1046   // Always merge and use context-less profile map to compute summary.
1047   SampleProfileMap ContextLessProfiles;
1048   ContextTracker.createContextLessProfileMap(ContextLessProfiles);
1049 
1050   // Set the flag below to avoid merging the profile again in
1051   // computeSummaryAndThreshold
1052   FunctionSamples::ProfileIsCS = false;
1053   assert(
1054       (!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&
1055       "Don't set --profile-summary-contextless to false for profile "
1056       "generation");
1057   ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
1058   // Recover the old value.
1059   FunctionSamples::ProfileIsCS = true;
1060 }
1061 
extractProbesFromRange(const RangeSample & RangeCounter,ProbeCounterMap & ProbeCounter,bool FindDisjointRanges)1062 void ProfileGeneratorBase::extractProbesFromRange(
1063     const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
1064     bool FindDisjointRanges) {
1065   const RangeSample *PRanges = &RangeCounter;
1066   RangeSample Ranges;
1067   if (FindDisjointRanges) {
1068     findDisjointRanges(Ranges, RangeCounter);
1069     PRanges = &Ranges;
1070   }
1071 
1072   for (const auto &Range : *PRanges) {
1073     uint64_t RangeBegin = Range.first.first;
1074     uint64_t RangeEnd = Range.first.second;
1075     uint64_t Count = Range.second;
1076 
1077     InstructionPointer IP(Binary, RangeBegin, true);
1078     // Disjoint ranges may have range in the middle of two instr,
1079     // e.g. If Instr1 at Addr1, and Instr2 at Addr2, disjoint range
1080     // can be Addr1+1 to Addr2-1. We should ignore such range.
1081     if (IP.Address > RangeEnd)
1082       continue;
1083 
1084     do {
1085       const AddressProbesMap &Address2ProbesMap =
1086           Binary->getAddress2ProbesMap();
1087       auto It = Address2ProbesMap.find(IP.Address);
1088       if (It != Address2ProbesMap.end()) {
1089         for (const auto &Probe : It->second) {
1090           ProbeCounter[&Probe] += Count;
1091         }
1092       }
1093     } while (IP.advance() && IP.Address <= RangeEnd);
1094   }
1095 }
1096 
extractPrefixContextStack(SampleContextFrameVector & ContextStack,const SmallVectorImpl<uint64_t> & AddrVec,ProfiledBinary * Binary)1097 static void extractPrefixContextStack(SampleContextFrameVector &ContextStack,
1098                                       const SmallVectorImpl<uint64_t> &AddrVec,
1099                                       ProfiledBinary *Binary) {
1100   SmallVector<const MCDecodedPseudoProbe *, 16> Probes;
1101   for (auto Address : reverse(AddrVec)) {
1102     const MCDecodedPseudoProbe *CallProbe =
1103         Binary->getCallProbeForAddr(Address);
1104     // These could be the cases when a probe is not found at a calliste. Cutting
1105     // off the context from here since the inliner will not know how to consume
1106     // a context with unknown callsites.
1107     // 1. for functions that are not sampled when
1108     // --decode-probe-for-profiled-functions-only is on.
1109     // 2. for a merged callsite. Callsite merging may cause the loss of original
1110     // probe IDs.
1111     // 3. for an external callsite.
1112     if (!CallProbe)
1113       break;
1114     Probes.push_back(CallProbe);
1115   }
1116 
1117   std::reverse(Probes.begin(), Probes.end());
1118 
1119   // Extract context stack for reusing, leaf context stack will be added
1120   // compressed while looking up function profile.
1121   for (const auto *P : Probes) {
1122     Binary->getInlineContextForProbe(P, ContextStack, true);
1123   }
1124 }
1125 
generateProbeBasedProfile()1126 void CSProfileGenerator::generateProbeBasedProfile() {
1127   // Enable pseudo probe functionalities in SampleProf
1128   FunctionSamples::ProfileIsProbeBased = true;
1129   for (const auto &CI : *SampleCounters) {
1130     const AddrBasedCtxKey *CtxKey =
1131         dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
1132     // Fill in function body samples from probes, also infer caller's samples
1133     // from callee's probe
1134     populateBodySamplesWithProbes(CI.second.RangeCounter, CtxKey);
1135     // Fill in boundary samples for a call probe
1136     populateBoundarySamplesWithProbes(CI.second.BranchCounter, CtxKey);
1137   }
1138 }
1139 
populateBodySamplesWithProbes(const RangeSample & RangeCounter,const AddrBasedCtxKey * CtxKey)1140 void CSProfileGenerator::populateBodySamplesWithProbes(
1141     const RangeSample &RangeCounter, const AddrBasedCtxKey *CtxKey) {
1142   ProbeCounterMap ProbeCounter;
1143   // Extract the top frame probes by looking up each address among the range in
1144   // the Address2ProbeMap
1145   extractProbesFromRange(RangeCounter, ProbeCounter);
1146   std::unordered_map<MCDecodedPseudoProbeInlineTree *,
1147                      std::unordered_set<FunctionSamples *>>
1148       FrameSamples;
1149   for (const auto &PI : ProbeCounter) {
1150     const MCDecodedPseudoProbe *Probe = PI.first;
1151     uint64_t Count = PI.second;
1152     // Disjoint ranges have introduce zero-filled gap that
1153     // doesn't belong to current context, filter them out.
1154     if (!Probe->isBlock() || Count == 0)
1155       continue;
1156 
1157     ContextTrieNode *ContextNode = getContextNodeForLeafProbe(CtxKey, Probe);
1158     FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
1159     // Record the current frame and FunctionProfile whenever samples are
1160     // collected for non-danglie probes. This is for reporting all of the
1161     // zero count probes of the frame later.
1162     FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
1163     FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
1164     FunctionProfile.addTotalSamples(Count);
1165     if (Probe->isEntry()) {
1166       FunctionProfile.addHeadSamples(Count);
1167       // Look up for the caller's function profile
1168       const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
1169       ContextTrieNode *CallerNode = ContextNode->getParentContext();
1170       if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
1171         // Since the context id will be compressed, we have to use callee's
1172         // context id to infer caller's context id to ensure they share the
1173         // same context prefix.
1174         uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
1175         assert(CallerIndex &&
1176                "Inferred caller's location index shouldn't be zero!");
1177         FunctionSamples &CallerProfile =
1178             *getOrCreateFunctionSamples(CallerNode);
1179         CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
1180         CallerProfile.addBodySamples(CallerIndex, 0, Count);
1181         CallerProfile.addTotalSamples(Count);
1182         CallerProfile.addCalledTargetSamples(CallerIndex, 0,
1183                                              ContextNode->getFuncName(), Count);
1184       }
1185     }
1186   }
1187 
1188   // Assign zero count for remaining probes without sample hits to
1189   // differentiate from probes optimized away, of which the counts are unknown
1190   // and will be inferred by the compiler.
1191   for (auto &I : FrameSamples) {
1192     for (auto *FunctionProfile : I.second) {
1193       for (auto *Probe : I.first->getProbes()) {
1194         FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
1195       }
1196     }
1197   }
1198 }
1199 
populateBoundarySamplesWithProbes(const BranchSample & BranchCounter,const AddrBasedCtxKey * CtxKey)1200 void CSProfileGenerator::populateBoundarySamplesWithProbes(
1201     const BranchSample &BranchCounter, const AddrBasedCtxKey *CtxKey) {
1202   for (const auto &BI : BranchCounter) {
1203     uint64_t SourceAddress = BI.first.first;
1204     uint64_t TargetAddress = BI.first.second;
1205     uint64_t Count = BI.second;
1206     const MCDecodedPseudoProbe *CallProbe =
1207         Binary->getCallProbeForAddr(SourceAddress);
1208     if (CallProbe == nullptr)
1209       continue;
1210     FunctionSamples &FunctionProfile =
1211         getFunctionProfileForLeafProbe(CtxKey, CallProbe);
1212     FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
1213     FunctionProfile.addTotalSamples(Count);
1214     StringRef CalleeName = getCalleeNameForAddress(TargetAddress);
1215     if (CalleeName.size() == 0)
1216       continue;
1217     FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
1218                                            Count);
1219   }
1220 }
1221 
getContextNodeForLeafProbe(const AddrBasedCtxKey * CtxKey,const MCDecodedPseudoProbe * LeafProbe)1222 ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
1223     const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1224 
1225   const SmallVectorImpl<uint64_t> *PContext = &CtxKey->Context;
1226   SmallVector<uint64_t, 16> NewContext;
1227 
1228   if (InferMissingFrames) {
1229     SmallVector<uint64_t, 16> Context = CtxKey->Context;
1230     // Append leaf frame for a complete inference.
1231     Context.push_back(LeafProbe->getAddress());
1232     inferMissingFrames(Context, NewContext);
1233     // Pop out the leaf probe that was pushed in above.
1234     NewContext.pop_back();
1235     PContext = &NewContext;
1236   }
1237 
1238   SampleContextFrameVector ContextStack;
1239   extractPrefixContextStack(ContextStack, *PContext, Binary);
1240 
1241   // Explicitly copy the context for appending the leaf context
1242   SampleContextFrameVector NewContextStack(ContextStack.begin(),
1243                                            ContextStack.end());
1244   Binary->getInlineContextForProbe(LeafProbe, NewContextStack, true);
1245   // For leaf inlined context with the top frame, we should strip off the top
1246   // frame's probe id, like:
1247   // Inlined stack: [foo:1, bar:2], the ContextId will be "foo:1 @ bar"
1248   auto LeafFrame = NewContextStack.back();
1249   LeafFrame.Location = LineLocation(0, 0);
1250   NewContextStack.pop_back();
1251   // Compress the context string except for the leaf frame
1252   CSProfileGenerator::compressRecursionContext(NewContextStack);
1253   CSProfileGenerator::trimContext(NewContextStack);
1254   NewContextStack.push_back(LeafFrame);
1255 
1256   const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
1257   bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
1258   ContextTrieNode *ContextNode =
1259       getOrCreateContextNode(NewContextStack, WasLeafInlined);
1260   ContextNode->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
1261   return ContextNode;
1262 }
1263 
getFunctionProfileForLeafProbe(const AddrBasedCtxKey * CtxKey,const MCDecodedPseudoProbe * LeafProbe)1264 FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
1265     const AddrBasedCtxKey *CtxKey, const MCDecodedPseudoProbe *LeafProbe) {
1266   return *getContextNodeForLeafProbe(CtxKey, LeafProbe)->getFunctionSamples();
1267 }
1268 
1269 } // end namespace sampleprof
1270 } // end namespace llvm
1271