1 ////===- SampleProfileLoadBaseImpl.h - Profile loader base impl --*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file provides the interface for the sampled PGO profile loader base
11 /// implementation.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
16 #define LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/DenseSet.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
26 #include "llvm/Analysis/PostDominators.h"
27 #include "llvm/IR/BasicBlock.h"
28 #include "llvm/IR/CFG.h"
29 #include "llvm/IR/DebugInfoMetadata.h"
30 #include "llvm/IR/DebugLoc.h"
31 #include "llvm/IR/Dominators.h"
32 #include "llvm/IR/Function.h"
33 #include "llvm/IR/Instruction.h"
34 #include "llvm/IR/Instructions.h"
35 #include "llvm/IR/Module.h"
36 #include "llvm/ProfileData/SampleProf.h"
37 #include "llvm/ProfileData/SampleProfReader.h"
38 #include "llvm/Support/CommandLine.h"
39 #include "llvm/Support/GenericDomTree.h"
40 #include "llvm/Support/raw_ostream.h"
41 #include "llvm/Transforms/Utils/SampleProfileInference.h"
42 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
43 
44 namespace llvm {
45 using namespace sampleprof;
46 using namespace sampleprofutil;
47 using ProfileCount = Function::ProfileCount;
48 
49 #define DEBUG_TYPE "sample-profile-impl"
50 
51 namespace afdo_detail {
52 
53 template <typename BlockT> struct IRTraits;
54 template <> struct IRTraits<BasicBlock> {
55   using InstructionT = Instruction;
56   using BasicBlockT = BasicBlock;
57   using FunctionT = Function;
58   using BlockFrequencyInfoT = BlockFrequencyInfo;
59   using LoopT = Loop;
60   using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
61   using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
62   using PostDominatorTreeT = PostDominatorTree;
63   using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
64   using OptRemarkEmitterT = OptimizationRemarkEmitter;
65   using OptRemarkAnalysisT = OptimizationRemarkAnalysis;
66   using PredRangeT = pred_range;
67   using SuccRangeT = succ_range;
68   static Function &getFunction(Function &F) { return F; }
69   static const BasicBlock *getEntryBB(const Function *F) {
70     return &F->getEntryBlock();
71   }
72   static pred_range getPredecessors(BasicBlock *BB) { return predecessors(BB); }
73   static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
74 };
75 
76 } // end namespace afdo_detail
77 
78 extern cl::opt<bool> SampleProfileUseProfi;
79 extern cl::opt<bool> SampleProfileInferEntryCount;
80 
81 template <typename BT> class SampleProfileLoaderBaseImpl {
82 public:
83   SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
84       : Filename(Name), RemappingFilename(RemapName) {}
85   void dump() { Reader->dump(); }
86 
87   using InstructionT = typename afdo_detail::IRTraits<BT>::InstructionT;
88   using BasicBlockT = typename afdo_detail::IRTraits<BT>::BasicBlockT;
89   using BlockFrequencyInfoT =
90       typename afdo_detail::IRTraits<BT>::BlockFrequencyInfoT;
91   using FunctionT = typename afdo_detail::IRTraits<BT>::FunctionT;
92   using LoopT = typename afdo_detail::IRTraits<BT>::LoopT;
93   using LoopInfoPtrT = typename afdo_detail::IRTraits<BT>::LoopInfoPtrT;
94   using DominatorTreePtrT =
95       typename afdo_detail::IRTraits<BT>::DominatorTreePtrT;
96   using PostDominatorTreePtrT =
97       typename afdo_detail::IRTraits<BT>::PostDominatorTreePtrT;
98   using PostDominatorTreeT =
99       typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
100   using OptRemarkEmitterT =
101       typename afdo_detail::IRTraits<BT>::OptRemarkEmitterT;
102   using OptRemarkAnalysisT =
103       typename afdo_detail::IRTraits<BT>::OptRemarkAnalysisT;
104   using PredRangeT = typename afdo_detail::IRTraits<BT>::PredRangeT;
105   using SuccRangeT = typename afdo_detail::IRTraits<BT>::SuccRangeT;
106 
107   using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
108   using EquivalenceClassMap =
109       DenseMap<const BasicBlockT *, const BasicBlockT *>;
110   using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
111   using EdgeWeightMap = DenseMap<Edge, uint64_t>;
112   using BlockEdgeMap =
113       DenseMap<const BasicBlockT *, SmallVector<const BasicBlockT *, 8>>;
114 
115 protected:
116   ~SampleProfileLoaderBaseImpl() = default;
117   friend class SampleCoverageTracker;
118 
119   Function &getFunction(FunctionT &F) {
120     return afdo_detail::IRTraits<BT>::getFunction(F);
121   }
122   const BasicBlockT *getEntryBB(const FunctionT *F) {
123     return afdo_detail::IRTraits<BT>::getEntryBB(F);
124   }
125   PredRangeT getPredecessors(BasicBlockT *BB) {
126     return afdo_detail::IRTraits<BT>::getPredecessors(BB);
127   }
128   SuccRangeT getSuccessors(BasicBlockT *BB) {
129     return afdo_detail::IRTraits<BT>::getSuccessors(BB);
130   }
131 
132   unsigned getFunctionLoc(FunctionT &Func);
133   virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
134   ErrorOr<uint64_t> getInstWeightImpl(const InstructionT &Inst);
135   ErrorOr<uint64_t> getBlockWeight(const BasicBlockT *BB);
136   mutable DenseMap<const DILocation *, const FunctionSamples *>
137       DILocation2SampleMap;
138   virtual const FunctionSamples *
139   findFunctionSamples(const InstructionT &I) const;
140   void printEdgeWeight(raw_ostream &OS, Edge E);
141   void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const;
142   void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB);
143   bool computeBlockWeights(FunctionT &F);
144   void findEquivalenceClasses(FunctionT &F);
145   void findEquivalencesFor(BasicBlockT *BB1,
146                            ArrayRef<BasicBlockT *> Descendants,
147                            PostDominatorTreeT *DomTree);
148   void propagateWeights(FunctionT &F);
149   void applyProfi(FunctionT &F, BlockEdgeMap &Successors,
150                   BlockWeightMap &SampleBlockWeights,
151                   BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights);
152   uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
153   void buildEdges(FunctionT &F);
154   bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
155   void clearFunctionData(bool ResetDT = true);
156   void computeDominanceAndLoopInfo(FunctionT &F);
157   bool
158   computeAndPropagateWeights(FunctionT &F,
159                              const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
160   void initWeightPropagation(FunctionT &F,
161                              const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
162   void
163   finalizeWeightPropagation(FunctionT &F,
164                             const DenseSet<GlobalValue::GUID> &InlinedGUIDs);
165   void emitCoverageRemarks(FunctionT &F);
166 
167   /// Map basic blocks to their computed weights.
168   ///
169   /// The weight of a basic block is defined to be the maximum
170   /// of all the instruction weights in that block.
171   BlockWeightMap BlockWeights;
172 
173   /// Map edges to their computed weights.
174   ///
175   /// Edge weights are computed by propagating basic block weights in
176   /// SampleProfile::propagateWeights.
177   EdgeWeightMap EdgeWeights;
178 
179   /// Set of visited blocks during propagation.
180   SmallPtrSet<const BasicBlockT *, 32> VisitedBlocks;
181 
182   /// Set of visited edges during propagation.
183   SmallSet<Edge, 32> VisitedEdges;
184 
185   /// Equivalence classes for block weights.
186   ///
187   /// Two blocks BB1 and BB2 are in the same equivalence class if they
188   /// dominate and post-dominate each other, and they are in the same loop
189   /// nest. When this happens, the two blocks are guaranteed to execute
190   /// the same number of times.
191   EquivalenceClassMap EquivalenceClass;
192 
193   /// Dominance, post-dominance and loop information.
194   DominatorTreePtrT DT;
195   PostDominatorTreePtrT PDT;
196   LoopInfoPtrT LI;
197 
198   /// Predecessors for each basic block in the CFG.
199   BlockEdgeMap Predecessors;
200 
201   /// Successors for each basic block in the CFG.
202   BlockEdgeMap Successors;
203 
204   /// Profile coverage tracker.
205   SampleCoverageTracker CoverageTracker;
206 
207   /// Profile reader object.
208   std::unique_ptr<SampleProfileReader> Reader;
209 
210   /// Samples collected for the body of this function.
211   FunctionSamples *Samples = nullptr;
212 
213   /// Name of the profile file to load.
214   std::string Filename;
215 
216   /// Name of the profile remapping file to load.
217   std::string RemappingFilename;
218 
219   /// Profile Summary Info computed from sample profile.
220   ProfileSummaryInfo *PSI = nullptr;
221 
222   /// Optimization Remark Emitter used to emit diagnostic remarks.
223   OptRemarkEmitterT *ORE = nullptr;
224 };
225 
226 /// Clear all the per-function data used to load samples and propagate weights.
227 template <typename BT>
228 void SampleProfileLoaderBaseImpl<BT>::clearFunctionData(bool ResetDT) {
229   BlockWeights.clear();
230   EdgeWeights.clear();
231   VisitedBlocks.clear();
232   VisitedEdges.clear();
233   EquivalenceClass.clear();
234   if (ResetDT) {
235     DT = nullptr;
236     PDT = nullptr;
237     LI = nullptr;
238   }
239   Predecessors.clear();
240   Successors.clear();
241   CoverageTracker.clear();
242 }
243 
244 #ifndef NDEBUG
245 /// Print the weight of edge \p E on stream \p OS.
246 ///
247 /// \param OS  Stream to emit the output to.
248 /// \param E  Edge to print.
249 template <typename BT>
250 void SampleProfileLoaderBaseImpl<BT>::printEdgeWeight(raw_ostream &OS, Edge E) {
251   OS << "weight[" << E.first->getName() << "->" << E.second->getName()
252      << "]: " << EdgeWeights[E] << "\n";
253 }
254 
255 /// Print the equivalence class of block \p BB on stream \p OS.
256 ///
257 /// \param OS  Stream to emit the output to.
258 /// \param BB  Block to print.
259 template <typename BT>
260 void SampleProfileLoaderBaseImpl<BT>::printBlockEquivalence(
261     raw_ostream &OS, const BasicBlockT *BB) {
262   const BasicBlockT *Equiv = EquivalenceClass[BB];
263   OS << "equivalence[" << BB->getName()
264      << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
265 }
266 
267 /// Print the weight of block \p BB on stream \p OS.
268 ///
269 /// \param OS  Stream to emit the output to.
270 /// \param BB  Block to print.
271 template <typename BT>
272 void SampleProfileLoaderBaseImpl<BT>::printBlockWeight(
273     raw_ostream &OS, const BasicBlockT *BB) const {
274   const auto &I = BlockWeights.find(BB);
275   uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
276   OS << "weight[" << BB->getName() << "]: " << W << "\n";
277 }
278 #endif
279 
280 /// Get the weight for an instruction.
281 ///
282 /// The "weight" of an instruction \p Inst is the number of samples
283 /// collected on that instruction at runtime. To retrieve it, we
284 /// need to compute the line number of \p Inst relative to the start of its
285 /// function. We use HeaderLineno to compute the offset. We then
286 /// look up the samples collected for \p Inst using BodySamples.
287 ///
288 /// \param Inst Instruction to query.
289 ///
290 /// \returns the weight of \p Inst.
291 template <typename BT>
292 ErrorOr<uint64_t>
293 SampleProfileLoaderBaseImpl<BT>::getInstWeight(const InstructionT &Inst) {
294   return getInstWeightImpl(Inst);
295 }
296 
297 template <typename BT>
298 ErrorOr<uint64_t>
299 SampleProfileLoaderBaseImpl<BT>::getInstWeightImpl(const InstructionT &Inst) {
300   const FunctionSamples *FS = findFunctionSamples(Inst);
301   if (!FS)
302     return std::error_code();
303 
304   const DebugLoc &DLoc = Inst.getDebugLoc();
305   if (!DLoc)
306     return std::error_code();
307 
308   const DILocation *DIL = DLoc;
309   uint32_t LineOffset = FunctionSamples::getOffset(DIL);
310   uint32_t Discriminator;
311   if (EnableFSDiscriminator)
312     Discriminator = DIL->getDiscriminator();
313   else
314     Discriminator = DIL->getBaseDiscriminator();
315 
316   ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
317   if (R) {
318     bool FirstMark =
319         CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
320     if (FirstMark) {
321       ORE->emit([&]() {
322         OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
323         Remark << "Applied " << ore::NV("NumSamples", *R);
324         Remark << " samples from profile (offset: ";
325         Remark << ore::NV("LineOffset", LineOffset);
326         if (Discriminator) {
327           Remark << ".";
328           Remark << ore::NV("Discriminator", Discriminator);
329         }
330         Remark << ")";
331         return Remark;
332       });
333     }
334     LLVM_DEBUG(dbgs() << "    " << DLoc.getLine() << "." << Discriminator << ":"
335                       << Inst << " (line offset: " << LineOffset << "."
336                       << Discriminator << " - weight: " << R.get() << ")\n");
337   }
338   return R;
339 }
340 
341 /// Compute the weight of a basic block.
342 ///
343 /// The weight of basic block \p BB is the maximum weight of all the
344 /// instructions in BB.
345 ///
346 /// \param BB The basic block to query.
347 ///
348 /// \returns the weight for \p BB.
349 template <typename BT>
350 ErrorOr<uint64_t>
351 SampleProfileLoaderBaseImpl<BT>::getBlockWeight(const BasicBlockT *BB) {
352   uint64_t Max = 0;
353   bool HasWeight = false;
354   for (auto &I : *BB) {
355     const ErrorOr<uint64_t> &R = getInstWeight(I);
356     if (R) {
357       Max = std::max(Max, R.get());
358       HasWeight = true;
359     }
360   }
361   return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
362 }
363 
364 /// Compute and store the weights of every basic block.
365 ///
366 /// This populates the BlockWeights map by computing
367 /// the weights of every basic block in the CFG.
368 ///
369 /// \param F The function to query.
370 template <typename BT>
371 bool SampleProfileLoaderBaseImpl<BT>::computeBlockWeights(FunctionT &F) {
372   bool Changed = false;
373   LLVM_DEBUG(dbgs() << "Block weights\n");
374   for (const auto &BB : F) {
375     ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
376     if (Weight) {
377       BlockWeights[&BB] = Weight.get();
378       VisitedBlocks.insert(&BB);
379       Changed = true;
380     }
381     LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
382   }
383 
384   return Changed;
385 }
386 
387 /// Get the FunctionSamples for an instruction.
388 ///
389 /// The FunctionSamples of an instruction \p Inst is the inlined instance
390 /// in which that instruction is coming from. We traverse the inline stack
391 /// of that instruction, and match it with the tree nodes in the profile.
392 ///
393 /// \param Inst Instruction to query.
394 ///
395 /// \returns the FunctionSamples pointer to the inlined instance.
396 template <typename BT>
397 const FunctionSamples *SampleProfileLoaderBaseImpl<BT>::findFunctionSamples(
398     const InstructionT &Inst) const {
399   const DILocation *DIL = Inst.getDebugLoc();
400   if (!DIL)
401     return Samples;
402 
403   auto it = DILocation2SampleMap.try_emplace(DIL, nullptr);
404   if (it.second) {
405     it.first->second = Samples->findFunctionSamples(DIL, Reader->getRemapper());
406   }
407   return it.first->second;
408 }
409 
410 /// Find equivalence classes for the given block.
411 ///
412 /// This finds all the blocks that are guaranteed to execute the same
413 /// number of times as \p BB1. To do this, it traverses all the
414 /// descendants of \p BB1 in the dominator or post-dominator tree.
415 ///
416 /// A block BB2 will be in the same equivalence class as \p BB1 if
417 /// the following holds:
418 ///
419 /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
420 ///    is a descendant of \p BB1 in the dominator tree, then BB2 should
421 ///    dominate BB1 in the post-dominator tree.
422 ///
423 /// 2- Both BB2 and \p BB1 must be in the same loop.
424 ///
425 /// For every block BB2 that meets those two requirements, we set BB2's
426 /// equivalence class to \p BB1.
427 ///
428 /// \param BB1  Block to check.
429 /// \param Descendants  Descendants of \p BB1 in either the dom or pdom tree.
430 /// \param DomTree  Opposite dominator tree. If \p Descendants is filled
431 ///                 with blocks from \p BB1's dominator tree, then
432 ///                 this is the post-dominator tree, and vice versa.
433 template <typename BT>
434 void SampleProfileLoaderBaseImpl<BT>::findEquivalencesFor(
435     BasicBlockT *BB1, ArrayRef<BasicBlockT *> Descendants,
436     PostDominatorTreeT *DomTree) {
437   const BasicBlockT *EC = EquivalenceClass[BB1];
438   uint64_t Weight = BlockWeights[EC];
439   for (const auto *BB2 : Descendants) {
440     bool IsDomParent = DomTree->dominates(BB2, BB1);
441     bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
442     if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
443       EquivalenceClass[BB2] = EC;
444       // If BB2 is visited, then the entire EC should be marked as visited.
445       if (VisitedBlocks.count(BB2)) {
446         VisitedBlocks.insert(EC);
447       }
448 
449       // If BB2 is heavier than BB1, make BB2 have the same weight
450       // as BB1.
451       //
452       // Note that we don't worry about the opposite situation here
453       // (when BB2 is lighter than BB1). We will deal with this
454       // during the propagation phase. Right now, we just want to
455       // make sure that BB1 has the largest weight of all the
456       // members of its equivalence set.
457       Weight = std::max(Weight, BlockWeights[BB2]);
458     }
459   }
460   const BasicBlockT *EntryBB = getEntryBB(EC->getParent());
461   if (EC == EntryBB) {
462     BlockWeights[EC] = Samples->getHeadSamples() + 1;
463   } else {
464     BlockWeights[EC] = Weight;
465   }
466 }
467 
468 /// Find equivalence classes.
469 ///
470 /// Since samples may be missing from blocks, we can fill in the gaps by setting
471 /// the weights of all the blocks in the same equivalence class to the same
472 /// weight. To compute the concept of equivalence, we use dominance and loop
473 /// information. Two blocks B1 and B2 are in the same equivalence class if B1
474 /// dominates B2, B2 post-dominates B1 and both are in the same loop.
475 ///
476 /// \param F The function to query.
477 template <typename BT>
478 void SampleProfileLoaderBaseImpl<BT>::findEquivalenceClasses(FunctionT &F) {
479   SmallVector<BasicBlockT *, 8> DominatedBBs;
480   LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
481   // Find equivalence sets based on dominance and post-dominance information.
482   for (auto &BB : F) {
483     BasicBlockT *BB1 = &BB;
484 
485     // Compute BB1's equivalence class once.
486     if (EquivalenceClass.count(BB1)) {
487       LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
488       continue;
489     }
490 
491     // By default, blocks are in their own equivalence class.
492     EquivalenceClass[BB1] = BB1;
493 
494     // Traverse all the blocks dominated by BB1. We are looking for
495     // every basic block BB2 such that:
496     //
497     // 1- BB1 dominates BB2.
498     // 2- BB2 post-dominates BB1.
499     // 3- BB1 and BB2 are in the same loop nest.
500     //
501     // If all those conditions hold, it means that BB2 is executed
502     // as many times as BB1, so they are placed in the same equivalence
503     // class by making BB2's equivalence class be BB1.
504     DominatedBBs.clear();
505     DT->getDescendants(BB1, DominatedBBs);
506     findEquivalencesFor(BB1, DominatedBBs, &*PDT);
507 
508     LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
509   }
510 
511   // Assign weights to equivalence classes.
512   //
513   // All the basic blocks in the same equivalence class will execute
514   // the same number of times. Since we know that the head block in
515   // each equivalence class has the largest weight, assign that weight
516   // to all the blocks in that equivalence class.
517   LLVM_DEBUG(
518       dbgs() << "\nAssign the same weight to all blocks in the same class\n");
519   for (auto &BI : F) {
520     const BasicBlockT *BB = &BI;
521     const BasicBlockT *EquivBB = EquivalenceClass[BB];
522     if (BB != EquivBB)
523       BlockWeights[BB] = BlockWeights[EquivBB];
524     LLVM_DEBUG(printBlockWeight(dbgs(), BB));
525   }
526 }
527 
528 /// Visit the given edge to decide if it has a valid weight.
529 ///
530 /// If \p E has not been visited before, we copy to \p UnknownEdge
531 /// and increment the count of unknown edges.
532 ///
533 /// \param E  Edge to visit.
534 /// \param NumUnknownEdges  Current number of unknown edges.
535 /// \param UnknownEdge  Set if E has not been visited before.
536 ///
537 /// \returns E's weight, if known. Otherwise, return 0.
538 template <typename BT>
539 uint64_t SampleProfileLoaderBaseImpl<BT>::visitEdge(Edge E,
540                                                     unsigned *NumUnknownEdges,
541                                                     Edge *UnknownEdge) {
542   if (!VisitedEdges.count(E)) {
543     (*NumUnknownEdges)++;
544     *UnknownEdge = E;
545     return 0;
546   }
547 
548   return EdgeWeights[E];
549 }
550 
551 /// Propagate weights through incoming/outgoing edges.
552 ///
553 /// If the weight of a basic block is known, and there is only one edge
554 /// with an unknown weight, we can calculate the weight of that edge.
555 ///
556 /// Similarly, if all the edges have a known count, we can calculate the
557 /// count of the basic block, if needed.
558 ///
559 /// \param F  Function to process.
560 /// \param UpdateBlockCount  Whether we should update basic block counts that
561 ///                          has already been annotated.
562 ///
563 /// \returns  True if new weights were assigned to edges or blocks.
564 template <typename BT>
565 bool SampleProfileLoaderBaseImpl<BT>::propagateThroughEdges(
566     FunctionT &F, bool UpdateBlockCount) {
567   bool Changed = false;
568   LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
569   for (const auto &BI : F) {
570     const BasicBlockT *BB = &BI;
571     const BasicBlockT *EC = EquivalenceClass[BB];
572 
573     // Visit all the predecessor and successor edges to determine
574     // which ones have a weight assigned already. Note that it doesn't
575     // matter that we only keep track of a single unknown edge. The
576     // only case we are interested in handling is when only a single
577     // edge is unknown (see setEdgeOrBlockWeight).
578     for (unsigned i = 0; i < 2; i++) {
579       uint64_t TotalWeight = 0;
580       unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
581       Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
582 
583       if (i == 0) {
584         // First, visit all predecessor edges.
585         NumTotalEdges = Predecessors[BB].size();
586         for (auto *Pred : Predecessors[BB]) {
587           Edge E = std::make_pair(Pred, BB);
588           TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
589           if (E.first == E.second)
590             SelfReferentialEdge = E;
591         }
592         if (NumTotalEdges == 1) {
593           SingleEdge = std::make_pair(Predecessors[BB][0], BB);
594         }
595       } else {
596         // On the second round, visit all successor edges.
597         NumTotalEdges = Successors[BB].size();
598         for (auto *Succ : Successors[BB]) {
599           Edge E = std::make_pair(BB, Succ);
600           TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
601         }
602         if (NumTotalEdges == 1) {
603           SingleEdge = std::make_pair(BB, Successors[BB][0]);
604         }
605       }
606 
607       // After visiting all the edges, there are three cases that we
608       // can handle immediately:
609       //
610       // - All the edge weights are known (i.e., NumUnknownEdges == 0).
611       //   In this case, we simply check that the sum of all the edges
612       //   is the same as BB's weight. If not, we change BB's weight
613       //   to match. Additionally, if BB had not been visited before,
614       //   we mark it visited.
615       //
616       // - Only one edge is unknown and BB has already been visited.
617       //   In this case, we can compute the weight of the edge by
618       //   subtracting the total block weight from all the known
619       //   edge weights. If the edges weight more than BB, then the
620       //   edge of the last remaining edge is set to zero.
621       //
622       // - There exists a self-referential edge and the weight of BB is
623       //   known. In this case, this edge can be based on BB's weight.
624       //   We add up all the other known edges and set the weight on
625       //   the self-referential edge as we did in the previous case.
626       //
627       // In any other case, we must continue iterating. Eventually,
628       // all edges will get a weight, or iteration will stop when
629       // it reaches SampleProfileMaxPropagateIterations.
630       if (NumUnknownEdges <= 1) {
631         uint64_t &BBWeight = BlockWeights[EC];
632         if (NumUnknownEdges == 0) {
633           if (!VisitedBlocks.count(EC)) {
634             // If we already know the weight of all edges, the weight of the
635             // basic block can be computed. It should be no larger than the sum
636             // of all edge weights.
637             if (TotalWeight > BBWeight) {
638               BBWeight = TotalWeight;
639               Changed = true;
640               LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
641                                 << " known. Set weight for block: ";
642                          printBlockWeight(dbgs(), BB););
643             }
644           } else if (NumTotalEdges == 1 &&
645                      EdgeWeights[SingleEdge] < BlockWeights[EC]) {
646             // If there is only one edge for the visited basic block, use the
647             // block weight to adjust edge weight if edge weight is smaller.
648             EdgeWeights[SingleEdge] = BlockWeights[EC];
649             Changed = true;
650           }
651         } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
652           // If there is a single unknown edge and the block has been
653           // visited, then we can compute E's weight.
654           if (BBWeight >= TotalWeight)
655             EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
656           else
657             EdgeWeights[UnknownEdge] = 0;
658           const BasicBlockT *OtherEC;
659           if (i == 0)
660             OtherEC = EquivalenceClass[UnknownEdge.first];
661           else
662             OtherEC = EquivalenceClass[UnknownEdge.second];
663           // Edge weights should never exceed the BB weights it connects.
664           if (VisitedBlocks.count(OtherEC) &&
665               EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
666             EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
667           VisitedEdges.insert(UnknownEdge);
668           Changed = true;
669           LLVM_DEBUG(dbgs() << "Set weight for edge: ";
670                      printEdgeWeight(dbgs(), UnknownEdge));
671         }
672       } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
673         // If a block Weights 0, all its in/out edges should weight 0.
674         if (i == 0) {
675           for (auto *Pred : Predecessors[BB]) {
676             Edge E = std::make_pair(Pred, BB);
677             EdgeWeights[E] = 0;
678             VisitedEdges.insert(E);
679           }
680         } else {
681           for (auto *Succ : Successors[BB]) {
682             Edge E = std::make_pair(BB, Succ);
683             EdgeWeights[E] = 0;
684             VisitedEdges.insert(E);
685           }
686         }
687       } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
688         uint64_t &BBWeight = BlockWeights[BB];
689         // We have a self-referential edge and the weight of BB is known.
690         if (BBWeight >= TotalWeight)
691           EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
692         else
693           EdgeWeights[SelfReferentialEdge] = 0;
694         VisitedEdges.insert(SelfReferentialEdge);
695         Changed = true;
696         LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
697                    printEdgeWeight(dbgs(), SelfReferentialEdge));
698       }
699       if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
700         BlockWeights[EC] = TotalWeight;
701         VisitedBlocks.insert(EC);
702         Changed = true;
703       }
704     }
705   }
706 
707   return Changed;
708 }
709 
710 /// Build in/out edge lists for each basic block in the CFG.
711 ///
712 /// We are interested in unique edges. If a block B1 has multiple
713 /// edges to another block B2, we only add a single B1->B2 edge.
714 template <typename BT>
715 void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
716   for (auto &BI : F) {
717     BasicBlockT *B1 = &BI;
718 
719     // Add predecessors for B1.
720     SmallPtrSet<BasicBlockT *, 16> Visited;
721     if (!Predecessors[B1].empty())
722       llvm_unreachable("Found a stale predecessors list in a basic block.");
723     for (auto *B2 : getPredecessors(B1))
724       if (Visited.insert(B2).second)
725         Predecessors[B1].push_back(B2);
726 
727     // Add successors for B1.
728     Visited.clear();
729     if (!Successors[B1].empty())
730       llvm_unreachable("Found a stale successors list in a basic block.");
731     for (auto *B2 : getSuccessors(B1))
732       if (Visited.insert(B2).second)
733         Successors[B1].push_back(B2);
734   }
735 }
736 
737 /// Propagate weights into edges
738 ///
739 /// The following rules are applied to every block BB in the CFG:
740 ///
741 /// - If BB has a single predecessor/successor, then the weight
742 ///   of that edge is the weight of the block.
743 ///
744 /// - If all incoming or outgoing edges are known except one, and the
745 ///   weight of the block is already known, the weight of the unknown
746 ///   edge will be the weight of the block minus the sum of all the known
747 ///   edges. If the sum of all the known edges is larger than BB's weight,
748 ///   we set the unknown edge weight to zero.
749 ///
750 /// - If there is a self-referential edge, and the weight of the block is
751 ///   known, the weight for that edge is set to the weight of the block
752 ///   minus the weight of the other incoming edges to that block (if
753 ///   known).
754 template <typename BT>
755 void SampleProfileLoaderBaseImpl<BT>::propagateWeights(FunctionT &F) {
756   // Flow-based profile inference is only usable with BasicBlock instantiation
757   // of SampleProfileLoaderBaseImpl.
758   if (SampleProfileUseProfi) {
759     // Prepare block sample counts for inference.
760     BlockWeightMap SampleBlockWeights;
761     for (const auto &BI : F) {
762       ErrorOr<uint64_t> Weight = getBlockWeight(&BI);
763       if (Weight)
764         SampleBlockWeights[&BI] = Weight.get();
765     }
766     // Fill in BlockWeights and EdgeWeights using an inference algorithm.
767     applyProfi(F, Successors, SampleBlockWeights, BlockWeights, EdgeWeights);
768   } else {
769     bool Changed = true;
770     unsigned I = 0;
771 
772     // If BB weight is larger than its corresponding loop's header BB weight,
773     // use the BB weight to replace the loop header BB weight.
774     for (auto &BI : F) {
775       BasicBlockT *BB = &BI;
776       LoopT *L = LI->getLoopFor(BB);
777       if (!L) {
778         continue;
779       }
780       BasicBlockT *Header = L->getHeader();
781       if (Header && BlockWeights[BB] > BlockWeights[Header]) {
782         BlockWeights[Header] = BlockWeights[BB];
783       }
784     }
785 
786     // Propagate until we converge or we go past the iteration limit.
787     while (Changed && I++ < SampleProfileMaxPropagateIterations) {
788       Changed = propagateThroughEdges(F, false);
789     }
790 
791     // The first propagation propagates BB counts from annotated BBs to unknown
792     // BBs. The 2nd propagation pass resets edges weights, and use all BB
793     // weights to propagate edge weights.
794     VisitedEdges.clear();
795     Changed = true;
796     while (Changed && I++ < SampleProfileMaxPropagateIterations) {
797       Changed = propagateThroughEdges(F, false);
798     }
799 
800     // The 3rd propagation pass allows adjust annotated BB weights that are
801     // obviously wrong.
802     Changed = true;
803     while (Changed && I++ < SampleProfileMaxPropagateIterations) {
804       Changed = propagateThroughEdges(F, true);
805     }
806   }
807 }
808 
809 template <typename BT>
810 void SampleProfileLoaderBaseImpl<BT>::applyProfi(
811     FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
812     BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
813   auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
814   Infer.apply(BlockWeights, EdgeWeights);
815 }
816 
817 /// Generate branch weight metadata for all branches in \p F.
818 ///
819 /// Branch weights are computed out of instruction samples using a
820 /// propagation heuristic. Propagation proceeds in 3 phases:
821 ///
822 /// 1- Assignment of block weights. All the basic blocks in the function
823 ///    are initial assigned the same weight as their most frequently
824 ///    executed instruction.
825 ///
826 /// 2- Creation of equivalence classes. Since samples may be missing from
827 ///    blocks, we can fill in the gaps by setting the weights of all the
828 ///    blocks in the same equivalence class to the same weight. To compute
829 ///    the concept of equivalence, we use dominance and loop information.
830 ///    Two blocks B1 and B2 are in the same equivalence class if B1
831 ///    dominates B2, B2 post-dominates B1 and both are in the same loop.
832 ///
833 /// 3- Propagation of block weights into edges. This uses a simple
834 ///    propagation heuristic. The following rules are applied to every
835 ///    block BB in the CFG:
836 ///
837 ///    - If BB has a single predecessor/successor, then the weight
838 ///      of that edge is the weight of the block.
839 ///
840 ///    - If all the edges are known except one, and the weight of the
841 ///      block is already known, the weight of the unknown edge will
842 ///      be the weight of the block minus the sum of all the known
843 ///      edges. If the sum of all the known edges is larger than BB's weight,
844 ///      we set the unknown edge weight to zero.
845 ///
846 ///    - If there is a self-referential edge, and the weight of the block is
847 ///      known, the weight for that edge is set to the weight of the block
848 ///      minus the weight of the other incoming edges to that block (if
849 ///      known).
850 ///
851 /// Since this propagation is not guaranteed to finalize for every CFG, we
852 /// only allow it to proceed for a limited number of iterations (controlled
853 /// by -sample-profile-max-propagate-iterations).
854 ///
855 /// FIXME: Try to replace this propagation heuristic with a scheme
856 /// that is guaranteed to finalize. A work-list approach similar to
857 /// the standard value propagation algorithm used by SSA-CCP might
858 /// work here.
859 ///
860 /// \param F The function to query.
861 ///
862 /// \returns true if \p F was modified. Returns false, otherwise.
863 template <typename BT>
864 bool SampleProfileLoaderBaseImpl<BT>::computeAndPropagateWeights(
865     FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
866   bool Changed = (InlinedGUIDs.size() != 0);
867 
868   // Compute basic block weights.
869   Changed |= computeBlockWeights(F);
870 
871   if (Changed) {
872     // Initialize propagation.
873     initWeightPropagation(F, InlinedGUIDs);
874 
875     // Propagate weights to all edges.
876     propagateWeights(F);
877 
878     // Post-process propagated weights.
879     finalizeWeightPropagation(F, InlinedGUIDs);
880   }
881 
882   return Changed;
883 }
884 
885 template <typename BT>
886 void SampleProfileLoaderBaseImpl<BT>::initWeightPropagation(
887     FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
888   // Add an entry count to the function using the samples gathered at the
889   // function entry.
890   // Sets the GUIDs that are inlined in the profiled binary. This is used
891   // for ThinLink to make correct liveness analysis, and also make the IR
892   // match the profiled binary before annotation.
893   getFunction(F).setEntryCount(
894       ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
895       &InlinedGUIDs);
896 
897   if (!SampleProfileUseProfi) {
898     // Compute dominance and loop info needed for propagation.
899     computeDominanceAndLoopInfo(F);
900 
901     // Find equivalence classes.
902     findEquivalenceClasses(F);
903   }
904 
905   // Before propagation starts, build, for each block, a list of
906   // unique predecessors and successors. This is necessary to handle
907   // identical edges in multiway branches. Since we visit all blocks and all
908   // edges of the CFG, it is cleaner to build these lists once at the start
909   // of the pass.
910   buildEdges(F);
911 }
912 
913 template <typename BT>
914 void SampleProfileLoaderBaseImpl<BT>::finalizeWeightPropagation(
915     FunctionT &F, const DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
916   // If we utilize a flow-based count inference, then we trust the computed
917   // counts and set the entry count as computed by the algorithm. This is
918   // primarily done to sync the counts produced by profi and BFI inference,
919   // which uses the entry count for mass propagation.
920   // If profi produces a zero-value for the entry count, we fallback to
921   // Samples->getHeadSamples() + 1 to avoid functions with zero count.
922   if (SampleProfileUseProfi) {
923     const BasicBlockT *EntryBB = getEntryBB(&F);
924     ErrorOr<uint64_t> EntryWeight = getBlockWeight(EntryBB);
925     if (BlockWeights[EntryBB] > 0 &&
926         (SampleProfileInferEntryCount || !EntryWeight)) {
927       getFunction(F).setEntryCount(
928           ProfileCount(BlockWeights[EntryBB], Function::PCT_Real),
929           &InlinedGUIDs);
930     }
931   }
932 }
933 
934 template <typename BT>
935 void SampleProfileLoaderBaseImpl<BT>::emitCoverageRemarks(FunctionT &F) {
936   // If coverage checking was requested, compute it now.
937   const Function &Func = getFunction(F);
938   if (SampleProfileRecordCoverage) {
939     unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
940     unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
941     unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
942     if (Coverage < SampleProfileRecordCoverage) {
943       Func.getContext().diagnose(DiagnosticInfoSampleProfile(
944           Func.getSubprogram()->getFilename(), getFunctionLoc(F),
945           Twine(Used) + " of " + Twine(Total) + " available profile records (" +
946               Twine(Coverage) + "%) were applied",
947           DS_Warning));
948     }
949   }
950 
951   if (SampleProfileSampleCoverage) {
952     uint64_t Used = CoverageTracker.getTotalUsedSamples();
953     uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
954     unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
955     if (Coverage < SampleProfileSampleCoverage) {
956       Func.getContext().diagnose(DiagnosticInfoSampleProfile(
957           Func.getSubprogram()->getFilename(), getFunctionLoc(F),
958           Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
959               Twine(Coverage) + "%) were applied",
960           DS_Warning));
961     }
962   }
963 }
964 
965 /// Get the line number for the function header.
966 ///
967 /// This looks up function \p F in the current compilation unit and
968 /// retrieves the line number where the function is defined. This is
969 /// line 0 for all the samples read from the profile file. Every line
970 /// number is relative to this line.
971 ///
972 /// \param F  Function object to query.
973 ///
974 /// \returns the line number where \p F is defined. If it returns 0,
975 ///          it means that there is no debug information available for \p F.
976 template <typename BT>
977 unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) {
978   const Function &Func = getFunction(F);
979   if (DISubprogram *S = Func.getSubprogram())
980     return S->getLine();
981 
982   if (NoWarnSampleUnused)
983     return 0;
984 
985   // If the start of \p F is missing, emit a diagnostic to inform the user
986   // about the missed opportunity.
987   Func.getContext().diagnose(DiagnosticInfoSampleProfile(
988       "No debug information found in function " + Func.getName() +
989           ": Function profile not used",
990       DS_Warning));
991   return 0;
992 }
993 
994 template <typename BT>
995 void SampleProfileLoaderBaseImpl<BT>::computeDominanceAndLoopInfo(
996     FunctionT &F) {
997   DT.reset(new DominatorTree);
998   DT->recalculate(F);
999 
1000   PDT.reset(new PostDominatorTree(F));
1001 
1002   LI.reset(new LoopInfo);
1003   LI->analyze(*DT);
1004 }
1005 
1006 #undef DEBUG_TYPE
1007 
1008 } // namespace llvm
1009 #endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILELOADERBASEIMPL_H
1010