1 //===- SampleProfileProbe.cpp - Pseudo probe Instrumentation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SampleProfileProber transformation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
14 #include "llvm/ADT/Statistic.h"
15 #include "llvm/Analysis/BlockFrequencyInfo.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/IR/BasicBlock.h"
18 #include "llvm/IR/Constants.h"
19 #include "llvm/IR/DebugInfoMetadata.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/Instruction.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/MDBuilder.h"
24 #include "llvm/IR/PseudoProbe.h"
25 #include "llvm/ProfileData/SampleProf.h"
26 #include "llvm/Support/CRC.h"
27 #include "llvm/Support/CommandLine.h"
28 #include "llvm/Target/TargetMachine.h"
29 #include "llvm/Transforms/Instrumentation.h"
30 #include "llvm/Transforms/Utils/ModuleUtils.h"
31 #include <unordered_set>
32 #include <vector>
33 
34 using namespace llvm;
35 #define DEBUG_TYPE "sample-profile-probe"
36 
37 STATISTIC(ArtificialDbgLine,
38           "Number of probes that have an artificial debug line");
39 
40 static cl::opt<bool>
41     VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
42                       cl::desc("Do pseudo probe verification"));
43 
44 static cl::list<std::string> VerifyPseudoProbeFuncList(
45     "verify-pseudo-probe-funcs", cl::Hidden,
46     cl::desc("The option to specify the name of the functions to verify."));
47 
48 static cl::opt<bool>
49     UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
50                       cl::desc("Update pseudo probe distribution factor"));
51 
52 static uint64_t getCallStackHash(const DILocation *DIL) {
53   uint64_t Hash = 0;
54   const DILocation *InlinedAt = DIL ? DIL->getInlinedAt() : nullptr;
55   while (InlinedAt) {
56     Hash ^= MD5Hash(std::to_string(InlinedAt->getLine()));
57     Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn()));
58     const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
59     // Use linkage name for C++ if possible.
60     auto Name = SP->getLinkageName();
61     if (Name.empty())
62       Name = SP->getName();
63     Hash ^= MD5Hash(Name);
64     InlinedAt = InlinedAt->getInlinedAt();
65   }
66   return Hash;
67 }
68 
69 static uint64_t computeCallStackHash(const Instruction &Inst) {
70   return getCallStackHash(Inst.getDebugLoc());
71 }
72 
73 bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
74   // Skip function declaration.
75   if (F->isDeclaration())
76     return false;
77   // Skip function that will not be emitted into object file. The prevailing
78   // defintion will be verified instead.
79   if (F->hasAvailableExternallyLinkage())
80     return false;
81   // Do a name matching.
82   static std::unordered_set<std::string> VerifyFuncNames(
83       VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
84   return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
85 }
86 
87 void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
88   if (VerifyPseudoProbe) {
89     PIC.registerAfterPassCallback(
90         [this](StringRef P, Any IR, const PreservedAnalyses &) {
91           this->runAfterPass(P, IR);
92         });
93   }
94 }
95 
96 // Callback to run after each transformation for the new pass manager.
97 void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
98   std::string Banner =
99       "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
100   dbgs() << Banner;
101   if (const auto **M = any_cast<const Module *>(&IR))
102     runAfterPass(*M);
103   else if (const auto **F = any_cast<const Function *>(&IR))
104     runAfterPass(*F);
105   else if (const auto **C = any_cast<const LazyCallGraph::SCC *>(&IR))
106     runAfterPass(*C);
107   else if (const auto **L = any_cast<const Loop *>(&IR))
108     runAfterPass(*L);
109   else
110     llvm_unreachable("Unknown IR unit");
111 }
112 
113 void PseudoProbeVerifier::runAfterPass(const Module *M) {
114   for (const Function &F : *M)
115     runAfterPass(&F);
116 }
117 
118 void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
119   for (const LazyCallGraph::Node &N : *C)
120     runAfterPass(&N.getFunction());
121 }
122 
123 void PseudoProbeVerifier::runAfterPass(const Function *F) {
124   if (!shouldVerifyFunction(F))
125     return;
126   ProbeFactorMap ProbeFactors;
127   for (const auto &BB : *F)
128     collectProbeFactors(&BB, ProbeFactors);
129   verifyProbeFactors(F, ProbeFactors);
130 }
131 
132 void PseudoProbeVerifier::runAfterPass(const Loop *L) {
133   const Function *F = L->getHeader()->getParent();
134   runAfterPass(F);
135 }
136 
137 void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
138                                               ProbeFactorMap &ProbeFactors) {
139   for (const auto &I : *Block) {
140     if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
141       uint64_t Hash = computeCallStackHash(I);
142       ProbeFactors[{Probe->Id, Hash}] += Probe->Factor;
143     }
144   }
145 }
146 
147 void PseudoProbeVerifier::verifyProbeFactors(
148     const Function *F, const ProbeFactorMap &ProbeFactors) {
149   bool BannerPrinted = false;
150   auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
151   for (const auto &I : ProbeFactors) {
152     float CurProbeFactor = I.second;
153     if (PrevProbeFactors.count(I.first)) {
154       float PrevProbeFactor = PrevProbeFactors[I.first];
155       if (std::abs(CurProbeFactor - PrevProbeFactor) >
156           DistributionFactorVariance) {
157         if (!BannerPrinted) {
158           dbgs() << "Function " << F->getName() << ":\n";
159           BannerPrinted = true;
160         }
161         dbgs() << "Probe " << I.first.first << "\tprevious factor "
162                << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
163                << format("%0.2f", CurProbeFactor) << "\n";
164       }
165     }
166 
167     // Update
168     PrevProbeFactors[I.first] = I.second;
169   }
170 }
171 
172 PseudoProbeManager::PseudoProbeManager(const Module &M) {
173   if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
174     for (const auto *Operand : FuncInfo->operands()) {
175       const auto *MD = cast<MDNode>(Operand);
176       auto GUID =
177           mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
178       auto Hash =
179           mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
180       GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
181     }
182   }
183 }
184 
185 const PseudoProbeDescriptor *
186 PseudoProbeManager::getDesc(const Function &F) const {
187   auto I = GUIDToProbeDescMap.find(
188       Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
189   return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
190 }
191 
192 bool PseudoProbeManager::moduleIsProbed(const Module &M) const {
193   return M.getNamedMetadata(PseudoProbeDescMetadataName);
194 }
195 
196 bool PseudoProbeManager::profileIsValid(const Function &F,
197                                         const FunctionSamples &Samples) const {
198   const auto *Desc = getDesc(F);
199   if (!Desc) {
200     LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " << F.getName()
201                       << "\n");
202     return false;
203   } else {
204     if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
205       LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
206                         << "\n");
207       return false;
208     }
209   }
210   return true;
211 }
212 
213 SampleProfileProber::SampleProfileProber(Function &Func,
214                                          const std::string &CurModuleUniqueId)
215     : F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
216   BlockProbeIds.clear();
217   CallProbeIds.clear();
218   LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
219   computeProbeIdForBlocks();
220   computeProbeIdForCallsites();
221   computeCFGHash();
222 }
223 
224 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
225 // value of each BB in the CFG. The higher 32 bits record the number of edges
226 // preceded by the number of indirect calls.
227 // This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
228 void SampleProfileProber::computeCFGHash() {
229   std::vector<uint8_t> Indexes;
230   JamCRC JC;
231   for (auto &BB : *F) {
232     auto *TI = BB.getTerminator();
233     for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
234       auto *Succ = TI->getSuccessor(I);
235       auto Index = getBlockId(Succ);
236       for (int J = 0; J < 4; J++)
237         Indexes.push_back((uint8_t)(Index >> (J * 8)));
238     }
239   }
240 
241   JC.update(Indexes);
242 
243   FunctionHash = (uint64_t)CallProbeIds.size() << 48 |
244                  (uint64_t)Indexes.size() << 32 | JC.getCRC();
245   // Reserve bit 60-63 for other information purpose.
246   FunctionHash &= 0x0FFFFFFFFFFFFFFF;
247   assert(FunctionHash && "Function checksum should not be zero");
248   LLVM_DEBUG(dbgs() << "\nFunction Hash Computation for " << F->getName()
249                     << ":\n"
250                     << " CRC = " << JC.getCRC() << ", Edges = "
251                     << Indexes.size() << ", ICSites = " << CallProbeIds.size()
252                     << ", Hash = " << FunctionHash << "\n");
253 }
254 
255 void SampleProfileProber::computeProbeIdForBlocks() {
256   for (auto &BB : *F) {
257     BlockProbeIds[&BB] = ++LastProbeId;
258   }
259 }
260 
261 void SampleProfileProber::computeProbeIdForCallsites() {
262   for (auto &BB : *F) {
263     for (auto &I : BB) {
264       if (!isa<CallBase>(I))
265         continue;
266       if (isa<IntrinsicInst>(&I))
267         continue;
268       CallProbeIds[&I] = ++LastProbeId;
269     }
270   }
271 }
272 
273 uint32_t SampleProfileProber::getBlockId(const BasicBlock *BB) const {
274   auto I = BlockProbeIds.find(const_cast<BasicBlock *>(BB));
275   return I == BlockProbeIds.end() ? 0 : I->second;
276 }
277 
278 uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const {
279   auto Iter = CallProbeIds.find(const_cast<Instruction *>(Call));
280   return Iter == CallProbeIds.end() ? 0 : Iter->second;
281 }
282 
283 void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
284   Module *M = F.getParent();
285   MDBuilder MDB(F.getContext());
286   // Compute a GUID without considering the function's linkage type. This is
287   // fine since function name is the only key in the profile database.
288   uint64_t Guid = Function::getGUID(F.getName());
289 
290   // Assign an artificial debug line to a probe that doesn't come with a real
291   // line. A probe not having a debug line will get an incomplete inline
292   // context. This will cause samples collected on the probe to be counted
293   // into the base profile instead of a context profile. The line number
294   // itself is not important though.
295   auto AssignDebugLoc = [&](Instruction *I) {
296     assert((isa<PseudoProbeInst>(I) || isa<CallBase>(I)) &&
297            "Expecting pseudo probe or call instructions");
298     if (!I->getDebugLoc()) {
299       if (auto *SP = F.getSubprogram()) {
300         auto DIL = DILocation::get(SP->getContext(), 0, 0, SP);
301         I->setDebugLoc(DIL);
302         ArtificialDbgLine++;
303         LLVM_DEBUG({
304           dbgs() << "\nIn Function " << F.getName()
305                  << " Probe gets an artificial debug line\n";
306           I->dump();
307         });
308       }
309     }
310   };
311 
312   // Probe basic blocks.
313   for (auto &I : BlockProbeIds) {
314     BasicBlock *BB = I.first;
315     uint32_t Index = I.second;
316     // Insert a probe before an instruction with a valid debug line number which
317     // will be assigned to the probe. The line number will be used later to
318     // model the inline context when the probe is inlined into other functions.
319     // Debug instructions, phi nodes and lifetime markers do not have an valid
320     // line number. Real instructions generated by optimizations may not come
321     // with a line number either.
322     auto HasValidDbgLine = [](Instruction *J) {
323       return !isa<PHINode>(J) && !isa<DbgInfoIntrinsic>(J) &&
324              !J->isLifetimeStartOrEnd() && J->getDebugLoc();
325     };
326 
327     Instruction *J = &*BB->getFirstInsertionPt();
328     while (J != BB->getTerminator() && !HasValidDbgLine(J)) {
329       J = J->getNextNode();
330     }
331 
332     IRBuilder<> Builder(J);
333     assert(Builder.GetInsertPoint() != BB->end() &&
334            "Cannot get the probing point");
335     Function *ProbeFn =
336         llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
337     Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
338                      Builder.getInt32(0),
339                      Builder.getInt64(PseudoProbeFullDistributionFactor)};
340     auto *Probe = Builder.CreateCall(ProbeFn, Args);
341     AssignDebugLoc(Probe);
342   }
343 
344   // Probe both direct calls and indirect calls. Direct calls are probed so that
345   // their probe ID can be used as an call site identifier to represent a
346   // calling context.
347   for (auto &I : CallProbeIds) {
348     auto *Call = I.first;
349     uint32_t Index = I.second;
350     uint32_t Type = cast<CallBase>(Call)->getCalledFunction()
351                         ? (uint32_t)PseudoProbeType::DirectCall
352                         : (uint32_t)PseudoProbeType::IndirectCall;
353     AssignDebugLoc(Call);
354     // Levarge the 32-bit discriminator field of debug data to store the ID and
355     // type of a callsite probe. This gets rid of the dependency on plumbing a
356     // customized metadata through the codegen pipeline.
357     uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
358         Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
359     if (auto DIL = Call->getDebugLoc()) {
360       DIL = DIL->cloneWithDiscriminator(V);
361       Call->setDebugLoc(DIL);
362     }
363   }
364 
365   // Create module-level metadata that contains function info necessary to
366   // synthesize probe-based sample counts,  which are
367   // - FunctionGUID
368   // - FunctionHash.
369   // - FunctionName
370   auto Hash = getFunctionHash();
371   auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F);
372   auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
373   assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
374   NMD->addOperand(MD);
375 
376   // Preserve a comdat group to hold all probes materialized later. This
377   // allows that when the function is considered dead and removed, the
378   // materialized probes are disposed too.
379   // Imported functions are defined in another module. They do not need
380   // the following handling since same care will be taken for them in their
381   // original module. The pseudo probes inserted into an imported functions
382   // above will naturally not be emitted since the imported function is free
383   // from object emission. However they will be emitted together with the
384   // inliner functions that the imported function is inlined into. We are not
385   // creating a comdat group for an import function since it's useless anyway.
386   if (!F.isDeclarationForLinker()) {
387     if (TM) {
388       auto Triple = TM->getTargetTriple();
389       if (Triple.supportsCOMDAT() && TM->getFunctionSections())
390         getOrCreateFunctionComdat(F, Triple);
391     }
392   }
393 }
394 
395 PreservedAnalyses SampleProfileProbePass::run(Module &M,
396                                               ModuleAnalysisManager &AM) {
397   auto ModuleId = getUniqueModuleId(&M);
398   // Create the pseudo probe desc metadata beforehand.
399   // Note that modules with only data but no functions will require this to
400   // be set up so that they will be known as probed later.
401   M.getOrInsertNamedMetadata(PseudoProbeDescMetadataName);
402 
403   for (auto &F : M) {
404     if (F.isDeclaration())
405       continue;
406     SampleProfileProber ProbeManager(F, ModuleId);
407     ProbeManager.instrumentOneFunc(F, TM);
408   }
409 
410   return PreservedAnalyses::none();
411 }
412 
413 void PseudoProbeUpdatePass::runOnFunction(Function &F,
414                                           FunctionAnalysisManager &FAM) {
415   BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
416   auto BBProfileCount = [&BFI](BasicBlock *BB) {
417     return BFI.getBlockProfileCount(BB).value_or(0);
418   };
419 
420   // Collect the sum of execution weight for each probe.
421   ProbeFactorMap ProbeFactors;
422   for (auto &Block : F) {
423     for (auto &I : Block) {
424       if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
425         uint64_t Hash = computeCallStackHash(I);
426         ProbeFactors[{Probe->Id, Hash}] += BBProfileCount(&Block);
427       }
428     }
429   }
430 
431   // Fix up over-counted probes.
432   for (auto &Block : F) {
433     for (auto &I : Block) {
434       if (std::optional<PseudoProbe> Probe = extractProbe(I)) {
435         uint64_t Hash = computeCallStackHash(I);
436         float Sum = ProbeFactors[{Probe->Id, Hash}];
437         if (Sum != 0)
438           setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
439       }
440     }
441   }
442 }
443 
444 PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
445                                              ModuleAnalysisManager &AM) {
446   if (UpdatePseudoProbe) {
447     for (auto &F : M) {
448       if (F.isDeclaration())
449         continue;
450       FunctionAnalysisManager &FAM =
451           AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
452       runOnFunction(F, FAM);
453     }
454   }
455   return PreservedAnalyses::none();
456 }
457