1 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass lowers instrprof_* intrinsics emitted by a frontend for profiling.
11 // It also builds the data structures and initialization code needed for
12 // updating execution counts and emitting the profile at runtime.
13 //
14 //===----------------------------------------------------------------------===//
15
16 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/Triple.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/IR/Attributes.h"
25 #include "llvm/IR/BasicBlock.h"
26 #include "llvm/IR/Constant.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Dominators.h"
30 #include "llvm/IR/Function.h"
31 #include "llvm/IR/GlobalValue.h"
32 #include "llvm/IR/GlobalVariable.h"
33 #include "llvm/IR/IRBuilder.h"
34 #include "llvm/IR/Instruction.h"
35 #include "llvm/IR/Instructions.h"
36 #include "llvm/IR/IntrinsicInst.h"
37 #include "llvm/IR/Module.h"
38 #include "llvm/IR/Type.h"
39 #include "llvm/Pass.h"
40 #include "llvm/ProfileData/InstrProf.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Error.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
46 #include "llvm/Transforms/Utils/ModuleUtils.h"
47 #include "llvm/Transforms/Utils/SSAUpdater.h"
48 #include <algorithm>
49 #include <cassert>
50 #include <cstddef>
51 #include <cstdint>
52 #include <string>
53
54 using namespace llvm;
55
56 #define DEBUG_TYPE "instrprof"
57
58 // The start and end values of precise value profile range for memory
59 // intrinsic sizes
60 cl::opt<std::string> MemOPSizeRange(
61 "memop-size-range",
62 cl::desc("Set the range of size in memory intrinsic calls to be profiled "
63 "precisely, in a format of <start_val>:<end_val>"),
64 cl::init(""));
65
66 // The value that considered to be large value in memory intrinsic.
67 cl::opt<unsigned> MemOPSizeLarge(
68 "memop-size-large",
69 cl::desc("Set large value thresthold in memory intrinsic size profiling. "
70 "Value of 0 disables the large value profiling."),
71 cl::init(8192));
72
73 namespace {
74
75 cl::opt<bool> DoNameCompression("enable-name-compression",
76 cl::desc("Enable name string compression"),
77 cl::init(true));
78
79 cl::opt<bool> DoHashBasedCounterSplit(
80 "hash-based-counter-split",
81 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
82 cl::init(true));
83
84 cl::opt<bool> ValueProfileStaticAlloc(
85 "vp-static-alloc",
86 cl::desc("Do static counter allocation for value profiler"),
87 cl::init(true));
88
89 cl::opt<double> NumCountersPerValueSite(
90 "vp-counters-per-site",
91 cl::desc("The average number of profile counters allocated "
92 "per value profiling site."),
93 // This is set to a very small value because in real programs, only
94 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
95 // For those sites with non-zero profile, the average number of targets
96 // is usually smaller than 2.
97 cl::init(1.0));
98
99 cl::opt<bool> AtomicCounterUpdatePromoted(
100 "atomic-counter-update-promoted", cl::ZeroOrMore,
101 cl::desc("Do counter update using atomic fetch add "
102 " for promoted counters only"),
103 cl::init(false));
104
105 // If the option is not specified, the default behavior about whether
106 // counter promotion is done depends on how instrumentaiton lowering
107 // pipeline is setup, i.e., the default value of true of this option
108 // does not mean the promotion will be done by default. Explicitly
109 // setting this option can override the default behavior.
110 cl::opt<bool> DoCounterPromotion("do-counter-promotion", cl::ZeroOrMore,
111 cl::desc("Do counter register promotion"),
112 cl::init(false));
113 cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
114 cl::ZeroOrMore, "max-counter-promotions-per-loop", cl::init(20),
115 cl::desc("Max number counter promotions per loop to avoid"
116 " increasing register pressure too much"));
117
118 // A debug option
119 cl::opt<int>
120 MaxNumOfPromotions(cl::ZeroOrMore, "max-counter-promotions", cl::init(-1),
121 cl::desc("Max number of allowed counter promotions"));
122
123 cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
124 cl::ZeroOrMore, "speculative-counter-promotion-max-exiting", cl::init(3),
125 cl::desc("The max number of exiting blocks of a loop to allow "
126 " speculative counter promotion"));
127
128 cl::opt<bool> SpeculativeCounterPromotionToLoop(
129 cl::ZeroOrMore, "speculative-counter-promotion-to-loop", cl::init(false),
130 cl::desc("When the option is false, if the target block is in a loop, "
131 "the promotion will be disallowed unless the promoted counter "
132 " update can be further/iteratively promoted into an acyclic "
133 " region."));
134
135 cl::opt<bool> IterativeCounterPromotion(
136 cl::ZeroOrMore, "iterative-counter-promotion", cl::init(true),
137 cl::desc("Allow counter promotion across the whole loop nest."));
138
139 class InstrProfilingLegacyPass : public ModulePass {
140 InstrProfiling InstrProf;
141
142 public:
143 static char ID;
144
InstrProfilingLegacyPass()145 InstrProfilingLegacyPass() : ModulePass(ID) {}
InstrProfilingLegacyPass(const InstrProfOptions & Options)146 InstrProfilingLegacyPass(const InstrProfOptions &Options)
147 : ModulePass(ID), InstrProf(Options) {}
148
getPassName() const149 StringRef getPassName() const override {
150 return "Frontend instrumentation-based coverage lowering";
151 }
152
runOnModule(Module & M)153 bool runOnModule(Module &M) override {
154 return InstrProf.run(M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
155 }
156
getAnalysisUsage(AnalysisUsage & AU) const157 void getAnalysisUsage(AnalysisUsage &AU) const override {
158 AU.setPreservesCFG();
159 AU.addRequired<TargetLibraryInfoWrapperPass>();
160 }
161 };
162
163 ///
164 /// A helper class to promote one counter RMW operation in the loop
165 /// into register update.
166 ///
167 /// RWM update for the counter will be sinked out of the loop after
168 /// the transformation.
169 ///
170 class PGOCounterPromoterHelper : public LoadAndStorePromoter {
171 public:
PGOCounterPromoterHelper(Instruction * L,Instruction * S,SSAUpdater & SSA,Value * Init,BasicBlock * PH,ArrayRef<BasicBlock * > ExitBlocks,ArrayRef<Instruction * > InsertPts,DenseMap<Loop *,SmallVector<LoadStorePair,8>> & LoopToCands,LoopInfo & LI)172 PGOCounterPromoterHelper(
173 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
174 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
175 ArrayRef<Instruction *> InsertPts,
176 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
177 LoopInfo &LI)
178 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
179 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
180 assert(isa<LoadInst>(L));
181 assert(isa<StoreInst>(S));
182 SSA.AddAvailableValue(PH, Init);
183 }
184
doExtraRewritesBeforeFinalDeletion() const185 void doExtraRewritesBeforeFinalDeletion() const override {
186 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
187 BasicBlock *ExitBlock = ExitBlocks[i];
188 Instruction *InsertPos = InsertPts[i];
189 // Get LiveIn value into the ExitBlock. If there are multiple
190 // predecessors, the value is defined by a PHI node in this
191 // block.
192 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
193 Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
194 IRBuilder<> Builder(InsertPos);
195 if (AtomicCounterUpdatePromoted)
196 // automic update currently can only be promoted across the current
197 // loop, not the whole loop nest.
198 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
199 AtomicOrdering::SequentiallyConsistent);
200 else {
201 LoadInst *OldVal = Builder.CreateLoad(Addr, "pgocount.promoted");
202 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
203 auto *NewStore = Builder.CreateStore(NewVal, Addr);
204
205 // Now update the parent loop's candidate list:
206 if (IterativeCounterPromotion) {
207 auto *TargetLoop = LI.getLoopFor(ExitBlock);
208 if (TargetLoop)
209 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
210 }
211 }
212 }
213 }
214
215 private:
216 Instruction *Store;
217 ArrayRef<BasicBlock *> ExitBlocks;
218 ArrayRef<Instruction *> InsertPts;
219 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
220 LoopInfo &LI;
221 };
222
223 /// A helper class to do register promotion for all profile counter
224 /// updates in a loop.
225 ///
226 class PGOCounterPromoter {
227 public:
PGOCounterPromoter(DenseMap<Loop *,SmallVector<LoadStorePair,8>> & LoopToCands,Loop & CurLoop,LoopInfo & LI)228 PGOCounterPromoter(
229 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
230 Loop &CurLoop, LoopInfo &LI)
231 : LoopToCandidates(LoopToCands), ExitBlocks(), InsertPts(), L(CurLoop),
232 LI(LI) {
233
234 SmallVector<BasicBlock *, 8> LoopExitBlocks;
235 SmallPtrSet<BasicBlock *, 8> BlockSet;
236 L.getExitBlocks(LoopExitBlocks);
237
238 for (BasicBlock *ExitBlock : LoopExitBlocks) {
239 if (BlockSet.insert(ExitBlock).second) {
240 ExitBlocks.push_back(ExitBlock);
241 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
242 }
243 }
244 }
245
run(int64_t * NumPromoted)246 bool run(int64_t *NumPromoted) {
247 // Skip 'infinite' loops:
248 if (ExitBlocks.size() == 0)
249 return false;
250 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
251 if (MaxProm == 0)
252 return false;
253
254 unsigned Promoted = 0;
255 for (auto &Cand : LoopToCandidates[&L]) {
256
257 SmallVector<PHINode *, 4> NewPHIs;
258 SSAUpdater SSA(&NewPHIs);
259 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
260
261 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
262 L.getLoopPreheader(), ExitBlocks,
263 InsertPts, LoopToCandidates, LI);
264 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
265 Promoted++;
266 if (Promoted >= MaxProm)
267 break;
268
269 (*NumPromoted)++;
270 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
271 break;
272 }
273
274 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
275 << L.getLoopDepth() << ")\n");
276 return Promoted != 0;
277 }
278
279 private:
allowSpeculativeCounterPromotion(Loop * LP)280 bool allowSpeculativeCounterPromotion(Loop *LP) {
281 SmallVector<BasicBlock *, 8> ExitingBlocks;
282 L.getExitingBlocks(ExitingBlocks);
283 // Not considierered speculative.
284 if (ExitingBlocks.size() == 1)
285 return true;
286 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
287 return false;
288 return true;
289 }
290
291 // Returns the max number of Counter Promotions for LP.
getMaxNumOfPromotionsInLoop(Loop * LP)292 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
293 // We can't insert into a catchswitch.
294 SmallVector<BasicBlock *, 8> LoopExitBlocks;
295 LP->getExitBlocks(LoopExitBlocks);
296 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
297 return isa<CatchSwitchInst>(Exit->getTerminator());
298 }))
299 return 0;
300
301 if (!LP->hasDedicatedExits())
302 return 0;
303
304 BasicBlock *PH = LP->getLoopPreheader();
305 if (!PH)
306 return 0;
307
308 SmallVector<BasicBlock *, 8> ExitingBlocks;
309 LP->getExitingBlocks(ExitingBlocks);
310 // Not considierered speculative.
311 if (ExitingBlocks.size() == 1)
312 return MaxNumOfPromotionsPerLoop;
313
314 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
315 return 0;
316
317 // Whether the target block is in a loop does not matter:
318 if (SpeculativeCounterPromotionToLoop)
319 return MaxNumOfPromotionsPerLoop;
320
321 // Now check the target block:
322 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
323 for (auto *TargetBlock : LoopExitBlocks) {
324 auto *TargetLoop = LI.getLoopFor(TargetBlock);
325 if (!TargetLoop)
326 continue;
327 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
328 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
329 MaxProm =
330 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
331 PendingCandsInTarget);
332 }
333 return MaxProm;
334 }
335
336 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
337 SmallVector<BasicBlock *, 8> ExitBlocks;
338 SmallVector<Instruction *, 8> InsertPts;
339 Loop &L;
340 LoopInfo &LI;
341 };
342
343 } // end anonymous namespace
344
run(Module & M,ModuleAnalysisManager & AM)345 PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
346 auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
347 if (!run(M, TLI))
348 return PreservedAnalyses::all();
349
350 return PreservedAnalyses::none();
351 }
352
353 char InstrProfilingLegacyPass::ID = 0;
354 INITIALIZE_PASS_BEGIN(
355 InstrProfilingLegacyPass, "instrprof",
356 "Frontend instrumentation-based coverage lowering.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)357 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
358 INITIALIZE_PASS_END(
359 InstrProfilingLegacyPass, "instrprof",
360 "Frontend instrumentation-based coverage lowering.", false, false)
361
362 ModulePass *
363 llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
364 return new InstrProfilingLegacyPass(Options);
365 }
366
castToIncrementInst(Instruction * Instr)367 static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
368 InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
369 if (Inc)
370 return Inc;
371 return dyn_cast<InstrProfIncrementInst>(Instr);
372 }
373
lowerIntrinsics(Function * F)374 bool InstrProfiling::lowerIntrinsics(Function *F) {
375 bool MadeChange = false;
376 PromotionCandidates.clear();
377 for (BasicBlock &BB : *F) {
378 for (auto I = BB.begin(), E = BB.end(); I != E;) {
379 auto Instr = I++;
380 InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
381 if (Inc) {
382 lowerIncrement(Inc);
383 MadeChange = true;
384 } else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
385 lowerValueProfileInst(Ind);
386 MadeChange = true;
387 }
388 }
389 }
390
391 if (!MadeChange)
392 return false;
393
394 promoteCounterLoadStores(F);
395 return true;
396 }
397
isCounterPromotionEnabled() const398 bool InstrProfiling::isCounterPromotionEnabled() const {
399 if (DoCounterPromotion.getNumOccurrences() > 0)
400 return DoCounterPromotion;
401
402 return Options.DoCounterPromotion;
403 }
404
promoteCounterLoadStores(Function * F)405 void InstrProfiling::promoteCounterLoadStores(Function *F) {
406 if (!isCounterPromotionEnabled())
407 return;
408
409 DominatorTree DT(*F);
410 LoopInfo LI(DT);
411 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
412
413 for (const auto &LoadStore : PromotionCandidates) {
414 auto *CounterLoad = LoadStore.first;
415 auto *CounterStore = LoadStore.second;
416 BasicBlock *BB = CounterLoad->getParent();
417 Loop *ParentLoop = LI.getLoopFor(BB);
418 if (!ParentLoop)
419 continue;
420 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
421 }
422
423 SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
424
425 // Do a post-order traversal of the loops so that counter updates can be
426 // iteratively hoisted outside the loop nest.
427 for (auto *Loop : llvm::reverse(Loops)) {
428 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI);
429 Promoter.run(&TotalCountersPromoted);
430 }
431 }
432
433 /// Check if the module contains uses of any profiling intrinsics.
containsProfilingIntrinsics(Module & M)434 static bool containsProfilingIntrinsics(Module &M) {
435 if (auto *F = M.getFunction(
436 Intrinsic::getName(llvm::Intrinsic::instrprof_increment)))
437 if (!F->use_empty())
438 return true;
439 if (auto *F = M.getFunction(
440 Intrinsic::getName(llvm::Intrinsic::instrprof_increment_step)))
441 if (!F->use_empty())
442 return true;
443 if (auto *F = M.getFunction(
444 Intrinsic::getName(llvm::Intrinsic::instrprof_value_profile)))
445 if (!F->use_empty())
446 return true;
447 return false;
448 }
449
run(Module & M,const TargetLibraryInfo & TLI)450 bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
451 this->M = &M;
452 this->TLI = &TLI;
453 NamesVar = nullptr;
454 NamesSize = 0;
455 ProfileDataMap.clear();
456 UsedVars.clear();
457 getMemOPSizeRangeFromOption(MemOPSizeRange, MemOPSizeRangeStart,
458 MemOPSizeRangeLast);
459 TT = Triple(M.getTargetTriple());
460
461 // Emit the runtime hook even if no counters are present.
462 bool MadeChange = emitRuntimeHook();
463
464 // Improve compile time by avoiding linear scans when there is no work.
465 GlobalVariable *CoverageNamesVar =
466 M.getNamedGlobal(getCoverageUnusedNamesVarName());
467 if (!containsProfilingIntrinsics(M) && !CoverageNamesVar)
468 return MadeChange;
469
470 // We did not know how many value sites there would be inside
471 // the instrumented function. This is counting the number of instrumented
472 // target value sites to enter it as field in the profile data variable.
473 for (Function &F : M) {
474 InstrProfIncrementInst *FirstProfIncInst = nullptr;
475 for (BasicBlock &BB : F)
476 for (auto I = BB.begin(), E = BB.end(); I != E; I++)
477 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
478 computeNumValueSiteCounts(Ind);
479 else if (FirstProfIncInst == nullptr)
480 FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
481
482 // Value profiling intrinsic lowering requires per-function profile data
483 // variable to be created first.
484 if (FirstProfIncInst != nullptr)
485 static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
486 }
487
488 for (Function &F : M)
489 MadeChange |= lowerIntrinsics(&F);
490
491 if (CoverageNamesVar) {
492 lowerCoverageData(CoverageNamesVar);
493 MadeChange = true;
494 }
495
496 if (!MadeChange)
497 return false;
498
499 emitVNodes();
500 emitNameData();
501 emitRegistration();
502 emitUses();
503 emitInitialization();
504 return true;
505 }
506
getOrInsertValueProfilingCall(Module & M,const TargetLibraryInfo & TLI,bool IsRange=false)507 static Constant *getOrInsertValueProfilingCall(Module &M,
508 const TargetLibraryInfo &TLI,
509 bool IsRange = false) {
510 LLVMContext &Ctx = M.getContext();
511 auto *ReturnTy = Type::getVoidTy(M.getContext());
512
513 Constant *Res;
514 if (!IsRange) {
515 Type *ParamTypes[] = {
516 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
517 #include "llvm/ProfileData/InstrProfData.inc"
518 };
519 auto *ValueProfilingCallTy =
520 FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
521 Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
522 ValueProfilingCallTy);
523 } else {
524 Type *RangeParamTypes[] = {
525 #define VALUE_RANGE_PROF 1
526 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
527 #include "llvm/ProfileData/InstrProfData.inc"
528 #undef VALUE_RANGE_PROF
529 };
530 auto *ValueRangeProfilingCallTy =
531 FunctionType::get(ReturnTy, makeArrayRef(RangeParamTypes), false);
532 Res = M.getOrInsertFunction(getInstrProfValueRangeProfFuncName(),
533 ValueRangeProfilingCallTy);
534 }
535
536 if (Function *FunRes = dyn_cast<Function>(Res)) {
537 if (auto AK = TLI.getExtAttrForI32Param(false))
538 FunRes->addParamAttr(2, AK);
539 }
540 return Res;
541 }
542
computeNumValueSiteCounts(InstrProfValueProfileInst * Ind)543 void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
544 GlobalVariable *Name = Ind->getName();
545 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
546 uint64_t Index = Ind->getIndex()->getZExtValue();
547 auto It = ProfileDataMap.find(Name);
548 if (It == ProfileDataMap.end()) {
549 PerFunctionProfileData PD;
550 PD.NumValueSites[ValueKind] = Index + 1;
551 ProfileDataMap[Name] = PD;
552 } else if (It->second.NumValueSites[ValueKind] <= Index)
553 It->second.NumValueSites[ValueKind] = Index + 1;
554 }
555
lowerValueProfileInst(InstrProfValueProfileInst * Ind)556 void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
557 GlobalVariable *Name = Ind->getName();
558 auto It = ProfileDataMap.find(Name);
559 assert(It != ProfileDataMap.end() && It->second.DataVar &&
560 "value profiling detected in function with no counter incerement");
561
562 GlobalVariable *DataVar = It->second.DataVar;
563 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
564 uint64_t Index = Ind->getIndex()->getZExtValue();
565 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
566 Index += It->second.NumValueSites[Kind];
567
568 IRBuilder<> Builder(Ind);
569 bool IsRange = (Ind->getValueKind()->getZExtValue() ==
570 llvm::InstrProfValueKind::IPVK_MemOPSize);
571 CallInst *Call = nullptr;
572 if (!IsRange) {
573 Value *Args[3] = {Ind->getTargetValue(),
574 Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
575 Builder.getInt32(Index)};
576 Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI), Args);
577 } else {
578 Value *Args[6] = {
579 Ind->getTargetValue(),
580 Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
581 Builder.getInt32(Index),
582 Builder.getInt64(MemOPSizeRangeStart),
583 Builder.getInt64(MemOPSizeRangeLast),
584 Builder.getInt64(MemOPSizeLarge == 0 ? INT64_MIN : MemOPSizeLarge)};
585 Call =
586 Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI, true), Args);
587 }
588 if (auto AK = TLI->getExtAttrForI32Param(false))
589 Call->addParamAttr(2, AK);
590 Ind->replaceAllUsesWith(Call);
591 Ind->eraseFromParent();
592 }
593
lowerIncrement(InstrProfIncrementInst * Inc)594 void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
595 GlobalVariable *Counters = getOrCreateRegionCounters(Inc);
596
597 IRBuilder<> Builder(Inc);
598 uint64_t Index = Inc->getIndex()->getZExtValue();
599 Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
600 Value *Load = Builder.CreateLoad(Addr, "pgocount");
601 auto *Count = Builder.CreateAdd(Load, Inc->getStep());
602 auto *Store = Builder.CreateStore(Count, Addr);
603 Inc->replaceAllUsesWith(Store);
604 if (isCounterPromotionEnabled())
605 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
606 Inc->eraseFromParent();
607 }
608
lowerCoverageData(GlobalVariable * CoverageNamesVar)609 void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
610 ConstantArray *Names =
611 cast<ConstantArray>(CoverageNamesVar->getInitializer());
612 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
613 Constant *NC = Names->getOperand(I);
614 Value *V = NC->stripPointerCasts();
615 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
616 GlobalVariable *Name = cast<GlobalVariable>(V);
617
618 Name->setLinkage(GlobalValue::PrivateLinkage);
619 ReferencedNames.push_back(Name);
620 NC->dropAllReferences();
621 }
622 CoverageNamesVar->eraseFromParent();
623 }
624
625 /// Get the name of a profiling variable for a particular function.
getVarName(InstrProfIncrementInst * Inc,StringRef Prefix)626 static std::string getVarName(InstrProfIncrementInst *Inc, StringRef Prefix) {
627 StringRef NamePrefix = getInstrProfNameVarPrefix();
628 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
629 Function *F = Inc->getParent()->getParent();
630 Module *M = F->getParent();
631 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
632 !canRenameComdatFunc(*F))
633 return (Prefix + Name).str();
634 uint64_t FuncHash = Inc->getHash()->getZExtValue();
635 SmallVector<char, 24> HashPostfix;
636 if (Name.endswith((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
637 return (Prefix + Name).str();
638 return (Prefix + Name + "." + Twine(FuncHash)).str();
639 }
640
shouldRecordFunctionAddr(Function * F)641 static inline bool shouldRecordFunctionAddr(Function *F) {
642 // Check the linkage
643 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
644 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
645 !HasAvailableExternallyLinkage)
646 return true;
647
648 // A function marked 'alwaysinline' with available_externally linkage can't
649 // have its address taken. Doing so would create an undefined external ref to
650 // the function, which would fail to link.
651 if (HasAvailableExternallyLinkage &&
652 F->hasFnAttribute(Attribute::AlwaysInline))
653 return false;
654
655 // Prohibit function address recording if the function is both internal and
656 // COMDAT. This avoids the profile data variable referencing internal symbols
657 // in COMDAT.
658 if (F->hasLocalLinkage() && F->hasComdat())
659 return false;
660
661 // Check uses of this function for other than direct calls or invokes to it.
662 // Inline virtual functions have linkeOnceODR linkage. When a key method
663 // exists, the vtable will only be emitted in the TU where the key method
664 // is defined. In a TU where vtable is not available, the function won't
665 // be 'addresstaken'. If its address is not recorded here, the profile data
666 // with missing address may be picked by the linker leading to missing
667 // indirect call target info.
668 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
669 }
670
getOrCreateProfileComdat(Module & M,Function & F,InstrProfIncrementInst * Inc)671 static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
672 InstrProfIncrementInst *Inc) {
673 if (!needsComdatForCounter(F, M))
674 return nullptr;
675
676 // COFF format requires a COMDAT section to have a key symbol with the same
677 // name. The linker targeting COFF also requires that the COMDAT
678 // a section is associated to must precede the associating section. For this
679 // reason, we must choose the counter var's name as the name of the comdat.
680 StringRef ComdatPrefix = (Triple(M.getTargetTriple()).isOSBinFormatCOFF()
681 ? getInstrProfCountersVarPrefix()
682 : getInstrProfComdatPrefix());
683 return M.getOrInsertComdat(StringRef(getVarName(Inc, ComdatPrefix)));
684 }
685
needsRuntimeRegistrationOfSectionRange(const Module & M)686 static bool needsRuntimeRegistrationOfSectionRange(const Module &M) {
687 // Don't do this for Darwin. compiler-rt uses linker magic.
688 if (Triple(M.getTargetTriple()).isOSDarwin())
689 return false;
690
691 // Use linker script magic to get data/cnts/name start/end.
692 if (Triple(M.getTargetTriple()).isOSLinux() ||
693 Triple(M.getTargetTriple()).isOSFreeBSD() ||
694 Triple(M.getTargetTriple()).isOSFuchsia() ||
695 Triple(M.getTargetTriple()).isPS4CPU())
696 return false;
697
698 return true;
699 }
700
701 GlobalVariable *
getOrCreateRegionCounters(InstrProfIncrementInst * Inc)702 InstrProfiling::getOrCreateRegionCounters(InstrProfIncrementInst *Inc) {
703 GlobalVariable *NamePtr = Inc->getName();
704 auto It = ProfileDataMap.find(NamePtr);
705 PerFunctionProfileData PD;
706 if (It != ProfileDataMap.end()) {
707 if (It->second.RegionCounters)
708 return It->second.RegionCounters;
709 PD = It->second;
710 }
711
712 // Move the name variable to the right section. Place them in a COMDAT group
713 // if the associated function is a COMDAT. This will make sure that
714 // only one copy of counters of the COMDAT function will be emitted after
715 // linking.
716 Function *Fn = Inc->getParent()->getParent();
717 Comdat *ProfileVarsComdat = nullptr;
718 ProfileVarsComdat = getOrCreateProfileComdat(*M, *Fn, Inc);
719
720 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
721 LLVMContext &Ctx = M->getContext();
722 ArrayType *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
723
724 // Create the counters variable.
725 auto *CounterPtr =
726 new GlobalVariable(*M, CounterTy, false, NamePtr->getLinkage(),
727 Constant::getNullValue(CounterTy),
728 getVarName(Inc, getInstrProfCountersVarPrefix()));
729 CounterPtr->setVisibility(NamePtr->getVisibility());
730 CounterPtr->setSection(
731 getInstrProfSectionName(IPSK_cnts, TT.getObjectFormat()));
732 CounterPtr->setAlignment(8);
733 CounterPtr->setComdat(ProfileVarsComdat);
734
735 auto *Int8PtrTy = Type::getInt8PtrTy(Ctx);
736 // Allocate statically the array of pointers to value profile nodes for
737 // the current function.
738 Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
739 if (ValueProfileStaticAlloc && !needsRuntimeRegistrationOfSectionRange(*M)) {
740 uint64_t NS = 0;
741 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
742 NS += PD.NumValueSites[Kind];
743 if (NS) {
744 ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
745
746 auto *ValuesVar =
747 new GlobalVariable(*M, ValuesTy, false, NamePtr->getLinkage(),
748 Constant::getNullValue(ValuesTy),
749 getVarName(Inc, getInstrProfValuesVarPrefix()));
750 ValuesVar->setVisibility(NamePtr->getVisibility());
751 ValuesVar->setSection(
752 getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
753 ValuesVar->setAlignment(8);
754 ValuesVar->setComdat(ProfileVarsComdat);
755 ValuesPtrExpr =
756 ConstantExpr::getBitCast(ValuesVar, Type::getInt8PtrTy(Ctx));
757 }
758 }
759
760 // Create data variable.
761 auto *Int16Ty = Type::getInt16Ty(Ctx);
762 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
763 Type *DataTypes[] = {
764 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
765 #include "llvm/ProfileData/InstrProfData.inc"
766 };
767 auto *DataTy = StructType::get(Ctx, makeArrayRef(DataTypes));
768
769 Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
770 ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
771 : ConstantPointerNull::get(Int8PtrTy);
772
773 Constant *Int16ArrayVals[IPVK_Last + 1];
774 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
775 Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);
776
777 Constant *DataVals[] = {
778 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
779 #include "llvm/ProfileData/InstrProfData.inc"
780 };
781 auto *Data = new GlobalVariable(*M, DataTy, false, NamePtr->getLinkage(),
782 ConstantStruct::get(DataTy, DataVals),
783 getVarName(Inc, getInstrProfDataVarPrefix()));
784 Data->setVisibility(NamePtr->getVisibility());
785 Data->setSection(getInstrProfSectionName(IPSK_data, TT.getObjectFormat()));
786 Data->setAlignment(INSTR_PROF_DATA_ALIGNMENT);
787 Data->setComdat(ProfileVarsComdat);
788
789 PD.RegionCounters = CounterPtr;
790 PD.DataVar = Data;
791 ProfileDataMap[NamePtr] = PD;
792
793 // Mark the data variable as used so that it isn't stripped out.
794 UsedVars.push_back(Data);
795 // Now that the linkage set by the FE has been passed to the data and counter
796 // variables, reset Name variable's linkage and visibility to private so that
797 // it can be removed later by the compiler.
798 NamePtr->setLinkage(GlobalValue::PrivateLinkage);
799 // Collect the referenced names to be used by emitNameData.
800 ReferencedNames.push_back(NamePtr);
801
802 return CounterPtr;
803 }
804
emitVNodes()805 void InstrProfiling::emitVNodes() {
806 if (!ValueProfileStaticAlloc)
807 return;
808
809 // For now only support this on platforms that do
810 // not require runtime registration to discover
811 // named section start/end.
812 if (needsRuntimeRegistrationOfSectionRange(*M))
813 return;
814
815 size_t TotalNS = 0;
816 for (auto &PD : ProfileDataMap) {
817 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
818 TotalNS += PD.second.NumValueSites[Kind];
819 }
820
821 if (!TotalNS)
822 return;
823
824 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
825 // Heuristic for small programs with very few total value sites.
826 // The default value of vp-counters-per-site is chosen based on
827 // the observation that large apps usually have a low percentage
828 // of value sites that actually have any profile data, and thus
829 // the average number of counters per site is low. For small
830 // apps with very few sites, this may not be true. Bump up the
831 // number of counters in this case.
832 #define INSTR_PROF_MIN_VAL_COUNTS 10
833 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
834 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
835
836 auto &Ctx = M->getContext();
837 Type *VNodeTypes[] = {
838 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
839 #include "llvm/ProfileData/InstrProfData.inc"
840 };
841 auto *VNodeTy = StructType::get(Ctx, makeArrayRef(VNodeTypes));
842
843 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
844 auto *VNodesVar = new GlobalVariable(
845 *M, VNodesTy, false, GlobalValue::PrivateLinkage,
846 Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
847 VNodesVar->setSection(
848 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
849 UsedVars.push_back(VNodesVar);
850 }
851
emitNameData()852 void InstrProfiling::emitNameData() {
853 std::string UncompressedData;
854
855 if (ReferencedNames.empty())
856 return;
857
858 std::string CompressedNameStr;
859 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
860 DoNameCompression)) {
861 report_fatal_error(toString(std::move(E)), false);
862 }
863
864 auto &Ctx = M->getContext();
865 auto *NamesVal = ConstantDataArray::getString(
866 Ctx, StringRef(CompressedNameStr), false);
867 NamesVar = new GlobalVariable(*M, NamesVal->getType(), true,
868 GlobalValue::PrivateLinkage, NamesVal,
869 getInstrProfNamesVarName());
870 NamesSize = CompressedNameStr.size();
871 NamesVar->setSection(
872 getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
873 UsedVars.push_back(NamesVar);
874
875 for (auto *NamePtr : ReferencedNames)
876 NamePtr->eraseFromParent();
877 }
878
emitRegistration()879 void InstrProfiling::emitRegistration() {
880 if (!needsRuntimeRegistrationOfSectionRange(*M))
881 return;
882
883 // Construct the function.
884 auto *VoidTy = Type::getVoidTy(M->getContext());
885 auto *VoidPtrTy = Type::getInt8PtrTy(M->getContext());
886 auto *Int64Ty = Type::getInt64Ty(M->getContext());
887 auto *RegisterFTy = FunctionType::get(VoidTy, false);
888 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
889 getInstrProfRegFuncsName(), M);
890 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
891 if (Options.NoRedZone)
892 RegisterF->addFnAttr(Attribute::NoRedZone);
893
894 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
895 auto *RuntimeRegisterF =
896 Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
897 getInstrProfRegFuncName(), M);
898
899 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", RegisterF));
900 for (Value *Data : UsedVars)
901 if (Data != NamesVar && !isa<Function>(Data))
902 IRB.CreateCall(RuntimeRegisterF, IRB.CreateBitCast(Data, VoidPtrTy));
903
904 if (NamesVar) {
905 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
906 auto *NamesRegisterTy =
907 FunctionType::get(VoidTy, makeArrayRef(ParamTypes), false);
908 auto *NamesRegisterF =
909 Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
910 getInstrProfNamesRegFuncName(), M);
911 IRB.CreateCall(NamesRegisterF, {IRB.CreateBitCast(NamesVar, VoidPtrTy),
912 IRB.getInt64(NamesSize)});
913 }
914
915 IRB.CreateRetVoid();
916 }
917
emitRuntimeHook()918 bool InstrProfiling::emitRuntimeHook() {
919 // We expect the linker to be invoked with -u<hook_var> flag for linux,
920 // for which case there is no need to emit the user function.
921 if (Triple(M->getTargetTriple()).isOSLinux())
922 return false;
923
924 // If the module's provided its own runtime, we don't need to do anything.
925 if (M->getGlobalVariable(getInstrProfRuntimeHookVarName()))
926 return false;
927
928 // Declare an external variable that will pull in the runtime initialization.
929 auto *Int32Ty = Type::getInt32Ty(M->getContext());
930 auto *Var =
931 new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage,
932 nullptr, getInstrProfRuntimeHookVarName());
933
934 // Make a function that uses it.
935 auto *User = Function::Create(FunctionType::get(Int32Ty, false),
936 GlobalValue::LinkOnceODRLinkage,
937 getInstrProfRuntimeHookVarUseFuncName(), M);
938 User->addFnAttr(Attribute::NoInline);
939 if (Options.NoRedZone)
940 User->addFnAttr(Attribute::NoRedZone);
941 User->setVisibility(GlobalValue::HiddenVisibility);
942 if (Triple(M->getTargetTriple()).supportsCOMDAT())
943 User->setComdat(M->getOrInsertComdat(User->getName()));
944
945 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", User));
946 auto *Load = IRB.CreateLoad(Var);
947 IRB.CreateRet(Load);
948
949 // Mark the user variable as used so that it isn't stripped out.
950 UsedVars.push_back(User);
951 return true;
952 }
953
emitUses()954 void InstrProfiling::emitUses() {
955 if (!UsedVars.empty())
956 appendToUsed(*M, UsedVars);
957 }
958
emitInitialization()959 void InstrProfiling::emitInitialization() {
960 StringRef InstrProfileOutput = Options.InstrProfileOutput;
961
962 if (!InstrProfileOutput.empty()) {
963 // Create variable for profile name.
964 Constant *ProfileNameConst =
965 ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
966 GlobalVariable *ProfileNameVar = new GlobalVariable(
967 *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
968 ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
969 if (TT.supportsCOMDAT()) {
970 ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
971 ProfileNameVar->setComdat(M->getOrInsertComdat(
972 StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
973 }
974 }
975
976 Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
977 if (!RegisterF)
978 return;
979
980 // Create the initialization function.
981 auto *VoidTy = Type::getVoidTy(M->getContext());
982 auto *F = Function::Create(FunctionType::get(VoidTy, false),
983 GlobalValue::InternalLinkage,
984 getInstrProfInitFuncName(), M);
985 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
986 F->addFnAttr(Attribute::NoInline);
987 if (Options.NoRedZone)
988 F->addFnAttr(Attribute::NoRedZone);
989
990 // Add the basic block and the necessary calls.
991 IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
992 if (RegisterF)
993 IRB.CreateCall(RegisterF, {});
994 IRB.CreateRetVoid();
995
996 appendToGlobalCtors(*M, F, 0);
997 }
998