1 //===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass lowers instrprof_* intrinsics emitted by an instrumentor.
10 // It also builds the data structures and initialization code needed for
11 // updating execution counts and emitting the profile at runtime.
12 //
13 //===----------------------------------------------------------------------===//
14
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
61
62 using namespace llvm;
63
64 #define DEBUG_TYPE "instrprof"
65
66 namespace llvm {
67 // TODO: Remove -debug-info-correlate in next LLVM release, in favor of
68 // -profile-correlate=debug-info.
69 cl::opt<bool> DebugInfoCorrelate(
70 "debug-info-correlate",
71 cl::desc("Use debug info to correlate profiles. (Deprecated, use "
72 "-profile-correlate=debug-info)"),
73 cl::init(false));
74
75 cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
76 "profile-correlate",
77 cl::desc("Use debug info or binary file to correlate profiles."),
78 cl::init(InstrProfCorrelator::NONE),
79 cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
80 "No profile correlation"),
81 clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
82 "Use debug info to correlate"),
83 clEnumValN(InstrProfCorrelator::BINARY, "binary",
84 "Use binary to correlate")));
85 } // namespace llvm
86
87 namespace {
88
89 cl::opt<bool> DoHashBasedCounterSplit(
90 "hash-based-counter-split",
91 cl::desc("Rename counter variable of a comdat function based on cfg hash"),
92 cl::init(true));
93
94 cl::opt<bool>
95 RuntimeCounterRelocation("runtime-counter-relocation",
96 cl::desc("Enable relocating counters at runtime."),
97 cl::init(false));
98
99 cl::opt<bool> ValueProfileStaticAlloc(
100 "vp-static-alloc",
101 cl::desc("Do static counter allocation for value profiler"),
102 cl::init(true));
103
104 cl::opt<double> NumCountersPerValueSite(
105 "vp-counters-per-site",
106 cl::desc("The average number of profile counters allocated "
107 "per value profiling site."),
108 // This is set to a very small value because in real programs, only
109 // a very small percentage of value sites have non-zero targets, e.g, 1/30.
110 // For those sites with non-zero profile, the average number of targets
111 // is usually smaller than 2.
112 cl::init(1.0));
113
114 cl::opt<bool> AtomicCounterUpdateAll(
115 "instrprof-atomic-counter-update-all",
116 cl::desc("Make all profile counter updates atomic (for testing only)"),
117 cl::init(false));
118
119 cl::opt<bool> AtomicCounterUpdatePromoted(
120 "atomic-counter-update-promoted",
121 cl::desc("Do counter update using atomic fetch add "
122 " for promoted counters only"),
123 cl::init(false));
124
125 cl::opt<bool> AtomicFirstCounter(
126 "atomic-first-counter",
127 cl::desc("Use atomic fetch add for first counter in a function (usually "
128 "the entry counter)"),
129 cl::init(false));
130
131 // If the option is not specified, the default behavior about whether
132 // counter promotion is done depends on how instrumentaiton lowering
133 // pipeline is setup, i.e., the default value of true of this option
134 // does not mean the promotion will be done by default. Explicitly
135 // setting this option can override the default behavior.
136 cl::opt<bool> DoCounterPromotion("do-counter-promotion",
137 cl::desc("Do counter register promotion"),
138 cl::init(false));
139 cl::opt<unsigned> MaxNumOfPromotionsPerLoop(
140 "max-counter-promotions-per-loop", cl::init(20),
141 cl::desc("Max number counter promotions per loop to avoid"
142 " increasing register pressure too much"));
143
144 // A debug option
145 cl::opt<int>
146 MaxNumOfPromotions("max-counter-promotions", cl::init(-1),
147 cl::desc("Max number of allowed counter promotions"));
148
149 cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(
150 "speculative-counter-promotion-max-exiting", cl::init(3),
151 cl::desc("The max number of exiting blocks of a loop to allow "
152 " speculative counter promotion"));
153
154 cl::opt<bool> SpeculativeCounterPromotionToLoop(
155 "speculative-counter-promotion-to-loop",
156 cl::desc("When the option is false, if the target block is in a loop, "
157 "the promotion will be disallowed unless the promoted counter "
158 " update can be further/iteratively promoted into an acyclic "
159 " region."));
160
161 cl::opt<bool> IterativeCounterPromotion(
162 "iterative-counter-promotion", cl::init(true),
163 cl::desc("Allow counter promotion across the whole loop nest."));
164
165 cl::opt<bool> SkipRetExitBlock(
166 "skip-ret-exit-block", cl::init(true),
167 cl::desc("Suppress counter promotion if exit blocks contain ret."));
168
169 using LoadStorePair = std::pair<Instruction *, Instruction *>;
170
171 class InstrLowerer final {
172 public:
InstrLowerer(Module & M,const InstrProfOptions & Options,std::function<const TargetLibraryInfo & (Function & F)> GetTLI,bool IsCS)173 InstrLowerer(Module &M, const InstrProfOptions &Options,
174 std::function<const TargetLibraryInfo &(Function &F)> GetTLI,
175 bool IsCS)
176 : M(M), Options(Options), TT(Triple(M.getTargetTriple())), IsCS(IsCS),
177 GetTLI(GetTLI) {}
178
179 bool lower();
180
181 private:
182 Module &M;
183 const InstrProfOptions Options;
184 const Triple TT;
185 // Is this lowering for the context-sensitive instrumentation.
186 const bool IsCS;
187
188 std::function<const TargetLibraryInfo &(Function &F)> GetTLI;
189 struct PerFunctionProfileData {
190 uint32_t NumValueSites[IPVK_Last + 1] = {};
191 GlobalVariable *RegionCounters = nullptr;
192 GlobalVariable *DataVar = nullptr;
193 GlobalVariable *RegionBitmaps = nullptr;
194 uint32_t NumBitmapBytes = 0;
195
196 PerFunctionProfileData() = default;
197 };
198 DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
199 /// If runtime relocation is enabled, this maps functions to the load
200 /// instruction that produces the profile relocation bias.
201 DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;
202 std::vector<GlobalValue *> CompilerUsedVars;
203 std::vector<GlobalValue *> UsedVars;
204 std::vector<GlobalVariable *> ReferencedNames;
205 GlobalVariable *NamesVar = nullptr;
206 size_t NamesSize = 0;
207
208 // vector of counter load/store pairs to be register promoted.
209 std::vector<LoadStorePair> PromotionCandidates;
210
211 int64_t TotalCountersPromoted = 0;
212
213 /// Lower instrumentation intrinsics in the function. Returns true if there
214 /// any lowering.
215 bool lowerIntrinsics(Function *F);
216
217 /// Register-promote counter loads and stores in loops.
218 void promoteCounterLoadStores(Function *F);
219
220 /// Returns true if relocating counters at runtime is enabled.
221 bool isRuntimeCounterRelocationEnabled() const;
222
223 /// Returns true if profile counter update register promotion is enabled.
224 bool isCounterPromotionEnabled() const;
225
226 /// Count the number of instrumented value sites for the function.
227 void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);
228
229 /// Replace instrprof.value.profile with a call to runtime library.
230 void lowerValueProfileInst(InstrProfValueProfileInst *Ins);
231
232 /// Replace instrprof.cover with a store instruction to the coverage byte.
233 void lowerCover(InstrProfCoverInst *Inc);
234
235 /// Replace instrprof.timestamp with a call to
236 /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
237 void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
238
239 /// Replace instrprof.increment with an increment of the appropriate value.
240 void lowerIncrement(InstrProfIncrementInst *Inc);
241
242 /// Force emitting of name vars for unused functions.
243 void lowerCoverageData(GlobalVariable *CoverageNamesVar);
244
245 /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction
246 /// using the index represented by the a temp value into a bitmap.
247 void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);
248
249 /// Replace instrprof.mcdc.temp.update with a shift and or instruction using
250 /// the corresponding condition ID.
251 void lowerMCDCCondBitmapUpdate(InstrProfMCDCCondBitmapUpdate *Ins);
252
253 /// Compute the address of the counter value that this profiling instruction
254 /// acts on.
255 Value *getCounterAddress(InstrProfCntrInstBase *I);
256
257 /// Get the region counters for an increment, creating them if necessary.
258 ///
259 /// If the counter array doesn't yet exist, the profile data variables
260 /// referring to them will also be created.
261 GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);
262
263 /// Create the region counters.
264 GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,
265 StringRef Name,
266 GlobalValue::LinkageTypes Linkage);
267
268 /// Compute the address of the test vector bitmap that this profiling
269 /// instruction acts on.
270 Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);
271
272 /// Get the region bitmaps for an increment, creating them if necessary.
273 ///
274 /// If the bitmap array doesn't yet exist, the profile data variables
275 /// referring to them will also be created.
276 GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);
277
278 /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with
279 /// an MC/DC Decision region. The number of bytes required is indicated by
280 /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called
281 /// as part of setupProfileSection() and is conceptually very similar to
282 /// what is done for profile data counters in createRegionCounters().
283 GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
284 StringRef Name,
285 GlobalValue::LinkageTypes Linkage);
286
287 /// Set Comdat property of GV, if required.
288 void maybeSetComdat(GlobalVariable *GV, Function *Fn, StringRef VarName);
289
290 /// Setup the sections into which counters and bitmaps are allocated.
291 GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,
292 InstrProfSectKind IPSK);
293
294 /// Create INSTR_PROF_DATA variable for counters and bitmaps.
295 void createDataVariable(InstrProfCntrInstBase *Inc);
296
297 /// Emit the section with compressed function names.
298 void emitNameData();
299
300 /// Emit value nodes section for value profiling.
301 void emitVNodes();
302
303 /// Emit runtime registration functions for each profile data variable.
304 void emitRegistration();
305
306 /// Emit the necessary plumbing to pull in the runtime initialization.
307 /// Returns true if a change was made.
308 bool emitRuntimeHook();
309
310 /// Add uses of our data variables and runtime hook.
311 void emitUses();
312
313 /// Create a static initializer for our data, on platforms that need it,
314 /// and for any profile output file that was specified.
315 void emitInitialization();
316 };
317
318 ///
319 /// A helper class to promote one counter RMW operation in the loop
320 /// into register update.
321 ///
322 /// RWM update for the counter will be sinked out of the loop after
323 /// the transformation.
324 ///
325 class PGOCounterPromoterHelper : public LoadAndStorePromoter {
326 public:
PGOCounterPromoterHelper(Instruction * L,Instruction * S,SSAUpdater & SSA,Value * Init,BasicBlock * PH,ArrayRef<BasicBlock * > ExitBlocks,ArrayRef<Instruction * > InsertPts,DenseMap<Loop *,SmallVector<LoadStorePair,8>> & LoopToCands,LoopInfo & LI)327 PGOCounterPromoterHelper(
328 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,
329 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks,
330 ArrayRef<Instruction *> InsertPts,
331 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
332 LoopInfo &LI)
333 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),
334 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {
335 assert(isa<LoadInst>(L));
336 assert(isa<StoreInst>(S));
337 SSA.AddAvailableValue(PH, Init);
338 }
339
doExtraRewritesBeforeFinalDeletion()340 void doExtraRewritesBeforeFinalDeletion() override {
341 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
342 BasicBlock *ExitBlock = ExitBlocks[i];
343 Instruction *InsertPos = InsertPts[i];
344 // Get LiveIn value into the ExitBlock. If there are multiple
345 // predecessors, the value is defined by a PHI node in this
346 // block.
347 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
348 Value *Addr = cast<StoreInst>(Store)->getPointerOperand();
349 Type *Ty = LiveInValue->getType();
350 IRBuilder<> Builder(InsertPos);
351 if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) {
352 // If isRuntimeCounterRelocationEnabled() is true then the address of
353 // the store instruction is computed with two instructions in
354 // InstrProfiling::getCounterAddress(). We need to copy those
355 // instructions to this block to compute Addr correctly.
356 // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>
357 // %Addr = inttoptr i64 %BiasAdd to i64*
358 auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));
359 assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);
360 Value *BiasInst = Builder.Insert(OrigBiasInst->clone());
361 Addr = Builder.CreateIntToPtr(BiasInst,
362 PointerType::getUnqual(Ty->getContext()));
363 }
364 if (AtomicCounterUpdatePromoted)
365 // automic update currently can only be promoted across the current
366 // loop, not the whole loop nest.
367 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,
368 MaybeAlign(),
369 AtomicOrdering::SequentiallyConsistent);
370 else {
371 LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");
372 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);
373 auto *NewStore = Builder.CreateStore(NewVal, Addr);
374
375 // Now update the parent loop's candidate list:
376 if (IterativeCounterPromotion) {
377 auto *TargetLoop = LI.getLoopFor(ExitBlock);
378 if (TargetLoop)
379 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);
380 }
381 }
382 }
383 }
384
385 private:
386 Instruction *Store;
387 ArrayRef<BasicBlock *> ExitBlocks;
388 ArrayRef<Instruction *> InsertPts;
389 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
390 LoopInfo &LI;
391 };
392
393 /// A helper class to do register promotion for all profile counter
394 /// updates in a loop.
395 ///
396 class PGOCounterPromoter {
397 public:
PGOCounterPromoter(DenseMap<Loop *,SmallVector<LoadStorePair,8>> & LoopToCands,Loop & CurLoop,LoopInfo & LI,BlockFrequencyInfo * BFI)398 PGOCounterPromoter(
399 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,
400 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)
401 : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {
402
403 // Skip collection of ExitBlocks and InsertPts for loops that will not be
404 // able to have counters promoted.
405 SmallVector<BasicBlock *, 8> LoopExitBlocks;
406 SmallPtrSet<BasicBlock *, 8> BlockSet;
407
408 L.getExitBlocks(LoopExitBlocks);
409 if (!isPromotionPossible(&L, LoopExitBlocks))
410 return;
411
412 for (BasicBlock *ExitBlock : LoopExitBlocks) {
413 if (BlockSet.insert(ExitBlock).second &&
414 llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) {
415 return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock);
416 })) {
417 ExitBlocks.push_back(ExitBlock);
418 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
419 }
420 }
421 }
422
run(int64_t * NumPromoted)423 bool run(int64_t *NumPromoted) {
424 // Skip 'infinite' loops:
425 if (ExitBlocks.size() == 0)
426 return false;
427
428 // Skip if any of the ExitBlocks contains a ret instruction.
429 // This is to prevent dumping of incomplete profile -- if the
430 // the loop is a long running loop and dump is called in the middle
431 // of the loop, the result profile is incomplete.
432 // FIXME: add other heuristics to detect long running loops.
433 if (SkipRetExitBlock) {
434 for (auto *BB : ExitBlocks)
435 if (isa<ReturnInst>(BB->getTerminator()))
436 return false;
437 }
438
439 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);
440 if (MaxProm == 0)
441 return false;
442
443 unsigned Promoted = 0;
444 for (auto &Cand : LoopToCandidates[&L]) {
445
446 SmallVector<PHINode *, 4> NewPHIs;
447 SSAUpdater SSA(&NewPHIs);
448 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);
449
450 // If BFI is set, we will use it to guide the promotions.
451 if (BFI) {
452 auto *BB = Cand.first->getParent();
453 auto InstrCount = BFI->getBlockProfileCount(BB);
454 if (!InstrCount)
455 continue;
456 auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());
457 // If the average loop trip count is not greater than 1.5, we skip
458 // promotion.
459 if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))
460 continue;
461 }
462
463 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,
464 L.getLoopPreheader(), ExitBlocks,
465 InsertPts, LoopToCandidates, LI);
466 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));
467 Promoted++;
468 if (Promoted >= MaxProm)
469 break;
470
471 (*NumPromoted)++;
472 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)
473 break;
474 }
475
476 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="
477 << L.getLoopDepth() << ")\n");
478 return Promoted != 0;
479 }
480
481 private:
allowSpeculativeCounterPromotion(Loop * LP)482 bool allowSpeculativeCounterPromotion(Loop *LP) {
483 SmallVector<BasicBlock *, 8> ExitingBlocks;
484 L.getExitingBlocks(ExitingBlocks);
485 // Not considierered speculative.
486 if (ExitingBlocks.size() == 1)
487 return true;
488 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
489 return false;
490 return true;
491 }
492
493 // Check whether the loop satisfies the basic conditions needed to perform
494 // Counter Promotions.
495 bool
isPromotionPossible(Loop * LP,const SmallVectorImpl<BasicBlock * > & LoopExitBlocks)496 isPromotionPossible(Loop *LP,
497 const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {
498 // We can't insert into a catchswitch.
499 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {
500 return isa<CatchSwitchInst>(Exit->getTerminator());
501 }))
502 return false;
503
504 if (!LP->hasDedicatedExits())
505 return false;
506
507 BasicBlock *PH = LP->getLoopPreheader();
508 if (!PH)
509 return false;
510
511 return true;
512 }
513
514 // Returns the max number of Counter Promotions for LP.
getMaxNumOfPromotionsInLoop(Loop * LP)515 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {
516 SmallVector<BasicBlock *, 8> LoopExitBlocks;
517 LP->getExitBlocks(LoopExitBlocks);
518 if (!isPromotionPossible(LP, LoopExitBlocks))
519 return 0;
520
521 SmallVector<BasicBlock *, 8> ExitingBlocks;
522 LP->getExitingBlocks(ExitingBlocks);
523
524 // If BFI is set, we do more aggressive promotions based on BFI.
525 if (BFI)
526 return (unsigned)-1;
527
528 // Not considierered speculative.
529 if (ExitingBlocks.size() == 1)
530 return MaxNumOfPromotionsPerLoop;
531
532 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)
533 return 0;
534
535 // Whether the target block is in a loop does not matter:
536 if (SpeculativeCounterPromotionToLoop)
537 return MaxNumOfPromotionsPerLoop;
538
539 // Now check the target block:
540 unsigned MaxProm = MaxNumOfPromotionsPerLoop;
541 for (auto *TargetBlock : LoopExitBlocks) {
542 auto *TargetLoop = LI.getLoopFor(TargetBlock);
543 if (!TargetLoop)
544 continue;
545 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);
546 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();
547 MaxProm =
548 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -
549 PendingCandsInTarget);
550 }
551 return MaxProm;
552 }
553
554 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;
555 SmallVector<BasicBlock *, 8> ExitBlocks;
556 SmallVector<Instruction *, 8> InsertPts;
557 Loop &L;
558 LoopInfo &LI;
559 BlockFrequencyInfo *BFI;
560 };
561
// Selects which value profiling runtime function a lowered call targets.
enum class ValueProfilingCallType {
  // Individual values are tracked. Currently used for indirect call target
  // profiling.
  Default,

  // MemOp: the memop size value profiling.
  MemOp
};
570
571 } // end anonymous namespace
572
run(Module & M,ModuleAnalysisManager & AM)573 PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,
574 ModuleAnalysisManager &AM) {
575 FunctionAnalysisManager &FAM =
576 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
577 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
578 return FAM.getResult<TargetLibraryAnalysis>(F);
579 };
580 InstrLowerer Lowerer(M, Options, GetTLI, IsCS);
581 if (!Lowerer.lower())
582 return PreservedAnalyses::all();
583
584 return PreservedAnalyses::none();
585 }
586
lowerIntrinsics(Function * F)587 bool InstrLowerer::lowerIntrinsics(Function *F) {
588 bool MadeChange = false;
589 PromotionCandidates.clear();
590 for (BasicBlock &BB : *F) {
591 for (Instruction &Instr : llvm::make_early_inc_range(BB)) {
592 if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) {
593 lowerIncrement(IPIS);
594 MadeChange = true;
595 } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
596 lowerIncrement(IPI);
597 MadeChange = true;
598 } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) {
599 lowerTimestamp(IPC);
600 MadeChange = true;
601 } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
602 lowerCover(IPC);
603 MadeChange = true;
604 } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) {
605 lowerValueProfileInst(IPVP);
606 MadeChange = true;
607 } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) {
608 IPMP->eraseFromParent();
609 MadeChange = true;
610 } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) {
611 lowerMCDCTestVectorBitmapUpdate(IPBU);
612 MadeChange = true;
613 } else if (auto *IPTU = dyn_cast<InstrProfMCDCCondBitmapUpdate>(&Instr)) {
614 lowerMCDCCondBitmapUpdate(IPTU);
615 MadeChange = true;
616 }
617 }
618 }
619
620 if (!MadeChange)
621 return false;
622
623 promoteCounterLoadStores(F);
624 return true;
625 }
626
isRuntimeCounterRelocationEnabled() const627 bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {
628 // Mach-O don't support weak external references.
629 if (TT.isOSBinFormatMachO())
630 return false;
631
632 if (RuntimeCounterRelocation.getNumOccurrences() > 0)
633 return RuntimeCounterRelocation;
634
635 // Fuchsia uses runtime counter relocation by default.
636 return TT.isOSFuchsia();
637 }
638
isCounterPromotionEnabled() const639 bool InstrLowerer::isCounterPromotionEnabled() const {
640 if (DoCounterPromotion.getNumOccurrences() > 0)
641 return DoCounterPromotion;
642
643 return Options.DoCounterPromotion;
644 }
645
promoteCounterLoadStores(Function * F)646 void InstrLowerer::promoteCounterLoadStores(Function *F) {
647 if (!isCounterPromotionEnabled())
648 return;
649
650 DominatorTree DT(*F);
651 LoopInfo LI(DT);
652 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;
653
654 std::unique_ptr<BlockFrequencyInfo> BFI;
655 if (Options.UseBFIInPromotion) {
656 std::unique_ptr<BranchProbabilityInfo> BPI;
657 BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));
658 BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));
659 }
660
661 for (const auto &LoadStore : PromotionCandidates) {
662 auto *CounterLoad = LoadStore.first;
663 auto *CounterStore = LoadStore.second;
664 BasicBlock *BB = CounterLoad->getParent();
665 Loop *ParentLoop = LI.getLoopFor(BB);
666 if (!ParentLoop)
667 continue;
668 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);
669 }
670
671 SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();
672
673 // Do a post-order traversal of the loops so that counter updates can be
674 // iteratively hoisted outside the loop nest.
675 for (auto *Loop : llvm::reverse(Loops)) {
676 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());
677 Promoter.run(&TotalCountersPromoted);
678 }
679 }
680
needsRuntimeHookUnconditionally(const Triple & TT)681 static bool needsRuntimeHookUnconditionally(const Triple &TT) {
682 // On Fuchsia, we only need runtime hook if any counters are present.
683 if (TT.isOSFuchsia())
684 return false;
685
686 return true;
687 }
688
689 /// Check if the module contains uses of any profiling intrinsics.
containsProfilingIntrinsics(Module & M)690 static bool containsProfilingIntrinsics(Module &M) {
691 auto containsIntrinsic = [&](int ID) {
692 if (auto *F = M.getFunction(Intrinsic::getName(ID)))
693 return !F->use_empty();
694 return false;
695 };
696 return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||
697 containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
698 containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||
699 containsIntrinsic(llvm::Intrinsic::instrprof_timestamp) ||
700 containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
701 }
702
lower()703 bool InstrLowerer::lower() {
704 bool MadeChange = false;
705 bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);
706 if (NeedsRuntimeHook)
707 MadeChange = emitRuntimeHook();
708
709 bool ContainsProfiling = containsProfilingIntrinsics(M);
710 GlobalVariable *CoverageNamesVar =
711 M.getNamedGlobal(getCoverageUnusedNamesVarName());
712 // Improve compile time by avoiding linear scans when there is no work.
713 if (!ContainsProfiling && !CoverageNamesVar)
714 return MadeChange;
715
716 // We did not know how many value sites there would be inside
717 // the instrumented function. This is counting the number of instrumented
718 // target value sites to enter it as field in the profile data variable.
719 for (Function &F : M) {
720 InstrProfCntrInstBase *FirstProfInst = nullptr;
721 for (BasicBlock &BB : F) {
722 for (auto I = BB.begin(), E = BB.end(); I != E; I++) {
723 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
724 computeNumValueSiteCounts(Ind);
725 else {
726 if (FirstProfInst == nullptr &&
727 (isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I)))
728 FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I);
729 // If the MCDCBitmapParameters intrinsic seen, create the bitmaps.
730 if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I))
731 static_cast<void>(getOrCreateRegionBitmaps(Params));
732 }
733 }
734 }
735
736 // Use a profile intrinsic to create the region counters and data variable.
737 // Also create the data variable based on the MCDCParams.
738 if (FirstProfInst != nullptr) {
739 static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
740 }
741 }
742
743 for (Function &F : M)
744 MadeChange |= lowerIntrinsics(&F);
745
746 if (CoverageNamesVar) {
747 lowerCoverageData(CoverageNamesVar);
748 MadeChange = true;
749 }
750
751 if (!MadeChange)
752 return false;
753
754 emitVNodes();
755 emitNameData();
756
757 // Emit runtime hook for the cases where the target does not unconditionally
758 // require pulling in profile runtime, and coverage is enabled on code that is
759 // not eliminated by the front-end, e.g. unused functions with internal
760 // linkage.
761 if (!NeedsRuntimeHook && ContainsProfiling)
762 emitRuntimeHook();
763
764 emitRegistration();
765 emitUses();
766 emitInitialization();
767 return true;
768 }
769
getOrInsertValueProfilingCall(Module & M,const TargetLibraryInfo & TLI,ValueProfilingCallType CallType=ValueProfilingCallType::Default)770 static FunctionCallee getOrInsertValueProfilingCall(
771 Module &M, const TargetLibraryInfo &TLI,
772 ValueProfilingCallType CallType = ValueProfilingCallType::Default) {
773 LLVMContext &Ctx = M.getContext();
774 auto *ReturnTy = Type::getVoidTy(M.getContext());
775
776 AttributeList AL;
777 if (auto AK = TLI.getExtAttrForI32Param(false))
778 AL = AL.addParamAttribute(M.getContext(), 2, AK);
779
780 assert((CallType == ValueProfilingCallType::Default ||
781 CallType == ValueProfilingCallType::MemOp) &&
782 "Must be Default or MemOp");
783 Type *ParamTypes[] = {
784 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType
785 #include "llvm/ProfileData/InstrProfData.inc"
786 };
787 auto *ValueProfilingCallTy =
788 FunctionType::get(ReturnTy, ArrayRef(ParamTypes), false);
789 StringRef FuncName = CallType == ValueProfilingCallType::Default
790 ? getInstrProfValueProfFuncName()
791 : getInstrProfValueProfMemOpFuncName();
792 return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);
793 }
794
computeNumValueSiteCounts(InstrProfValueProfileInst * Ind)795 void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
796 GlobalVariable *Name = Ind->getName();
797 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
798 uint64_t Index = Ind->getIndex()->getZExtValue();
799 auto &PD = ProfileDataMap[Name];
800 PD.NumValueSites[ValueKind] =
801 std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));
802 }
803
lowerValueProfileInst(InstrProfValueProfileInst * Ind)804 void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
805 // TODO: Value profiling heavily depends on the data section which is omitted
806 // in lightweight mode. We need to move the value profile pointer to the
807 // Counter struct to get this working.
808 assert(
809 !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE &&
810 "Value profiling is not yet supported with lightweight instrumentation");
811 GlobalVariable *Name = Ind->getName();
812 auto It = ProfileDataMap.find(Name);
813 assert(It != ProfileDataMap.end() && It->second.DataVar &&
814 "value profiling detected in function with no counter incerement");
815
816 GlobalVariable *DataVar = It->second.DataVar;
817 uint64_t ValueKind = Ind->getValueKind()->getZExtValue();
818 uint64_t Index = Ind->getIndex()->getZExtValue();
819 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)
820 Index += It->second.NumValueSites[Kind];
821
822 IRBuilder<> Builder(Ind);
823 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==
824 llvm::InstrProfValueKind::IPVK_MemOPSize);
825 CallInst *Call = nullptr;
826 auto *TLI = &GetTLI(*Ind->getFunction());
827
828 // To support value profiling calls within Windows exception handlers, funclet
829 // information contained within operand bundles needs to be copied over to
830 // the library call. This is required for the IR to be processed by the
831 // WinEHPrepare pass.
832 SmallVector<OperandBundleDef, 1> OpBundles;
833 Ind->getOperandBundlesAsDefs(OpBundles);
834 if (!IsMemOpSize) {
835 Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
836 Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,
837 OpBundles);
838 } else {
839 Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};
840 Call = Builder.CreateCall(
841 getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),
842 Args, OpBundles);
843 }
844 if (auto AK = TLI->getExtAttrForI32Param(false))
845 Call->addParamAttr(2, AK);
846 Ind->replaceAllUsesWith(Call);
847 Ind->eraseFromParent();
848 }
849
getCounterAddress(InstrProfCntrInstBase * I)850 Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {
851 auto *Counters = getOrCreateRegionCounters(I);
852 IRBuilder<> Builder(I);
853
854 if (isa<InstrProfTimestampInst>(I))
855 Counters->setAlignment(Align(8));
856
857 auto *Addr = Builder.CreateConstInBoundsGEP2_32(
858 Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
859
860 if (!isRuntimeCounterRelocationEnabled())
861 return Addr;
862
863 Type *Int64Ty = Type::getInt64Ty(M.getContext());
864 Function *Fn = I->getParent()->getParent();
865 LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];
866 if (!BiasLI) {
867 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());
868 auto *Bias = M.getGlobalVariable(getInstrProfCounterBiasVarName());
869 if (!Bias) {
870 // Compiler must define this variable when runtime counter relocation
871 // is being used. Runtime has a weak external reference that is used
872 // to check whether that's the case or not.
873 Bias = new GlobalVariable(
874 M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,
875 Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName());
876 Bias->setVisibility(GlobalVariable::HiddenVisibility);
877 // A definition that's weak (linkonce_odr) without being in a COMDAT
878 // section wouldn't lead to link errors, but it would lead to a dead
879 // data word from every TU but one. Putting it in COMDAT ensures there
880 // will be exactly one data slot in the link.
881 if (TT.supportsCOMDAT())
882 Bias->setComdat(M.getOrInsertComdat(Bias->getName()));
883 }
884 BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias);
885 }
886 auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI);
887 return Builder.CreateIntToPtr(Add, Addr->getType());
888 }
889
getBitmapAddress(InstrProfMCDCTVBitmapUpdate * I)890 Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {
891 auto *Bitmaps = getOrCreateRegionBitmaps(I);
892 IRBuilder<> Builder(I);
893
894 auto *Addr = Builder.CreateConstInBoundsGEP2_32(
895 Bitmaps->getValueType(), Bitmaps, 0, I->getBitmapIndex()->getZExtValue());
896
897 if (isRuntimeCounterRelocationEnabled()) {
898 LLVMContext &Ctx = M.getContext();
899 Ctx.diagnose(DiagnosticInfoPGOProfile(
900 M.getName().data(),
901 Twine("Runtime counter relocation is presently not supported for MC/DC "
902 "bitmaps."),
903 DS_Warning));
904 }
905
906 return Addr;
907 }
908
lowerCover(InstrProfCoverInst * CoverInstruction)909 void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {
910 auto *Addr = getCounterAddress(CoverInstruction);
911 IRBuilder<> Builder(CoverInstruction);
912 // We store zero to represent that this block is covered.
913 Builder.CreateStore(Builder.getInt8(0), Addr);
914 CoverInstruction->eraseFromParent();
915 }
916
lowerTimestamp(InstrProfTimestampInst * TimestampInstruction)917 void InstrLowerer::lowerTimestamp(
918 InstrProfTimestampInst *TimestampInstruction) {
919 assert(TimestampInstruction->getIndex()->isZeroValue() &&
920 "timestamp probes are always the first probe for a function");
921 auto &Ctx = M.getContext();
922 auto *TimestampAddr = getCounterAddress(TimestampInstruction);
923 IRBuilder<> Builder(TimestampInstruction);
924 auto *CalleeTy =
925 FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false);
926 auto Callee = M.getOrInsertFunction(
927 INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy);
928 Builder.CreateCall(Callee, {TimestampAddr});
929 TimestampInstruction->eraseFromParent();
930 }
931
lowerIncrement(InstrProfIncrementInst * Inc)932 void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {
933 auto *Addr = getCounterAddress(Inc);
934
935 IRBuilder<> Builder(Inc);
936 if (Options.Atomic || AtomicCounterUpdateAll ||
937 (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {
938 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),
939 MaybeAlign(), AtomicOrdering::Monotonic);
940 } else {
941 Value *IncStep = Inc->getStep();
942 Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");
943 auto *Count = Builder.CreateAdd(Load, Inc->getStep());
944 auto *Store = Builder.CreateStore(Count, Addr);
945 if (isCounterPromotionEnabled())
946 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);
947 }
948 Inc->eraseFromParent();
949 }
950
lowerCoverageData(GlobalVariable * CoverageNamesVar)951 void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
952 ConstantArray *Names =
953 cast<ConstantArray>(CoverageNamesVar->getInitializer());
954 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
955 Constant *NC = Names->getOperand(I);
956 Value *V = NC->stripPointerCasts();
957 assert(isa<GlobalVariable>(V) && "Missing reference to function name");
958 GlobalVariable *Name = cast<GlobalVariable>(V);
959
960 Name->setLinkage(GlobalValue::PrivateLinkage);
961 ReferencedNames.push_back(Name);
962 if (isa<ConstantExpr>(NC))
963 NC->dropAllReferences();
964 }
965 CoverageNamesVar->eraseFromParent();
966 }
967
lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate * Update)968 void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(
969 InstrProfMCDCTVBitmapUpdate *Update) {
970 IRBuilder<> Builder(Update);
971 auto *Int8Ty = Type::getInt8Ty(M.getContext());
972 auto *Int8PtrTy = PointerType::getUnqual(M.getContext());
973 auto *Int32Ty = Type::getInt32Ty(M.getContext());
974 auto *Int64Ty = Type::getInt64Ty(M.getContext());
975 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
976 auto *BitmapAddr = getBitmapAddress(Update);
977
978 // Load Temp Val.
979 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
980 auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp");
981
982 // Calculate byte offset using div8.
983 // %1 = lshr i32 %mcdc.temp, 3
984 auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3);
985
986 // Add byte offset to section base byte address.
987 // %2 = zext i32 %1 to i64
988 // %3 = add i64 ptrtoint (ptr @__profbm_test to i64), %2
989 auto *BitmapByteAddr =
990 Builder.CreateAdd(Builder.CreatePtrToInt(BitmapAddr, Int64Ty),
991 Builder.CreateZExtOrBitCast(BitmapByteOffset, Int64Ty));
992
993 // Convert to a pointer.
994 // %4 = inttoptr i32 %3 to ptr
995 BitmapByteAddr = Builder.CreateIntToPtr(BitmapByteAddr, Int8PtrTy);
996
997 // Calculate bit offset into bitmap byte by using div8 remainder (AND ~8)
998 // %5 = and i32 %mcdc.temp, 7
999 // %6 = trunc i32 %5 to i8
1000 auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty);
1001
1002 // Shift bit offset left to form a bitmap.
1003 // %7 = shl i8 1, %6
1004 auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);
1005
1006 // Load profile bitmap byte.
1007 // %mcdc.bits = load i8, ptr %4, align 1
1008 auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits");
1009
1010 // Perform logical OR of profile bitmap byte and shifted bit offset.
1011 // %8 = or i8 %mcdc.bits, %7
1012 auto *Result = Builder.CreateOr(Bitmap, ShiftedVal);
1013
1014 // Store the updated profile bitmap byte.
1015 // store i8 %8, ptr %3, align 1
1016 Builder.CreateStore(Result, BitmapByteAddr);
1017 Update->eraseFromParent();
1018 }
1019
lowerMCDCCondBitmapUpdate(InstrProfMCDCCondBitmapUpdate * Update)1020 void InstrLowerer::lowerMCDCCondBitmapUpdate(
1021 InstrProfMCDCCondBitmapUpdate *Update) {
1022 IRBuilder<> Builder(Update);
1023 auto *Int32Ty = Type::getInt32Ty(M.getContext());
1024 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();
1025
1026 // Load the MCDC temporary value from the stack.
1027 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4
1028 auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp");
1029
1030 // Zero-extend the evaluated condition boolean value (0 or 1) by 32bits.
1031 // %1 = zext i1 %tobool to i32
1032 auto *CondV_32 = Builder.CreateZExt(Update->getCondBool(), Int32Ty);
1033
1034 // Shift the boolean value left (by the condition's ID) to form a bitmap.
1035 // %2 = shl i32 %1, <Update->getCondID()>
1036 auto *ShiftedVal = Builder.CreateShl(CondV_32, Update->getCondID());
1037
1038 // Perform logical OR of the bitmap against the loaded MCDC temporary value.
1039 // %3 = or i32 %mcdc.temp, %2
1040 auto *Result = Builder.CreateOr(Temp, ShiftedVal);
1041
1042 // Store the updated temporary value back to the stack.
1043 // store i32 %3, ptr %mcdc.addr, align 4
1044 Builder.CreateStore(Result, MCDCCondBitmapAddr);
1045 Update->eraseFromParent();
1046 }
1047
1048 /// Get the name of a profiling variable for a particular function.
getVarName(InstrProfInstBase * Inc,StringRef Prefix,bool & Renamed)1049 static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,
1050 bool &Renamed) {
1051 StringRef NamePrefix = getInstrProfNameVarPrefix();
1052 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());
1053 Function *F = Inc->getParent()->getParent();
1054 Module *M = F->getParent();
1055 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||
1056 !canRenameComdatFunc(*F)) {
1057 Renamed = false;
1058 return (Prefix + Name).str();
1059 }
1060 Renamed = true;
1061 uint64_t FuncHash = Inc->getHash()->getZExtValue();
1062 SmallVector<char, 24> HashPostfix;
1063 if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))
1064 return (Prefix + Name).str();
1065 return (Prefix + Name + "." + Twine(FuncHash)).str();
1066 }
1067
getIntModuleFlagOrZero(const Module & M,StringRef Flag)1068 static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {
1069 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));
1070 if (!MD)
1071 return 0;
1072
1073 // If the flag is a ConstantAsMetadata, it should be an integer representable
1074 // in 64-bits.
1075 return cast<ConstantInt>(MD->getValue())->getZExtValue();
1076 }
1077
enablesValueProfiling(const Module & M)1078 static bool enablesValueProfiling(const Module &M) {
1079 return isIRPGOFlagSet(&M) ||
1080 getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0;
1081 }
1082
1083 // Conservatively returns true if data variables may be referenced by code.
profDataReferencedByCode(const Module & M)1084 static bool profDataReferencedByCode(const Module &M) {
1085 return enablesValueProfiling(M);
1086 }
1087
shouldRecordFunctionAddr(Function * F)1088 static inline bool shouldRecordFunctionAddr(Function *F) {
1089 // Only record function addresses if IR PGO is enabled or if clang value
1090 // profiling is enabled. Recording function addresses greatly increases object
1091 // file size, because it prevents the inliner from deleting functions that
1092 // have been inlined everywhere.
1093 if (!profDataReferencedByCode(*F->getParent()))
1094 return false;
1095
1096 // Check the linkage
1097 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();
1098 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&
1099 !HasAvailableExternallyLinkage)
1100 return true;
1101
1102 // A function marked 'alwaysinline' with available_externally linkage can't
1103 // have its address taken. Doing so would create an undefined external ref to
1104 // the function, which would fail to link.
1105 if (HasAvailableExternallyLinkage &&
1106 F->hasFnAttribute(Attribute::AlwaysInline))
1107 return false;
1108
1109 // Prohibit function address recording if the function is both internal and
1110 // COMDAT. This avoids the profile data variable referencing internal symbols
1111 // in COMDAT.
1112 if (F->hasLocalLinkage() && F->hasComdat())
1113 return false;
1114
1115 // Check uses of this function for other than direct calls or invokes to it.
1116 // Inline virtual functions have linkeOnceODR linkage. When a key method
1117 // exists, the vtable will only be emitted in the TU where the key method
1118 // is defined. In a TU where vtable is not available, the function won't
1119 // be 'addresstaken'. If its address is not recorded here, the profile data
1120 // with missing address may be picked by the linker leading to missing
1121 // indirect call target info.
1122 return F->hasAddressTaken() || F->hasLinkOnceLinkage();
1123 }
1124
shouldUsePublicSymbol(Function * Fn)1125 static inline bool shouldUsePublicSymbol(Function *Fn) {
1126 // It isn't legal to make an alias of this function at all
1127 if (Fn->isDeclarationForLinker())
1128 return true;
1129
1130 // Symbols with local linkage can just use the symbol directly without
1131 // introducing relocations
1132 if (Fn->hasLocalLinkage())
1133 return true;
1134
1135 // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
1136 // unfavorable interaction between the new alias and the alias renaming done
1137 // in LowerTypeTests under ThinLTO. For comdat functions that would normally
1138 // be deduplicated, but the renaming scheme ends up preventing renaming, since
1139 // it creates unique names for each alias, resulting in duplicated symbols. In
1140 // the future, we should update the CFI related passes to migrate these
1141 // aliases to the same module as the jump-table they refer to will be defined.
1142 if (Fn->hasMetadata(LLVMContext::MD_type))
1143 return true;
1144
1145 // For comdat functions, an alias would need the same linkage as the original
1146 // function and hidden visibility. There is no point in adding an alias with
1147 // identical linkage an visibility to avoid introducing symbolic relocations.
1148 if (Fn->hasComdat() &&
1149 (Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility))
1150 return true;
1151
1152 // its OK to use an alias
1153 return false;
1154 }
1155
getFuncAddrForProfData(Function * Fn)1156 static inline Constant *getFuncAddrForProfData(Function *Fn) {
1157 auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext());
1158 // Store a nullptr in __llvm_profd, if we shouldn't use a real address
1159 if (!shouldRecordFunctionAddr(Fn))
1160 return ConstantPointerNull::get(Int8PtrTy);
1161
1162 // If we can't use an alias, we must use the public symbol, even though this
1163 // may require a symbolic relocation.
1164 if (shouldUsePublicSymbol(Fn))
1165 return Fn;
1166
1167 // When possible use a private alias to avoid symbolic relocations.
1168 auto *GA = GlobalAlias::create(GlobalValue::LinkageTypes::PrivateLinkage,
1169 Fn->getName() + ".local", Fn);
1170
1171 // When the instrumented function is a COMDAT function, we cannot use a
1172 // private alias. If we did, we would create reference to a local label in
1173 // this function's section. If this version of the function isn't selected by
1174 // the linker, then the metadata would introduce a reference to a discarded
1175 // section. So, for COMDAT functions, we need to adjust the linkage of the
1176 // alias. Using hidden visibility avoids a dynamic relocation and an entry in
1177 // the dynamic symbol table.
1178 //
1179 // Note that this handles COMDAT functions with visibility other than Hidden,
1180 // since that case is covered in shouldUsePublicSymbol()
1181 if (Fn->hasComdat()) {
1182 GA->setLinkage(Fn->getLinkage());
1183 GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility);
1184 }
1185
1186 // appendToCompilerUsed(*Fn->getParent(), {GA});
1187
1188 return GA;
1189 }
1190
needsRuntimeRegistrationOfSectionRange(const Triple & TT)1191 static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
1192 // compiler-rt uses linker support to get data/counters/name start/end for
1193 // ELF, COFF, Mach-O and XCOFF.
1194 if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||
1195 TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF())
1196 return false;
1197
1198 return true;
1199 }
1200
maybeSetComdat(GlobalVariable * GV,Function * Fn,StringRef VarName)1201 void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn,
1202 StringRef VarName) {
1203 bool DataReferencedByCode = profDataReferencedByCode(M);
1204 bool NeedComdat = needsComdatForCounter(*Fn, M);
1205 bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());
1206
1207 if (!UseComdat)
1208 return;
1209
1210 StringRef GroupName =
1211 TT.isOSBinFormatCOFF() && DataReferencedByCode ? GV->getName() : VarName;
1212 Comdat *C = M.getOrInsertComdat(GroupName);
1213 if (!NeedComdat)
1214 C->setSelectionKind(Comdat::NoDeduplicate);
1215 GV->setComdat(C);
1216 // COFF doesn't allow the comdat group leader to have private linkage, so
1217 // upgrade private linkage to internal linkage to produce a symbol table
1218 // entry.
1219 if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())
1220 GV->setLinkage(GlobalValue::InternalLinkage);
1221 }
1222
setupProfileSection(InstrProfInstBase * Inc,InstrProfSectKind IPSK)1223 GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,
1224 InstrProfSectKind IPSK) {
1225 GlobalVariable *NamePtr = Inc->getName();
1226
1227 // Match the linkage and visibility of the name global.
1228 Function *Fn = Inc->getParent()->getParent();
1229 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
1230 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();
1231
1232 // Use internal rather than private linkage so the counter variable shows up
1233 // in the symbol table when using debug info for correlation.
1234 if ((DebugInfoCorrelate ||
1235 ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) &&
1236 TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)
1237 Linkage = GlobalValue::InternalLinkage;
1238
1239 // Due to the limitation of binder as of 2021/09/28, the duplicate weak
1240 // symbols in the same csect won't be discarded. When there are duplicate weak
1241 // symbols, we can NOT guarantee that the relocations get resolved to the
1242 // intended weak symbol, so we can not ensure the correctness of the relative
1243 // CounterPtr, so we have to use private linkage for counter and data symbols.
1244 if (TT.isOSBinFormatXCOFF()) {
1245 Linkage = GlobalValue::PrivateLinkage;
1246 Visibility = GlobalValue::DefaultVisibility;
1247 }
1248 // Move the name variable to the right section. Place them in a COMDAT group
1249 // if the associated function is a COMDAT. This will make sure that only one
1250 // copy of counters of the COMDAT function will be emitted after linking. Keep
1251 // in mind that this pass may run before the inliner, so we need to create a
1252 // new comdat group for the counters and profiling data. If we use the comdat
1253 // of the parent function, that will result in relocations against discarded
1254 // sections.
1255 //
1256 // If the data variable is referenced by code, counters and data have to be
1257 // in different comdats for COFF because the Visual C++ linker will report
1258 // duplicate symbol errors if there are multiple external symbols with the
1259 // same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE.
1260 //
1261 // For ELF, when not using COMDAT, put counters, data and values into a
1262 // nodeduplicate COMDAT which is lowered to a zero-flag section group. This
1263 // allows -z start-stop-gc to discard the entire group when the function is
1264 // discarded.
1265 bool Renamed;
1266 GlobalVariable *Ptr;
1267 StringRef VarPrefix;
1268 std::string VarName;
1269 if (IPSK == IPSK_cnts) {
1270 VarPrefix = getInstrProfCountersVarPrefix();
1271 VarName = getVarName(Inc, VarPrefix, Renamed);
1272 InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Inc);
1273 Ptr = createRegionCounters(CntrIncrement, VarName, Linkage);
1274 } else if (IPSK == IPSK_bitmap) {
1275 VarPrefix = getInstrProfBitmapVarPrefix();
1276 VarName = getVarName(Inc, VarPrefix, Renamed);
1277 InstrProfMCDCBitmapInstBase *BitmapUpdate =
1278 dyn_cast<InstrProfMCDCBitmapInstBase>(Inc);
1279 Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage);
1280 } else {
1281 llvm_unreachable("Profile Section must be for Counters or Bitmaps");
1282 }
1283
1284 Ptr->setVisibility(Visibility);
1285 // Put the counters and bitmaps in their own sections so linkers can
1286 // remove unneeded sections.
1287 Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat()));
1288 Ptr->setLinkage(Linkage);
1289 maybeSetComdat(Ptr, Fn, VarName);
1290 return Ptr;
1291 }
1292
1293 GlobalVariable *
createRegionBitmaps(InstrProfMCDCBitmapInstBase * Inc,StringRef Name,GlobalValue::LinkageTypes Linkage)1294 InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,
1295 StringRef Name,
1296 GlobalValue::LinkageTypes Linkage) {
1297 uint64_t NumBytes = Inc->getNumBitmapBytes()->getZExtValue();
1298 auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes);
1299 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,
1300 Constant::getNullValue(BitmapTy), Name);
1301 GV->setAlignment(Align(1));
1302 return GV;
1303 }
1304
1305 GlobalVariable *
getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase * Inc)1306 InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {
1307 GlobalVariable *NamePtr = Inc->getName();
1308 auto &PD = ProfileDataMap[NamePtr];
1309 if (PD.RegionBitmaps)
1310 return PD.RegionBitmaps;
1311
1312 // If RegionBitmaps doesn't already exist, create it by first setting up
1313 // the corresponding profile section.
1314 auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap);
1315 PD.RegionBitmaps = BitmapPtr;
1316 PD.NumBitmapBytes = Inc->getNumBitmapBytes()->getZExtValue();
1317 return PD.RegionBitmaps;
1318 }
1319
1320 GlobalVariable *
createRegionCounters(InstrProfCntrInstBase * Inc,StringRef Name,GlobalValue::LinkageTypes Linkage)1321 InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,
1322 GlobalValue::LinkageTypes Linkage) {
1323 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
1324 auto &Ctx = M.getContext();
1325 GlobalVariable *GV;
1326 if (isa<InstrProfCoverInst>(Inc)) {
1327 auto *CounterTy = Type::getInt8Ty(Ctx);
1328 auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);
1329 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type.
1330 std::vector<Constant *> InitialValues(NumCounters,
1331 Constant::getAllOnesValue(CounterTy));
1332 GV = new GlobalVariable(M, CounterArrTy, false, Linkage,
1333 ConstantArray::get(CounterArrTy, InitialValues),
1334 Name);
1335 GV->setAlignment(Align(1));
1336 } else {
1337 auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);
1338 GV = new GlobalVariable(M, CounterTy, false, Linkage,
1339 Constant::getNullValue(CounterTy), Name);
1340 GV->setAlignment(Align(8));
1341 }
1342 return GV;
1343 }
1344
1345 GlobalVariable *
getOrCreateRegionCounters(InstrProfCntrInstBase * Inc)1346 InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {
1347 GlobalVariable *NamePtr = Inc->getName();
1348 auto &PD = ProfileDataMap[NamePtr];
1349 if (PD.RegionCounters)
1350 return PD.RegionCounters;
1351
1352 // If RegionCounters doesn't already exist, create it by first setting up
1353 // the corresponding profile section.
1354 auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts);
1355 PD.RegionCounters = CounterPtr;
1356
1357 if (DebugInfoCorrelate ||
1358 ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) {
1359 LLVMContext &Ctx = M.getContext();
1360 Function *Fn = Inc->getParent()->getParent();
1361 if (auto *SP = Fn->getSubprogram()) {
1362 DIBuilder DB(M, true, SP->getUnit());
1363 Metadata *FunctionNameAnnotation[] = {
1364 MDString::get(Ctx, InstrProfCorrelator::FunctionNameAttributeName),
1365 MDString::get(Ctx, getPGOFuncNameVarInitializer(NamePtr)),
1366 };
1367 Metadata *CFGHashAnnotation[] = {
1368 MDString::get(Ctx, InstrProfCorrelator::CFGHashAttributeName),
1369 ConstantAsMetadata::get(Inc->getHash()),
1370 };
1371 Metadata *NumCountersAnnotation[] = {
1372 MDString::get(Ctx, InstrProfCorrelator::NumCountersAttributeName),
1373 ConstantAsMetadata::get(Inc->getNumCounters()),
1374 };
1375 auto Annotations = DB.getOrCreateArray({
1376 MDNode::get(Ctx, FunctionNameAnnotation),
1377 MDNode::get(Ctx, CFGHashAnnotation),
1378 MDNode::get(Ctx, NumCountersAnnotation),
1379 });
1380 auto *DICounter = DB.createGlobalVariableExpression(
1381 SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),
1382 /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"),
1383 CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,
1384 /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,
1385 Annotations);
1386 CounterPtr->addDebugInfo(DICounter);
1387 DB.finalize();
1388 }
1389
1390 // Mark the counter variable as used so that it isn't optimized out.
1391 CompilerUsedVars.push_back(PD.RegionCounters);
1392 }
1393
1394 // Create the data variable (if it doesn't already exist).
1395 createDataVariable(Inc);
1396
1397 return PD.RegionCounters;
1398 }
1399
/// Create the per-function profile data variable for the function
/// containing \p Inc, unless it already exists or debug-info correlation
/// makes it unnecessary.
///
/// When value sites were counted for the function and static allocation is
/// usable, an i64 array with one slot per value site is created as well. The
/// data variable is stored in ProfileDataMap, appended to CompilerUsedVars,
/// and the function's name variable is made private and appended to
/// ReferencedNames.
///
/// NOTE(review): the struct field types and initializers come from the
/// INSTR_PROF_DATA entries in InstrProfData.inc. Those expansions presumably
/// refer to locals of this function by name (several locals, e.g.
/// NumCounters and NumBitmapBytes, have no other visible use), so confirm
/// against that file before renaming or removing any local here.
void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {
  // When debug information is correlated to profile data, a data variable
  // is not needed.
  if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
    return;

  GlobalVariable *NamePtr = Inc->getName();
  auto &PD = ProfileDataMap[NamePtr];

  // Return if data variable was already created.
  if (PD.DataVar)
    return;

  LLVMContext &Ctx = M.getContext();

  // Start from the linkage and visibility of the function's name variable.
  Function *Fn = Inc->getParent()->getParent();
  GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();
  GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();

  // Due to the limitation of binder as of 2021/09/28, the duplicate weak
  // symbols in the same csect won't be discarded. When there are duplicate weak
  // symbols, we can NOT guarantee that the relocations get resolved to the
  // intended weak symbol, so we can not ensure the correctness of the relative
  // CounterPtr, so we have to use private linkage for counter and data symbols.
  if (TT.isOSBinFormatXCOFF()) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }

  bool DataReferencedByCode = profDataReferencedByCode(M);
  bool NeedComdat = needsComdatForCounter(*Fn, M);
  // Set by every getVarName() call below; the value left by the last call is
  // the one consulted when deciding whether the data variable can be private.
  bool Renamed;

  // The Data Variable section is anchored to profile counters.
  std::string CntsVarName =
      getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed);
  std::string DataVarName =
      getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);

  auto *Int8PtrTy = PointerType::getUnqual(Ctx);
  // Allocate statically the array of pointers to value profile nodes for
  // the current function.
  Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);
  // NS is the total number of value sites across all value kinds.
  uint64_t NS = 0;
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    NS += PD.NumValueSites[Kind];
  if (NS > 0 && ValueProfileStaticAlloc &&
      !needsRuntimeRegistrationOfSectionRange(TT)) {
    ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);
    auto *ValuesVar = new GlobalVariable(
        M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),
        getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));
    ValuesVar->setVisibility(Visibility);
    setGlobalVariableLargeSection(TT, *ValuesVar);
    ValuesVar->setSection(
        getInstrProfSectionName(IPSK_vals, TT.getObjectFormat()));
    ValuesVar->setAlignment(Align(8));
    // The values array shares the comdat group named after the counters.
    maybeSetComdat(ValuesVar, Fn, CntsVarName);
    ValuesPtrExpr = ValuesVar;
  }

  uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();
  auto *CounterPtr = PD.RegionCounters;

  uint64_t NumBitmapBytes = PD.NumBitmapBytes;

  // Create data variable.
  auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());
  auto *Int16Ty = Type::getInt16Ty(Ctx);
  auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);
  // One struct field type per INSTR_PROF_DATA entry in InstrProfData.inc.
  Type *DataTypes[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));

  Constant *FunctionAddr = getFuncAddrForProfData(Fn);

  // Per-kind value site counts as i16 constants.
  Constant *Int16ArrayVals[IPVK_Last + 1];
  for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
    Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);

  // If the data variable is not referenced by code (if we don't emit
  // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the
  // data variable live under linker GC, the data variable can be private. This
  // optimization applies to ELF.
  //
  // On COFF, a comdat leader cannot be local so we require DataReferencedByCode
  // to be false.
  //
  // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees
  // that other copies must have the same CFG and cannot have value profiling.
  // If no hash suffix, other profd copies may be referenced by code.
  if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&
      (TT.isOSBinFormatELF() ||
       (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {
    Linkage = GlobalValue::PrivateLinkage;
    Visibility = GlobalValue::DefaultVisibility;
  }
  // The variable is created without an initializer first because the
  // relative pointers computed below refer to the variable itself.
  auto *Data =
      new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);
  Constant *RelativeCounterPtr;
  GlobalVariable *BitmapPtr = PD.RegionBitmaps;
  // Stays 0 when the function has no bitmap variable.
  Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0);
  InstrProfSectKind DataSectionKind;
  // With binary profile correlation, profile data is not loaded into memory.
  // profile data must reference profile counter with an absolute relocation.
  if (ProfileCorrelate == InstrProfCorrelator::BINARY) {
    DataSectionKind = IPSK_covdata;
    RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);
    if (BitmapPtr != nullptr)
      RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy);
  } else {
    // Reference the counter variable with a label difference (link-time
    // constant).
    DataSectionKind = IPSK_data;
    RelativeCounterPtr =
        ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),
                             ConstantExpr::getPtrToInt(Data, IntPtrTy));
    if (BitmapPtr != nullptr)
      RelativeBitmapPtr =
          ConstantExpr::getSub(ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy),
                               ConstantExpr::getPtrToInt(Data, IntPtrTy));
  }

  // One initializer per INSTR_PROF_DATA entry in InstrProfData.inc.
  Constant *DataVals[] = {
#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,
#include "llvm/ProfileData/InstrProfData.inc"
  };
  Data->setInitializer(ConstantStruct::get(DataTy, DataVals));

  Data->setVisibility(Visibility);
  Data->setSection(
      getInstrProfSectionName(DataSectionKind, TT.getObjectFormat()));
  Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));
  // The data variable shares the comdat group named after the counters.
  maybeSetComdat(Data, Fn, CntsVarName);

  PD.DataVar = Data;

  // Mark the data variable as used so that it isn't stripped out.
  CompilerUsedVars.push_back(Data);
  // Now that the linkage set by the FE has been passed to the data and counter
  // variables, reset Name variable's linkage and visibility to private so that
  // it can be removed later by the compiler.
  NamePtr->setLinkage(GlobalValue::PrivateLinkage);
  // Collect the referenced names to be used by emitNameData.
  ReferencedNames.push_back(NamePtr);
}
1548
emitVNodes()1549 void InstrLowerer::emitVNodes() {
1550 if (!ValueProfileStaticAlloc)
1551 return;
1552
1553 // For now only support this on platforms that do
1554 // not require runtime registration to discover
1555 // named section start/end.
1556 if (needsRuntimeRegistrationOfSectionRange(TT))
1557 return;
1558
1559 size_t TotalNS = 0;
1560 for (auto &PD : ProfileDataMap) {
1561 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1562 TotalNS += PD.second.NumValueSites[Kind];
1563 }
1564
1565 if (!TotalNS)
1566 return;
1567
1568 uint64_t NumCounters = TotalNS * NumCountersPerValueSite;
1569 // Heuristic for small programs with very few total value sites.
1570 // The default value of vp-counters-per-site is chosen based on
1571 // the observation that large apps usually have a low percentage
1572 // of value sites that actually have any profile data, and thus
1573 // the average number of counters per site is low. For small
1574 // apps with very few sites, this may not be true. Bump up the
1575 // number of counters in this case.
1576 #define INSTR_PROF_MIN_VAL_COUNTS 10
1577 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)
1578 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);
1579
1580 auto &Ctx = M.getContext();
1581 Type *VNodeTypes[] = {
1582 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,
1583 #include "llvm/ProfileData/InstrProfData.inc"
1584 };
1585 auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes));
1586
1587 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);
1588 auto *VNodesVar = new GlobalVariable(
1589 M, VNodesTy, false, GlobalValue::PrivateLinkage,
1590 Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
1591 setGlobalVariableLargeSection(TT, *VNodesVar);
1592 VNodesVar->setSection(
1593 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
1594 VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy));
1595 // VNodesVar is used by runtime but not referenced via relocation by other
1596 // sections. Conservatively make it linker retained.
1597 UsedVars.push_back(VNodesVar);
1598 }
1599
emitNameData()1600 void InstrLowerer::emitNameData() {
1601 std::string UncompressedData;
1602
1603 if (ReferencedNames.empty())
1604 return;
1605
1606 std::string CompressedNameStr;
1607 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,
1608 DoInstrProfNameCompression)) {
1609 report_fatal_error(Twine(toString(std::move(E))), false);
1610 }
1611
1612 auto &Ctx = M.getContext();
1613 auto *NamesVal =
1614 ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);
1615 NamesVar = new GlobalVariable(M, NamesVal->getType(), true,
1616 GlobalValue::PrivateLinkage, NamesVal,
1617 getInstrProfNamesVarName());
1618 NamesSize = CompressedNameStr.size();
1619 setGlobalVariableLargeSection(TT, *NamesVar);
1620 NamesVar->setSection(
1621 ProfileCorrelate == InstrProfCorrelator::BINARY
1622 ? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat())
1623 : getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));
1624 // On COFF, it's important to reduce the alignment down to 1 to prevent the
1625 // linker from inserting padding before the start of the names section or
1626 // between names entries.
1627 NamesVar->setAlignment(Align(1));
1628 // NamesVar is used by runtime but not referenced via relocation by other
1629 // sections. Conservatively make it linker retained.
1630 UsedVars.push_back(NamesVar);
1631
1632 for (auto *NamePtr : ReferencedNames)
1633 NamePtr->eraseFromParent();
1634 }
1635
emitRegistration()1636 void InstrLowerer::emitRegistration() {
1637 if (!needsRuntimeRegistrationOfSectionRange(TT))
1638 return;
1639
1640 // Construct the function.
1641 auto *VoidTy = Type::getVoidTy(M.getContext());
1642 auto *VoidPtrTy = PointerType::getUnqual(M.getContext());
1643 auto *Int64Ty = Type::getInt64Ty(M.getContext());
1644 auto *RegisterFTy = FunctionType::get(VoidTy, false);
1645 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage,
1646 getInstrProfRegFuncsName(), M);
1647 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1648 if (Options.NoRedZone)
1649 RegisterF->addFnAttr(Attribute::NoRedZone);
1650
1651 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);
1652 auto *RuntimeRegisterF =
1653 Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,
1654 getInstrProfRegFuncName(), M);
1655
1656 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));
1657 for (Value *Data : CompilerUsedVars)
1658 if (!isa<Function>(Data))
1659 IRB.CreateCall(RuntimeRegisterF, Data);
1660 for (Value *Data : UsedVars)
1661 if (Data != NamesVar && !isa<Function>(Data))
1662 IRB.CreateCall(RuntimeRegisterF, Data);
1663
1664 if (NamesVar) {
1665 Type *ParamTypes[] = {VoidPtrTy, Int64Ty};
1666 auto *NamesRegisterTy =
1667 FunctionType::get(VoidTy, ArrayRef(ParamTypes), false);
1668 auto *NamesRegisterF =
1669 Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,
1670 getInstrProfNamesRegFuncName(), M);
1671 IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});
1672 }
1673
1674 IRB.CreateRetVoid();
1675 }
1676
emitRuntimeHook()1677 bool InstrLowerer::emitRuntimeHook() {
1678 // We expect the linker to be invoked with -u<hook_var> flag for Linux
1679 // in which case there is no need to emit the external variable.
1680 if (TT.isOSLinux() || TT.isOSAIX())
1681 return false;
1682
1683 // If the module's provided its own runtime, we don't need to do anything.
1684 if (M.getGlobalVariable(getInstrProfRuntimeHookVarName()))
1685 return false;
1686
1687 // Declare an external variable that will pull in the runtime initialization.
1688 auto *Int32Ty = Type::getInt32Ty(M.getContext());
1689 auto *Var =
1690 new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
1691 nullptr, getInstrProfRuntimeHookVarName());
1692 Var->setVisibility(GlobalValue::HiddenVisibility);
1693
1694 if (TT.isOSBinFormatELF() && !TT.isPS()) {
1695 // Mark the user variable as used so that it isn't stripped out.
1696 CompilerUsedVars.push_back(Var);
1697 } else {
1698 // Make a function that uses it.
1699 auto *User = Function::Create(FunctionType::get(Int32Ty, false),
1700 GlobalValue::LinkOnceODRLinkage,
1701 getInstrProfRuntimeHookVarUseFuncName(), M);
1702 User->addFnAttr(Attribute::NoInline);
1703 if (Options.NoRedZone)
1704 User->addFnAttr(Attribute::NoRedZone);
1705 User->setVisibility(GlobalValue::HiddenVisibility);
1706 if (TT.supportsCOMDAT())
1707 User->setComdat(M.getOrInsertComdat(User->getName()));
1708
1709 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User));
1710 auto *Load = IRB.CreateLoad(Int32Ty, Var);
1711 IRB.CreateRet(Load);
1712
1713 // Mark the function as used so that it isn't stripped out.
1714 CompilerUsedVars.push_back(User);
1715 }
1716 return true;
1717 }
1718
emitUses()1719 void InstrLowerer::emitUses() {
1720 // The metadata sections are parallel arrays. Optimizers (e.g.
1721 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so
1722 // we conservatively retain all unconditionally in the compiler.
1723 //
1724 // On ELF and Mach-O, the linker can guarantee the associated sections will be
1725 // retained or discarded as a unit, so llvm.compiler.used is sufficient.
1726 // Similarly on COFF, if prof data is not referenced by code we use one comdat
1727 // and ensure this GC property as well. Otherwise, we have to conservatively
1728 // make all of the sections retained by the linker.
1729 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||
1730 (TT.isOSBinFormatCOFF() && !profDataReferencedByCode(M)))
1731 appendToCompilerUsed(M, CompilerUsedVars);
1732 else
1733 appendToUsed(M, CompilerUsedVars);
1734
1735 // We do not add proper references from used metadata sections to NamesVar and
1736 // VNodesVar, so we have to be conservative and place them in llvm.used
1737 // regardless of the target,
1738 appendToUsed(M, UsedVars);
1739 }
1740
emitInitialization()1741 void InstrLowerer::emitInitialization() {
1742 // Create ProfileFileName variable. Don't don't this for the
1743 // context-sensitive instrumentation lowering: This lowering is after
1744 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should
1745 // have already create the variable before LTO/ThinLTO linking.
1746 if (!IsCS)
1747 createProfileFileNameVar(M, Options.InstrProfileOutput);
1748 Function *RegisterF = M.getFunction(getInstrProfRegFuncsName());
1749 if (!RegisterF)
1750 return;
1751
1752 // Create the initialization function.
1753 auto *VoidTy = Type::getVoidTy(M.getContext());
1754 auto *F = Function::Create(FunctionType::get(VoidTy, false),
1755 GlobalValue::InternalLinkage,
1756 getInstrProfInitFuncName(), M);
1757 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
1758 F->addFnAttr(Attribute::NoInline);
1759 if (Options.NoRedZone)
1760 F->addFnAttr(Attribute::NoRedZone);
1761
1762 // Add the basic block and the necessary calls.
1763 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F));
1764 IRB.CreateCall(RegisterF, {});
1765 IRB.CreateRetVoid();
1766
1767 appendToGlobalCtors(M, F, 0);
1768 }
1769