//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass lowers instrprof_* intrinsics emitted by an instrumentor.
// It also builds the data structures and initialization code needed for
// updating execution counts and emitting the profile at runtime.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>

using namespace llvm;

#define DEBUG_TYPE "instrprof"

// The two options below are defined in namespace llvm (external linkage)
// rather than in the anonymous namespace used for the rest of the options in
// this file.
namespace llvm {
// TODO: Remove -debug-info-correlate in next LLVM release, in favor of
// -profile-correlate=debug-info.
cl::opt<bool> DebugInfoCorrelate(
    "debug-info-correlate",
    cl::desc("Use debug info to correlate profiles. (Deprecated, use "
             "-profile-correlate=debug-info)"),
    cl::init(false));

// Selects the profile correlation mode: none (the default, spelled as the
// empty string), "debug-info", or "binary".
cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(
    "profile-correlate",
    cl::desc("Use debug info or binary file to correlate profiles."),
    cl::init(InstrProfCorrelator::NONE),
    cl::values(clEnumValN(InstrProfCorrelator::NONE, "",
                          "No profile correlation"),
               clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",
                          "Use debug info to correlate"),
               clEnumValN(InstrProfCorrelator::BINARY, "binary",
                          "Use binary to correlate")));
} // namespace llvm

namespace {

// On by default.
cl::opt<bool> DoHashBasedCounterSplit(
    "hash-based-counter-split",
    cl::desc("Rename counter variable of a comdat function based on cfg hash"),
    cl::init(true));

// Off by default. When given explicitly on the command line, this value is
// what InstrLowerer::isRuntimeCounterRelocationEnabled() returns on targets
// that are not Mach-O.
cl::opt<bool>
    RuntimeCounterRelocation("runtime-counter-relocation",
                             cl::desc("Enable relocating counters at runtime."),
                             cl::init(false));

// On by default.
cl::opt<bool> ValueProfileStaticAlloc(
    "vp-static-alloc",
    cl::desc("Do static counter allocation for value profiler"),
    cl::init(true));

cl::opt<double> NumCountersPerValueSite(
    "vp-counters-per-site",
    cl::desc("The average number of profile counters allocated "
             "per value profiling site."),
    // This is set to a very small value because in real programs, only
    // a very small percentage of value sites have non-zero targets, e.g., 1/30.
    // For those sites with non-zero profile, the average number of targets
    // is usually smaller than 2.
    cl::init(1.0));

// Off by default. When set, InstrLowerer::lowerIncrement() emits an atomic
// add for every counter increment.
cl::opt<bool> AtomicCounterUpdateAll(
    "instrprof-atomic-counter-update-all",
    cl::desc("Make all profile counter updates atomic (for testing only)"),
    cl::init(false));

// Off by default. When set, the counter update that promotion sinks into a
// loop exit block is emitted as an atomic add instead of a load/add/store.
cl::opt<bool> AtomicCounterUpdatePromoted(
    "atomic-counter-update-promoted",
    cl::desc("Do counter update using atomic fetch add "
             " for promoted counters only"),
    cl::init(false));

// Off by default. When set, InstrLowerer::lowerIncrement() emits an atomic
// add for the increment whose counter index is zero.
cl::opt<bool> AtomicFirstCounter(
    "atomic-first-counter",
    cl::desc("Use atomic fetch add for first counter in a function (usually "
             "the entry counter)"),
    cl::init(false));

// If the option is not specified, whether counter promotion is done depends
// on how the instrumentation lowering pipeline is set up (see
// InstrLowerer::isCounterPromotionEnabled(), which falls back to
// Options.DoCounterPromotion), i.e., the default value of this option does
// not decide whether the promotion will be done by default. Explicitly
// setting this option overrides the pipeline's choice.
136 cl::opt<bool> DoCounterPromotion("do-counter-promotion", 137 cl::desc("Do counter register promotion"), 138 cl::init(false)); 139 cl::opt<unsigned> MaxNumOfPromotionsPerLoop( 140 "max-counter-promotions-per-loop", cl::init(20), 141 cl::desc("Max number counter promotions per loop to avoid" 142 " increasing register pressure too much")); 143 144 // A debug option 145 cl::opt<int> 146 MaxNumOfPromotions("max-counter-promotions", cl::init(-1), 147 cl::desc("Max number of allowed counter promotions")); 148 149 cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting( 150 "speculative-counter-promotion-max-exiting", cl::init(3), 151 cl::desc("The max number of exiting blocks of a loop to allow " 152 " speculative counter promotion")); 153 154 cl::opt<bool> SpeculativeCounterPromotionToLoop( 155 "speculative-counter-promotion-to-loop", 156 cl::desc("When the option is false, if the target block is in a loop, " 157 "the promotion will be disallowed unless the promoted counter " 158 " update can be further/iteratively promoted into an acyclic " 159 " region.")); 160 161 cl::opt<bool> IterativeCounterPromotion( 162 "iterative-counter-promotion", cl::init(true), 163 cl::desc("Allow counter promotion across the whole loop nest.")); 164 165 cl::opt<bool> SkipRetExitBlock( 166 "skip-ret-exit-block", cl::init(true), 167 cl::desc("Suppress counter promotion if exit blocks contain ret.")); 168 169 using LoadStorePair = std::pair<Instruction *, Instruction *>; 170 171 class InstrLowerer final { 172 public: 173 InstrLowerer(Module &M, const InstrProfOptions &Options, 174 std::function<const TargetLibraryInfo &(Function &F)> GetTLI, 175 bool IsCS) 176 : M(M), Options(Options), TT(Triple(M.getTargetTriple())), IsCS(IsCS), 177 GetTLI(GetTLI) {} 178 179 bool lower(); 180 181 private: 182 Module &M; 183 const InstrProfOptions Options; 184 const Triple TT; 185 // Is this lowering for the context-sensitive instrumentation. 
186 const bool IsCS; 187 188 std::function<const TargetLibraryInfo &(Function &F)> GetTLI; 189 struct PerFunctionProfileData { 190 uint32_t NumValueSites[IPVK_Last + 1] = {}; 191 GlobalVariable *RegionCounters = nullptr; 192 GlobalVariable *DataVar = nullptr; 193 GlobalVariable *RegionBitmaps = nullptr; 194 uint32_t NumBitmapBytes = 0; 195 196 PerFunctionProfileData() = default; 197 }; 198 DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap; 199 /// If runtime relocation is enabled, this maps functions to the load 200 /// instruction that produces the profile relocation bias. 201 DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap; 202 std::vector<GlobalValue *> CompilerUsedVars; 203 std::vector<GlobalValue *> UsedVars; 204 std::vector<GlobalVariable *> ReferencedNames; 205 GlobalVariable *NamesVar = nullptr; 206 size_t NamesSize = 0; 207 208 // vector of counter load/store pairs to be register promoted. 209 std::vector<LoadStorePair> PromotionCandidates; 210 211 int64_t TotalCountersPromoted = 0; 212 213 /// Lower instrumentation intrinsics in the function. Returns true if there 214 /// any lowering. 215 bool lowerIntrinsics(Function *F); 216 217 /// Register-promote counter loads and stores in loops. 218 void promoteCounterLoadStores(Function *F); 219 220 /// Returns true if relocating counters at runtime is enabled. 221 bool isRuntimeCounterRelocationEnabled() const; 222 223 /// Returns true if profile counter update register promotion is enabled. 224 bool isCounterPromotionEnabled() const; 225 226 /// Count the number of instrumented value sites for the function. 227 void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins); 228 229 /// Replace instrprof.value.profile with a call to runtime library. 230 void lowerValueProfileInst(InstrProfValueProfileInst *Ins); 231 232 /// Replace instrprof.cover with a store instruction to the coverage byte. 
233 void lowerCover(InstrProfCoverInst *Inc); 234 235 /// Replace instrprof.timestamp with a call to 236 /// INSTR_PROF_PROFILE_SET_TIMESTAMP. 237 void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction); 238 239 /// Replace instrprof.increment with an increment of the appropriate value. 240 void lowerIncrement(InstrProfIncrementInst *Inc); 241 242 /// Force emitting of name vars for unused functions. 243 void lowerCoverageData(GlobalVariable *CoverageNamesVar); 244 245 /// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction 246 /// using the index represented by the a temp value into a bitmap. 247 void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins); 248 249 /// Replace instrprof.mcdc.temp.update with a shift and or instruction using 250 /// the corresponding condition ID. 251 void lowerMCDCCondBitmapUpdate(InstrProfMCDCCondBitmapUpdate *Ins); 252 253 /// Compute the address of the counter value that this profiling instruction 254 /// acts on. 255 Value *getCounterAddress(InstrProfCntrInstBase *I); 256 257 /// Get the region counters for an increment, creating them if necessary. 258 /// 259 /// If the counter array doesn't yet exist, the profile data variables 260 /// referring to them will also be created. 261 GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc); 262 263 /// Create the region counters. 264 GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc, 265 StringRef Name, 266 GlobalValue::LinkageTypes Linkage); 267 268 /// Compute the address of the test vector bitmap that this profiling 269 /// instruction acts on. 270 Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I); 271 272 /// Get the region bitmaps for an increment, creating them if necessary. 273 /// 274 /// If the bitmap array doesn't yet exist, the profile data variables 275 /// referring to them will also be created. 
276 GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc); 277 278 /// Create the MC/DC bitmap as a byte-aligned array of bytes associated with 279 /// an MC/DC Decision region. The number of bytes required is indicated by 280 /// the intrinsic used (type InstrProfMCDCBitmapInstBase). This is called 281 /// as part of setupProfileSection() and is conceptually very similar to 282 /// what is done for profile data counters in createRegionCounters(). 283 GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc, 284 StringRef Name, 285 GlobalValue::LinkageTypes Linkage); 286 287 /// Set Comdat property of GV, if required. 288 void maybeSetComdat(GlobalVariable *GV, Function *Fn, StringRef VarName); 289 290 /// Setup the sections into which counters and bitmaps are allocated. 291 GlobalVariable *setupProfileSection(InstrProfInstBase *Inc, 292 InstrProfSectKind IPSK); 293 294 /// Create INSTR_PROF_DATA variable for counters and bitmaps. 295 void createDataVariable(InstrProfCntrInstBase *Inc); 296 297 /// Emit the section with compressed function names. 298 void emitNameData(); 299 300 /// Emit value nodes section for value profiling. 301 void emitVNodes(); 302 303 /// Emit runtime registration functions for each profile data variable. 304 void emitRegistration(); 305 306 /// Emit the necessary plumbing to pull in the runtime initialization. 307 /// Returns true if a change was made. 308 bool emitRuntimeHook(); 309 310 /// Add uses of our data variables and runtime hook. 311 void emitUses(); 312 313 /// Create a static initializer for our data, on platforms that need it, 314 /// and for any profile output file that was specified. 315 void emitInitialization(); 316 }; 317 318 /// 319 /// A helper class to promote one counter RMW operation in the loop 320 /// into register update. 321 /// 322 /// RWM update for the counter will be sinked out of the loop after 323 /// the transformation. 
324 /// 325 class PGOCounterPromoterHelper : public LoadAndStorePromoter { 326 public: 327 PGOCounterPromoterHelper( 328 Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init, 329 BasicBlock *PH, ArrayRef<BasicBlock *> ExitBlocks, 330 ArrayRef<Instruction *> InsertPts, 331 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, 332 LoopInfo &LI) 333 : LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks), 334 InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) { 335 assert(isa<LoadInst>(L)); 336 assert(isa<StoreInst>(S)); 337 SSA.AddAvailableValue(PH, Init); 338 } 339 340 void doExtraRewritesBeforeFinalDeletion() override { 341 for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { 342 BasicBlock *ExitBlock = ExitBlocks[i]; 343 Instruction *InsertPos = InsertPts[i]; 344 // Get LiveIn value into the ExitBlock. If there are multiple 345 // predecessors, the value is defined by a PHI node in this 346 // block. 347 Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock); 348 Value *Addr = cast<StoreInst>(Store)->getPointerOperand(); 349 Type *Ty = LiveInValue->getType(); 350 IRBuilder<> Builder(InsertPos); 351 if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) { 352 // If isRuntimeCounterRelocationEnabled() is true then the address of 353 // the store instruction is computed with two instructions in 354 // InstrProfiling::getCounterAddress(). We need to copy those 355 // instructions to this block to compute Addr correctly. 
356 // %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias> 357 // %Addr = inttoptr i64 %BiasAdd to i64* 358 auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0)); 359 assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add); 360 Value *BiasInst = Builder.Insert(OrigBiasInst->clone()); 361 Addr = Builder.CreateIntToPtr(BiasInst, 362 PointerType::getUnqual(Ty->getContext())); 363 } 364 if (AtomicCounterUpdatePromoted) 365 // automic update currently can only be promoted across the current 366 // loop, not the whole loop nest. 367 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue, 368 MaybeAlign(), 369 AtomicOrdering::SequentiallyConsistent); 370 else { 371 LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted"); 372 auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue); 373 auto *NewStore = Builder.CreateStore(NewVal, Addr); 374 375 // Now update the parent loop's candidate list: 376 if (IterativeCounterPromotion) { 377 auto *TargetLoop = LI.getLoopFor(ExitBlock); 378 if (TargetLoop) 379 LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore); 380 } 381 } 382 } 383 } 384 385 private: 386 Instruction *Store; 387 ArrayRef<BasicBlock *> ExitBlocks; 388 ArrayRef<Instruction *> InsertPts; 389 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; 390 LoopInfo &LI; 391 }; 392 393 /// A helper class to do register promotion for all profile counter 394 /// updates in a loop. 395 /// 396 class PGOCounterPromoter { 397 public: 398 PGOCounterPromoter( 399 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands, 400 Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI) 401 : LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) { 402 403 // Skip collection of ExitBlocks and InsertPts for loops that will not be 404 // able to have counters promoted. 
405 SmallVector<BasicBlock *, 8> LoopExitBlocks; 406 SmallPtrSet<BasicBlock *, 8> BlockSet; 407 408 L.getExitBlocks(LoopExitBlocks); 409 if (!isPromotionPossible(&L, LoopExitBlocks)) 410 return; 411 412 for (BasicBlock *ExitBlock : LoopExitBlocks) { 413 if (BlockSet.insert(ExitBlock).second && 414 llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) { 415 return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock); 416 })) { 417 ExitBlocks.push_back(ExitBlock); 418 InsertPts.push_back(&*ExitBlock->getFirstInsertionPt()); 419 } 420 } 421 } 422 423 bool run(int64_t *NumPromoted) { 424 // Skip 'infinite' loops: 425 if (ExitBlocks.size() == 0) 426 return false; 427 428 // Skip if any of the ExitBlocks contains a ret instruction. 429 // This is to prevent dumping of incomplete profile -- if the 430 // the loop is a long running loop and dump is called in the middle 431 // of the loop, the result profile is incomplete. 432 // FIXME: add other heuristics to detect long running loops. 433 if (SkipRetExitBlock) { 434 for (auto *BB : ExitBlocks) 435 if (isa<ReturnInst>(BB->getTerminator())) 436 return false; 437 } 438 439 unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L); 440 if (MaxProm == 0) 441 return false; 442 443 unsigned Promoted = 0; 444 for (auto &Cand : LoopToCandidates[&L]) { 445 446 SmallVector<PHINode *, 4> NewPHIs; 447 SSAUpdater SSA(&NewPHIs); 448 Value *InitVal = ConstantInt::get(Cand.first->getType(), 0); 449 450 // If BFI is set, we will use it to guide the promotions. 451 if (BFI) { 452 auto *BB = Cand.first->getParent(); 453 auto InstrCount = BFI->getBlockProfileCount(BB); 454 if (!InstrCount) 455 continue; 456 auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader()); 457 // If the average loop trip count is not greater than 1.5, we skip 458 // promotion. 
459 if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2)) 460 continue; 461 } 462 463 PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal, 464 L.getLoopPreheader(), ExitBlocks, 465 InsertPts, LoopToCandidates, LI); 466 Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second})); 467 Promoted++; 468 if (Promoted >= MaxProm) 469 break; 470 471 (*NumPromoted)++; 472 if (MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions) 473 break; 474 } 475 476 LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth=" 477 << L.getLoopDepth() << ")\n"); 478 return Promoted != 0; 479 } 480 481 private: 482 bool allowSpeculativeCounterPromotion(Loop *LP) { 483 SmallVector<BasicBlock *, 8> ExitingBlocks; 484 L.getExitingBlocks(ExitingBlocks); 485 // Not considierered speculative. 486 if (ExitingBlocks.size() == 1) 487 return true; 488 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) 489 return false; 490 return true; 491 } 492 493 // Check whether the loop satisfies the basic conditions needed to perform 494 // Counter Promotions. 495 bool 496 isPromotionPossible(Loop *LP, 497 const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) { 498 // We can't insert into a catchswitch. 499 if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) { 500 return isa<CatchSwitchInst>(Exit->getTerminator()); 501 })) 502 return false; 503 504 if (!LP->hasDedicatedExits()) 505 return false; 506 507 BasicBlock *PH = LP->getLoopPreheader(); 508 if (!PH) 509 return false; 510 511 return true; 512 } 513 514 // Returns the max number of Counter Promotions for LP. 515 unsigned getMaxNumOfPromotionsInLoop(Loop *LP) { 516 SmallVector<BasicBlock *, 8> LoopExitBlocks; 517 LP->getExitBlocks(LoopExitBlocks); 518 if (!isPromotionPossible(LP, LoopExitBlocks)) 519 return 0; 520 521 SmallVector<BasicBlock *, 8> ExitingBlocks; 522 LP->getExitingBlocks(ExitingBlocks); 523 524 // If BFI is set, we do more aggressive promotions based on BFI. 
525 if (BFI) 526 return (unsigned)-1; 527 528 // Not considierered speculative. 529 if (ExitingBlocks.size() == 1) 530 return MaxNumOfPromotionsPerLoop; 531 532 if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting) 533 return 0; 534 535 // Whether the target block is in a loop does not matter: 536 if (SpeculativeCounterPromotionToLoop) 537 return MaxNumOfPromotionsPerLoop; 538 539 // Now check the target block: 540 unsigned MaxProm = MaxNumOfPromotionsPerLoop; 541 for (auto *TargetBlock : LoopExitBlocks) { 542 auto *TargetLoop = LI.getLoopFor(TargetBlock); 543 if (!TargetLoop) 544 continue; 545 unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop); 546 unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size(); 547 MaxProm = 548 std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) - 549 PendingCandsInTarget); 550 } 551 return MaxProm; 552 } 553 554 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates; 555 SmallVector<BasicBlock *, 8> ExitBlocks; 556 SmallVector<Instruction *, 8> InsertPts; 557 Loop &L; 558 LoopInfo &LI; 559 BlockFrequencyInfo *BFI; 560 }; 561 562 enum class ValueProfilingCallType { 563 // Individual values are tracked. Currently used for indiret call target 564 // profiling. 565 Default, 566 567 // MemOp: the memop size value profiling. 
568 MemOp 569 }; 570 571 } // end anonymous namespace 572 573 PreservedAnalyses InstrProfilingLoweringPass::run(Module &M, 574 ModuleAnalysisManager &AM) { 575 FunctionAnalysisManager &FAM = 576 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 577 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { 578 return FAM.getResult<TargetLibraryAnalysis>(F); 579 }; 580 InstrLowerer Lowerer(M, Options, GetTLI, IsCS); 581 if (!Lowerer.lower()) 582 return PreservedAnalyses::all(); 583 584 return PreservedAnalyses::none(); 585 } 586 587 bool InstrLowerer::lowerIntrinsics(Function *F) { 588 bool MadeChange = false; 589 PromotionCandidates.clear(); 590 for (BasicBlock &BB : *F) { 591 for (Instruction &Instr : llvm::make_early_inc_range(BB)) { 592 if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(&Instr)) { 593 lowerIncrement(IPIS); 594 MadeChange = true; 595 } else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) { 596 lowerIncrement(IPI); 597 MadeChange = true; 598 } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) { 599 lowerTimestamp(IPC); 600 MadeChange = true; 601 } else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) { 602 lowerCover(IPC); 603 MadeChange = true; 604 } else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(&Instr)) { 605 lowerValueProfileInst(IPVP); 606 MadeChange = true; 607 } else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(&Instr)) { 608 IPMP->eraseFromParent(); 609 MadeChange = true; 610 } else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(&Instr)) { 611 lowerMCDCTestVectorBitmapUpdate(IPBU); 612 MadeChange = true; 613 } else if (auto *IPTU = dyn_cast<InstrProfMCDCCondBitmapUpdate>(&Instr)) { 614 lowerMCDCCondBitmapUpdate(IPTU); 615 MadeChange = true; 616 } 617 } 618 } 619 620 if (!MadeChange) 621 return false; 622 623 promoteCounterLoadStores(F); 624 return true; 625 } 626 627 bool InstrLowerer::isRuntimeCounterRelocationEnabled() const { 628 // Mach-O don't support weak 
external references. 629 if (TT.isOSBinFormatMachO()) 630 return false; 631 632 if (RuntimeCounterRelocation.getNumOccurrences() > 0) 633 return RuntimeCounterRelocation; 634 635 // Fuchsia uses runtime counter relocation by default. 636 return TT.isOSFuchsia(); 637 } 638 639 bool InstrLowerer::isCounterPromotionEnabled() const { 640 if (DoCounterPromotion.getNumOccurrences() > 0) 641 return DoCounterPromotion; 642 643 return Options.DoCounterPromotion; 644 } 645 646 void InstrLowerer::promoteCounterLoadStores(Function *F) { 647 if (!isCounterPromotionEnabled()) 648 return; 649 650 DominatorTree DT(*F); 651 LoopInfo LI(DT); 652 DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates; 653 654 std::unique_ptr<BlockFrequencyInfo> BFI; 655 if (Options.UseBFIInPromotion) { 656 std::unique_ptr<BranchProbabilityInfo> BPI; 657 BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F))); 658 BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI)); 659 } 660 661 for (const auto &LoadStore : PromotionCandidates) { 662 auto *CounterLoad = LoadStore.first; 663 auto *CounterStore = LoadStore.second; 664 BasicBlock *BB = CounterLoad->getParent(); 665 Loop *ParentLoop = LI.getLoopFor(BB); 666 if (!ParentLoop) 667 continue; 668 LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore); 669 } 670 671 SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder(); 672 673 // Do a post-order traversal of the loops so that counter updates can be 674 // iteratively hoisted outside the loop nest. 675 for (auto *Loop : llvm::reverse(Loops)) { 676 PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get()); 677 Promoter.run(&TotalCountersPromoted); 678 } 679 } 680 681 static bool needsRuntimeHookUnconditionally(const Triple &TT) { 682 // On Fuchsia, we only need runtime hook if any counters are present. 683 if (TT.isOSFuchsia()) 684 return false; 685 686 return true; 687 } 688 689 /// Check if the module contains uses of any profiling intrinsics. 
690 static bool containsProfilingIntrinsics(Module &M) { 691 auto containsIntrinsic = [&](int ID) { 692 if (auto *F = M.getFunction(Intrinsic::getName(ID))) 693 return !F->use_empty(); 694 return false; 695 }; 696 return containsIntrinsic(llvm::Intrinsic::instrprof_cover) || 697 containsIntrinsic(llvm::Intrinsic::instrprof_increment) || 698 containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) || 699 containsIntrinsic(llvm::Intrinsic::instrprof_timestamp) || 700 containsIntrinsic(llvm::Intrinsic::instrprof_value_profile); 701 } 702 703 bool InstrLowerer::lower() { 704 bool MadeChange = false; 705 bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT); 706 if (NeedsRuntimeHook) 707 MadeChange = emitRuntimeHook(); 708 709 bool ContainsProfiling = containsProfilingIntrinsics(M); 710 GlobalVariable *CoverageNamesVar = 711 M.getNamedGlobal(getCoverageUnusedNamesVarName()); 712 // Improve compile time by avoiding linear scans when there is no work. 713 if (!ContainsProfiling && !CoverageNamesVar) 714 return MadeChange; 715 716 // We did not know how many value sites there would be inside 717 // the instrumented function. This is counting the number of instrumented 718 // target value sites to enter it as field in the profile data variable. 719 for (Function &F : M) { 720 InstrProfCntrInstBase *FirstProfInst = nullptr; 721 for (BasicBlock &BB : F) { 722 for (auto I = BB.begin(), E = BB.end(); I != E; I++) { 723 if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I)) 724 computeNumValueSiteCounts(Ind); 725 else { 726 if (FirstProfInst == nullptr && 727 (isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I))) 728 FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I); 729 // If the MCDCBitmapParameters intrinsic seen, create the bitmaps. 
730 if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I)) 731 static_cast<void>(getOrCreateRegionBitmaps(Params)); 732 } 733 } 734 } 735 736 // Use a profile intrinsic to create the region counters and data variable. 737 // Also create the data variable based on the MCDCParams. 738 if (FirstProfInst != nullptr) { 739 static_cast<void>(getOrCreateRegionCounters(FirstProfInst)); 740 } 741 } 742 743 for (Function &F : M) 744 MadeChange |= lowerIntrinsics(&F); 745 746 if (CoverageNamesVar) { 747 lowerCoverageData(CoverageNamesVar); 748 MadeChange = true; 749 } 750 751 if (!MadeChange) 752 return false; 753 754 emitVNodes(); 755 emitNameData(); 756 757 // Emit runtime hook for the cases where the target does not unconditionally 758 // require pulling in profile runtime, and coverage is enabled on code that is 759 // not eliminated by the front-end, e.g. unused functions with internal 760 // linkage. 761 if (!NeedsRuntimeHook && ContainsProfiling) 762 emitRuntimeHook(); 763 764 emitRegistration(); 765 emitUses(); 766 emitInitialization(); 767 return true; 768 } 769 770 static FunctionCallee getOrInsertValueProfilingCall( 771 Module &M, const TargetLibraryInfo &TLI, 772 ValueProfilingCallType CallType = ValueProfilingCallType::Default) { 773 LLVMContext &Ctx = M.getContext(); 774 auto *ReturnTy = Type::getVoidTy(M.getContext()); 775 776 AttributeList AL; 777 if (auto AK = TLI.getExtAttrForI32Param(false)) 778 AL = AL.addParamAttribute(M.getContext(), 2, AK); 779 780 assert((CallType == ValueProfilingCallType::Default || 781 CallType == ValueProfilingCallType::MemOp) && 782 "Must be Default or MemOp"); 783 Type *ParamTypes[] = { 784 #define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType 785 #include "llvm/ProfileData/InstrProfData.inc" 786 }; 787 auto *ValueProfilingCallTy = 788 FunctionType::get(ReturnTy, ArrayRef(ParamTypes), false); 789 StringRef FuncName = CallType == ValueProfilingCallType::Default 790 ? 
getInstrProfValueProfFuncName() 791 : getInstrProfValueProfMemOpFuncName(); 792 return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL); 793 } 794 795 void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) { 796 GlobalVariable *Name = Ind->getName(); 797 uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); 798 uint64_t Index = Ind->getIndex()->getZExtValue(); 799 auto &PD = ProfileDataMap[Name]; 800 PD.NumValueSites[ValueKind] = 801 std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1)); 802 } 803 804 void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { 805 // TODO: Value profiling heavily depends on the data section which is omitted 806 // in lightweight mode. We need to move the value profile pointer to the 807 // Counter struct to get this working. 808 assert( 809 !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE && 810 "Value profiling is not yet supported with lightweight instrumentation"); 811 GlobalVariable *Name = Ind->getName(); 812 auto It = ProfileDataMap.find(Name); 813 assert(It != ProfileDataMap.end() && It->second.DataVar && 814 "value profiling detected in function with no counter incerement"); 815 816 GlobalVariable *DataVar = It->second.DataVar; 817 uint64_t ValueKind = Ind->getValueKind()->getZExtValue(); 818 uint64_t Index = Ind->getIndex()->getZExtValue(); 819 for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind) 820 Index += It->second.NumValueSites[Kind]; 821 822 IRBuilder<> Builder(Ind); 823 bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() == 824 llvm::InstrProfValueKind::IPVK_MemOPSize); 825 CallInst *Call = nullptr; 826 auto *TLI = &GetTLI(*Ind->getFunction()); 827 828 // To support value profiling calls within Windows exception handlers, funclet 829 // information contained within operand bundles needs to be copied over to 830 // the library call. This is required for the IR to be processed by the 831 // WinEHPrepare pass. 
832 SmallVector<OperandBundleDef, 1> OpBundles; 833 Ind->getOperandBundlesAsDefs(OpBundles); 834 if (!IsMemOpSize) { 835 Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)}; 836 Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args, 837 OpBundles); 838 } else { 839 Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)}; 840 Call = Builder.CreateCall( 841 getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp), 842 Args, OpBundles); 843 } 844 if (auto AK = TLI->getExtAttrForI32Param(false)) 845 Call->addParamAttr(2, AK); 846 Ind->replaceAllUsesWith(Call); 847 Ind->eraseFromParent(); 848 } 849 850 Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) { 851 auto *Counters = getOrCreateRegionCounters(I); 852 IRBuilder<> Builder(I); 853 854 if (isa<InstrProfTimestampInst>(I)) 855 Counters->setAlignment(Align(8)); 856 857 auto *Addr = Builder.CreateConstInBoundsGEP2_32( 858 Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue()); 859 860 if (!isRuntimeCounterRelocationEnabled()) 861 return Addr; 862 863 Type *Int64Ty = Type::getInt64Ty(M.getContext()); 864 Function *Fn = I->getParent()->getParent(); 865 LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn]; 866 if (!BiasLI) { 867 IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front()); 868 auto *Bias = M.getGlobalVariable(getInstrProfCounterBiasVarName()); 869 if (!Bias) { 870 // Compiler must define this variable when runtime counter relocation 871 // is being used. Runtime has a weak external reference that is used 872 // to check whether that's the case or not. 
873 Bias = new GlobalVariable( 874 M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage, 875 Constant::getNullValue(Int64Ty), getInstrProfCounterBiasVarName()); 876 Bias->setVisibility(GlobalVariable::HiddenVisibility); 877 // A definition that's weak (linkonce_odr) without being in a COMDAT 878 // section wouldn't lead to link errors, but it would lead to a dead 879 // data word from every TU but one. Putting it in COMDAT ensures there 880 // will be exactly one data slot in the link. 881 if (TT.supportsCOMDAT()) 882 Bias->setComdat(M.getOrInsertComdat(Bias->getName())); 883 } 884 BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias); 885 } 886 auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI); 887 return Builder.CreateIntToPtr(Add, Addr->getType()); 888 } 889 890 Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { 891 auto *Bitmaps = getOrCreateRegionBitmaps(I); 892 IRBuilder<> Builder(I); 893 894 auto *Addr = Builder.CreateConstInBoundsGEP2_32( 895 Bitmaps->getValueType(), Bitmaps, 0, I->getBitmapIndex()->getZExtValue()); 896 897 if (isRuntimeCounterRelocationEnabled()) { 898 LLVMContext &Ctx = M.getContext(); 899 Ctx.diagnose(DiagnosticInfoPGOProfile( 900 M.getName().data(), 901 Twine("Runtime counter relocation is presently not supported for MC/DC " 902 "bitmaps."), 903 DS_Warning)); 904 } 905 906 return Addr; 907 } 908 909 void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) { 910 auto *Addr = getCounterAddress(CoverInstruction); 911 IRBuilder<> Builder(CoverInstruction); 912 // We store zero to represent that this block is covered. 
913 Builder.CreateStore(Builder.getInt8(0), Addr); 914 CoverInstruction->eraseFromParent(); 915 } 916 917 void InstrLowerer::lowerTimestamp( 918 InstrProfTimestampInst *TimestampInstruction) { 919 assert(TimestampInstruction->getIndex()->isZeroValue() && 920 "timestamp probes are always the first probe for a function"); 921 auto &Ctx = M.getContext(); 922 auto *TimestampAddr = getCounterAddress(TimestampInstruction); 923 IRBuilder<> Builder(TimestampInstruction); 924 auto *CalleeTy = 925 FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false); 926 auto Callee = M.getOrInsertFunction( 927 INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy); 928 Builder.CreateCall(Callee, {TimestampAddr}); 929 TimestampInstruction->eraseFromParent(); 930 } 931 932 void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) { 933 auto *Addr = getCounterAddress(Inc); 934 935 IRBuilder<> Builder(Inc); 936 if (Options.Atomic || AtomicCounterUpdateAll || 937 (Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) { 938 Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(), 939 MaybeAlign(), AtomicOrdering::Monotonic); 940 } else { 941 Value *IncStep = Inc->getStep(); 942 Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount"); 943 auto *Count = Builder.CreateAdd(Load, Inc->getStep()); 944 auto *Store = Builder.CreateStore(Count, Addr); 945 if (isCounterPromotionEnabled()) 946 PromotionCandidates.emplace_back(cast<Instruction>(Load), Store); 947 } 948 Inc->eraseFromParent(); 949 } 950 951 void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) { 952 ConstantArray *Names = 953 cast<ConstantArray>(CoverageNamesVar->getInitializer()); 954 for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) { 955 Constant *NC = Names->getOperand(I); 956 Value *V = NC->stripPointerCasts(); 957 assert(isa<GlobalVariable>(V) && "Missing reference to function name"); 958 GlobalVariable *Name = cast<GlobalVariable>(V); 959 960 
Name->setLinkage(GlobalValue::PrivateLinkage); 961 ReferencedNames.push_back(Name); 962 if (isa<ConstantExpr>(NC)) 963 NC->dropAllReferences(); 964 } 965 CoverageNamesVar->eraseFromParent(); 966 } 967 968 void InstrLowerer::lowerMCDCTestVectorBitmapUpdate( 969 InstrProfMCDCTVBitmapUpdate *Update) { 970 IRBuilder<> Builder(Update); 971 auto *Int8Ty = Type::getInt8Ty(M.getContext()); 972 auto *Int8PtrTy = PointerType::getUnqual(M.getContext()); 973 auto *Int32Ty = Type::getInt32Ty(M.getContext()); 974 auto *Int64Ty = Type::getInt64Ty(M.getContext()); 975 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr(); 976 auto *BitmapAddr = getBitmapAddress(Update); 977 978 // Load Temp Val. 979 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4 980 auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"); 981 982 // Calculate byte offset using div8. 983 // %1 = lshr i32 %mcdc.temp, 3 984 auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3); 985 986 // Add byte offset to section base byte address. 987 // %2 = zext i32 %1 to i64 988 // %3 = add i64 ptrtoint (ptr @__profbm_test to i64), %2 989 auto *BitmapByteAddr = 990 Builder.CreateAdd(Builder.CreatePtrToInt(BitmapAddr, Int64Ty), 991 Builder.CreateZExtOrBitCast(BitmapByteOffset, Int64Ty)); 992 993 // Convert to a pointer. 994 // %4 = inttoptr i32 %3 to ptr 995 BitmapByteAddr = Builder.CreateIntToPtr(BitmapByteAddr, Int8PtrTy); 996 997 // Calculate bit offset into bitmap byte by using div8 remainder (AND ~8) 998 // %5 = and i32 %mcdc.temp, 7 999 // %6 = trunc i32 %5 to i8 1000 auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty); 1001 1002 // Shift bit offset left to form a bitmap. 1003 // %7 = shl i8 1, %6 1004 auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet); 1005 1006 // Load profile bitmap byte. 
1007 // %mcdc.bits = load i8, ptr %4, align 1 1008 auto *Bitmap = Builder.CreateLoad(Int8Ty, BitmapByteAddr, "mcdc.bits"); 1009 1010 // Perform logical OR of profile bitmap byte and shifted bit offset. 1011 // %8 = or i8 %mcdc.bits, %7 1012 auto *Result = Builder.CreateOr(Bitmap, ShiftedVal); 1013 1014 // Store the updated profile bitmap byte. 1015 // store i8 %8, ptr %3, align 1 1016 Builder.CreateStore(Result, BitmapByteAddr); 1017 Update->eraseFromParent(); 1018 } 1019 1020 void InstrLowerer::lowerMCDCCondBitmapUpdate( 1021 InstrProfMCDCCondBitmapUpdate *Update) { 1022 IRBuilder<> Builder(Update); 1023 auto *Int32Ty = Type::getInt32Ty(M.getContext()); 1024 auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr(); 1025 1026 // Load the MCDC temporary value from the stack. 1027 // %mcdc.temp = load i32, ptr %mcdc.addr, align 4 1028 auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"); 1029 1030 // Zero-extend the evaluated condition boolean value (0 or 1) by 32bits. 1031 // %1 = zext i1 %tobool to i32 1032 auto *CondV_32 = Builder.CreateZExt(Update->getCondBool(), Int32Ty); 1033 1034 // Shift the boolean value left (by the condition's ID) to form a bitmap. 1035 // %2 = shl i32 %1, <Update->getCondID()> 1036 auto *ShiftedVal = Builder.CreateShl(CondV_32, Update->getCondID()); 1037 1038 // Perform logical OR of the bitmap against the loaded MCDC temporary value. 1039 // %3 = or i32 %mcdc.temp, %2 1040 auto *Result = Builder.CreateOr(Temp, ShiftedVal); 1041 1042 // Store the updated temporary value back to the stack. 1043 // store i32 %3, ptr %mcdc.addr, align 4 1044 Builder.CreateStore(Result, MCDCCondBitmapAddr); 1045 Update->eraseFromParent(); 1046 } 1047 1048 /// Get the name of a profiling variable for a particular function. 
1049 static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix, 1050 bool &Renamed) { 1051 StringRef NamePrefix = getInstrProfNameVarPrefix(); 1052 StringRef Name = Inc->getName()->getName().substr(NamePrefix.size()); 1053 Function *F = Inc->getParent()->getParent(); 1054 Module *M = F->getParent(); 1055 if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) || 1056 !canRenameComdatFunc(*F)) { 1057 Renamed = false; 1058 return (Prefix + Name).str(); 1059 } 1060 Renamed = true; 1061 uint64_t FuncHash = Inc->getHash()->getZExtValue(); 1062 SmallVector<char, 24> HashPostfix; 1063 if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix))) 1064 return (Prefix + Name).str(); 1065 return (Prefix + Name + "." + Twine(FuncHash)).str(); 1066 } 1067 1068 static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) { 1069 auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag)); 1070 if (!MD) 1071 return 0; 1072 1073 // If the flag is a ConstantAsMetadata, it should be an integer representable 1074 // in 64-bits. 1075 return cast<ConstantInt>(MD->getValue())->getZExtValue(); 1076 } 1077 1078 static bool enablesValueProfiling(const Module &M) { 1079 return isIRPGOFlagSet(&M) || 1080 getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0; 1081 } 1082 1083 // Conservatively returns true if data variables may be referenced by code. 1084 static bool profDataReferencedByCode(const Module &M) { 1085 return enablesValueProfiling(M); 1086 } 1087 1088 static inline bool shouldRecordFunctionAddr(Function *F) { 1089 // Only record function addresses if IR PGO is enabled or if clang value 1090 // profiling is enabled. Recording function addresses greatly increases object 1091 // file size, because it prevents the inliner from deleting functions that 1092 // have been inlined everywhere. 
1093 if (!profDataReferencedByCode(*F->getParent())) 1094 return false; 1095 1096 // Check the linkage 1097 bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage(); 1098 if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && 1099 !HasAvailableExternallyLinkage) 1100 return true; 1101 1102 // A function marked 'alwaysinline' with available_externally linkage can't 1103 // have its address taken. Doing so would create an undefined external ref to 1104 // the function, which would fail to link. 1105 if (HasAvailableExternallyLinkage && 1106 F->hasFnAttribute(Attribute::AlwaysInline)) 1107 return false; 1108 1109 // Prohibit function address recording if the function is both internal and 1110 // COMDAT. This avoids the profile data variable referencing internal symbols 1111 // in COMDAT. 1112 if (F->hasLocalLinkage() && F->hasComdat()) 1113 return false; 1114 1115 // Check uses of this function for other than direct calls or invokes to it. 1116 // Inline virtual functions have linkeOnceODR linkage. When a key method 1117 // exists, the vtable will only be emitted in the TU where the key method 1118 // is defined. In a TU where vtable is not available, the function won't 1119 // be 'addresstaken'. If its address is not recorded here, the profile data 1120 // with missing address may be picked by the linker leading to missing 1121 // indirect call target info. 
1122 return F->hasAddressTaken() || F->hasLinkOnceLinkage(); 1123 } 1124 1125 static inline bool shouldUsePublicSymbol(Function *Fn) { 1126 // It isn't legal to make an alias of this function at all 1127 if (Fn->isDeclarationForLinker()) 1128 return true; 1129 1130 // Symbols with local linkage can just use the symbol directly without 1131 // introducing relocations 1132 if (Fn->hasLocalLinkage()) 1133 return true; 1134 1135 // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some 1136 // unfavorable interaction between the new alias and the alias renaming done 1137 // in LowerTypeTests under ThinLTO. For comdat functions that would normally 1138 // be deduplicated, but the renaming scheme ends up preventing renaming, since 1139 // it creates unique names for each alias, resulting in duplicated symbols. In 1140 // the future, we should update the CFI related passes to migrate these 1141 // aliases to the same module as the jump-table they refer to will be defined. 1142 if (Fn->hasMetadata(LLVMContext::MD_type)) 1143 return true; 1144 1145 // For comdat functions, an alias would need the same linkage as the original 1146 // function and hidden visibility. There is no point in adding an alias with 1147 // identical linkage an visibility to avoid introducing symbolic relocations. 1148 if (Fn->hasComdat() && 1149 (Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility)) 1150 return true; 1151 1152 // its OK to use an alias 1153 return false; 1154 } 1155 1156 static inline Constant *getFuncAddrForProfData(Function *Fn) { 1157 auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext()); 1158 // Store a nullptr in __llvm_profd, if we shouldn't use a real address 1159 if (!shouldRecordFunctionAddr(Fn)) 1160 return ConstantPointerNull::get(Int8PtrTy); 1161 1162 // If we can't use an alias, we must use the public symbol, even though this 1163 // may require a symbolic relocation. 
1164 if (shouldUsePublicSymbol(Fn)) 1165 return Fn; 1166 1167 // When possible use a private alias to avoid symbolic relocations. 1168 auto *GA = GlobalAlias::create(GlobalValue::LinkageTypes::PrivateLinkage, 1169 Fn->getName() + ".local", Fn); 1170 1171 // When the instrumented function is a COMDAT function, we cannot use a 1172 // private alias. If we did, we would create reference to a local label in 1173 // this function's section. If this version of the function isn't selected by 1174 // the linker, then the metadata would introduce a reference to a discarded 1175 // section. So, for COMDAT functions, we need to adjust the linkage of the 1176 // alias. Using hidden visibility avoids a dynamic relocation and an entry in 1177 // the dynamic symbol table. 1178 // 1179 // Note that this handles COMDAT functions with visibility other than Hidden, 1180 // since that case is covered in shouldUsePublicSymbol() 1181 if (Fn->hasComdat()) { 1182 GA->setLinkage(Fn->getLinkage()); 1183 GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility); 1184 } 1185 1186 // appendToCompilerUsed(*Fn->getParent(), {GA}); 1187 1188 return GA; 1189 } 1190 1191 static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { 1192 // Don't do this for Darwin. compiler-rt uses linker magic. 1193 if (TT.isOSDarwin()) 1194 return false; 1195 // Use linker script magic to get data/cnts/name start/end. 
1196 if (TT.isOSAIX() || TT.isOSLinux() || TT.isOSFreeBSD() || TT.isOSNetBSD() || 1197 TT.isOSSolaris() || TT.isOSFuchsia() || TT.isPS() || TT.isOSWindows()) 1198 return false; 1199 1200 return true; 1201 } 1202 1203 void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn, 1204 StringRef VarName) { 1205 bool DataReferencedByCode = profDataReferencedByCode(M); 1206 bool NeedComdat = needsComdatForCounter(*Fn, M); 1207 bool UseComdat = (NeedComdat || TT.isOSBinFormatELF()); 1208 1209 if (!UseComdat) 1210 return; 1211 1212 StringRef GroupName = 1213 TT.isOSBinFormatCOFF() && DataReferencedByCode ? GV->getName() : VarName; 1214 Comdat *C = M.getOrInsertComdat(GroupName); 1215 if (!NeedComdat) 1216 C->setSelectionKind(Comdat::NoDeduplicate); 1217 GV->setComdat(C); 1218 // COFF doesn't allow the comdat group leader to have private linkage, so 1219 // upgrade private linkage to internal linkage to produce a symbol table 1220 // entry. 1221 if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage()) 1222 GV->setLinkage(GlobalValue::InternalLinkage); 1223 } 1224 1225 GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc, 1226 InstrProfSectKind IPSK) { 1227 GlobalVariable *NamePtr = Inc->getName(); 1228 1229 // Match the linkage and visibility of the name global. 1230 Function *Fn = Inc->getParent()->getParent(); 1231 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage(); 1232 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility(); 1233 1234 // Use internal rather than private linkage so the counter variable shows up 1235 // in the symbol table when using debug info for correlation. 1236 if ((DebugInfoCorrelate || 1237 ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) && 1238 TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage) 1239 Linkage = GlobalValue::InternalLinkage; 1240 1241 // Due to the limitation of binder as of 2021/09/28, the duplicate weak 1242 // symbols in the same csect won't be discarded. 
When there are duplicate weak 1243 // symbols, we can NOT guarantee that the relocations get resolved to the 1244 // intended weak symbol, so we can not ensure the correctness of the relative 1245 // CounterPtr, so we have to use private linkage for counter and data symbols. 1246 if (TT.isOSBinFormatXCOFF()) { 1247 Linkage = GlobalValue::PrivateLinkage; 1248 Visibility = GlobalValue::DefaultVisibility; 1249 } 1250 // Move the name variable to the right section. Place them in a COMDAT group 1251 // if the associated function is a COMDAT. This will make sure that only one 1252 // copy of counters of the COMDAT function will be emitted after linking. Keep 1253 // in mind that this pass may run before the inliner, so we need to create a 1254 // new comdat group for the counters and profiling data. If we use the comdat 1255 // of the parent function, that will result in relocations against discarded 1256 // sections. 1257 // 1258 // If the data variable is referenced by code, counters and data have to be 1259 // in different comdats for COFF because the Visual C++ linker will report 1260 // duplicate symbol errors if there are multiple external symbols with the 1261 // same name marked IMAGE_COMDAT_SELECT_ASSOCIATIVE. 1262 // 1263 // For ELF, when not using COMDAT, put counters, data and values into a 1264 // nodeduplicate COMDAT which is lowered to a zero-flag section group. This 1265 // allows -z start-stop-gc to discard the entire group when the function is 1266 // discarded. 
1267 bool Renamed; 1268 GlobalVariable *Ptr; 1269 StringRef VarPrefix; 1270 std::string VarName; 1271 if (IPSK == IPSK_cnts) { 1272 VarPrefix = getInstrProfCountersVarPrefix(); 1273 VarName = getVarName(Inc, VarPrefix, Renamed); 1274 InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Inc); 1275 Ptr = createRegionCounters(CntrIncrement, VarName, Linkage); 1276 } else if (IPSK == IPSK_bitmap) { 1277 VarPrefix = getInstrProfBitmapVarPrefix(); 1278 VarName = getVarName(Inc, VarPrefix, Renamed); 1279 InstrProfMCDCBitmapInstBase *BitmapUpdate = 1280 dyn_cast<InstrProfMCDCBitmapInstBase>(Inc); 1281 Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage); 1282 } else { 1283 llvm_unreachable("Profile Section must be for Counters or Bitmaps"); 1284 } 1285 1286 Ptr->setVisibility(Visibility); 1287 // Put the counters and bitmaps in their own sections so linkers can 1288 // remove unneeded sections. 1289 Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat())); 1290 Ptr->setLinkage(Linkage); 1291 maybeSetComdat(Ptr, Fn, VarName); 1292 return Ptr; 1293 } 1294 1295 GlobalVariable * 1296 InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc, 1297 StringRef Name, 1298 GlobalValue::LinkageTypes Linkage) { 1299 uint64_t NumBytes = Inc->getNumBitmapBytes()->getZExtValue(); 1300 auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes); 1301 auto GV = new GlobalVariable(M, BitmapTy, false, Linkage, 1302 Constant::getNullValue(BitmapTy), Name); 1303 GV->setAlignment(Align(1)); 1304 return GV; 1305 } 1306 1307 GlobalVariable * 1308 InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) { 1309 GlobalVariable *NamePtr = Inc->getName(); 1310 auto &PD = ProfileDataMap[NamePtr]; 1311 if (PD.RegionBitmaps) 1312 return PD.RegionBitmaps; 1313 1314 // If RegionBitmaps doesn't already exist, create it by first setting up 1315 // the corresponding profile section. 
1316 auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap); 1317 PD.RegionBitmaps = BitmapPtr; 1318 PD.NumBitmapBytes = Inc->getNumBitmapBytes()->getZExtValue(); 1319 return PD.RegionBitmaps; 1320 } 1321 1322 GlobalVariable * 1323 InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name, 1324 GlobalValue::LinkageTypes Linkage) { 1325 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); 1326 auto &Ctx = M.getContext(); 1327 GlobalVariable *GV; 1328 if (isa<InstrProfCoverInst>(Inc)) { 1329 auto *CounterTy = Type::getInt8Ty(Ctx); 1330 auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters); 1331 // TODO: `Constant::getAllOnesValue()` does not yet accept an array type. 1332 std::vector<Constant *> InitialValues(NumCounters, 1333 Constant::getAllOnesValue(CounterTy)); 1334 GV = new GlobalVariable(M, CounterArrTy, false, Linkage, 1335 ConstantArray::get(CounterArrTy, InitialValues), 1336 Name); 1337 GV->setAlignment(Align(1)); 1338 } else { 1339 auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters); 1340 GV = new GlobalVariable(M, CounterTy, false, Linkage, 1341 Constant::getNullValue(CounterTy), Name); 1342 GV->setAlignment(Align(8)); 1343 } 1344 return GV; 1345 } 1346 1347 GlobalVariable * 1348 InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { 1349 GlobalVariable *NamePtr = Inc->getName(); 1350 auto &PD = ProfileDataMap[NamePtr]; 1351 if (PD.RegionCounters) 1352 return PD.RegionCounters; 1353 1354 // If RegionCounters doesn't already exist, create it by first setting up 1355 // the corresponding profile section. 
1356 auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts); 1357 PD.RegionCounters = CounterPtr; 1358 1359 if (DebugInfoCorrelate || 1360 ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) { 1361 LLVMContext &Ctx = M.getContext(); 1362 Function *Fn = Inc->getParent()->getParent(); 1363 if (auto *SP = Fn->getSubprogram()) { 1364 DIBuilder DB(M, true, SP->getUnit()); 1365 Metadata *FunctionNameAnnotation[] = { 1366 MDString::get(Ctx, InstrProfCorrelator::FunctionNameAttributeName), 1367 MDString::get(Ctx, getPGOFuncNameVarInitializer(NamePtr)), 1368 }; 1369 Metadata *CFGHashAnnotation[] = { 1370 MDString::get(Ctx, InstrProfCorrelator::CFGHashAttributeName), 1371 ConstantAsMetadata::get(Inc->getHash()), 1372 }; 1373 Metadata *NumCountersAnnotation[] = { 1374 MDString::get(Ctx, InstrProfCorrelator::NumCountersAttributeName), 1375 ConstantAsMetadata::get(Inc->getNumCounters()), 1376 }; 1377 auto Annotations = DB.getOrCreateArray({ 1378 MDNode::get(Ctx, FunctionNameAnnotation), 1379 MDNode::get(Ctx, CFGHashAnnotation), 1380 MDNode::get(Ctx, NumCountersAnnotation), 1381 }); 1382 auto *DICounter = DB.createGlobalVariableExpression( 1383 SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(), 1384 /*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"), 1385 CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr, 1386 /*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0, 1387 Annotations); 1388 CounterPtr->addDebugInfo(DICounter); 1389 DB.finalize(); 1390 } 1391 1392 // Mark the counter variable as used so that it isn't optimized out. 1393 CompilerUsedVars.push_back(PD.RegionCounters); 1394 } 1395 1396 // Create the data variable (if it doesn't already exist). 1397 createDataVariable(Inc); 1398 1399 return PD.RegionCounters; 1400 } 1401 1402 void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) { 1403 // When debug information is correlated to profile data, a data variable 1404 // is not needed. 
1405 if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) 1406 return; 1407 1408 GlobalVariable *NamePtr = Inc->getName(); 1409 auto &PD = ProfileDataMap[NamePtr]; 1410 1411 // Return if data variable was already created. 1412 if (PD.DataVar) 1413 return; 1414 1415 LLVMContext &Ctx = M.getContext(); 1416 1417 Function *Fn = Inc->getParent()->getParent(); 1418 GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage(); 1419 GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility(); 1420 1421 // Due to the limitation of binder as of 2021/09/28, the duplicate weak 1422 // symbols in the same csect won't be discarded. When there are duplicate weak 1423 // symbols, we can NOT guarantee that the relocations get resolved to the 1424 // intended weak symbol, so we can not ensure the correctness of the relative 1425 // CounterPtr, so we have to use private linkage for counter and data symbols. 1426 if (TT.isOSBinFormatXCOFF()) { 1427 Linkage = GlobalValue::PrivateLinkage; 1428 Visibility = GlobalValue::DefaultVisibility; 1429 } 1430 1431 bool DataReferencedByCode = profDataReferencedByCode(M); 1432 bool NeedComdat = needsComdatForCounter(*Fn, M); 1433 bool Renamed; 1434 1435 // The Data Variable section is anchored to profile counters. 1436 std::string CntsVarName = 1437 getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed); 1438 std::string DataVarName = 1439 getVarName(Inc, getInstrProfDataVarPrefix(), Renamed); 1440 1441 auto *Int8PtrTy = PointerType::getUnqual(Ctx); 1442 // Allocate statically the array of pointers to value profile nodes for 1443 // the current function. 
1444 Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy); 1445 uint64_t NS = 0; 1446 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1447 NS += PD.NumValueSites[Kind]; 1448 if (NS > 0 && ValueProfileStaticAlloc && 1449 !needsRuntimeRegistrationOfSectionRange(TT)) { 1450 ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS); 1451 auto *ValuesVar = new GlobalVariable( 1452 M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy), 1453 getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed)); 1454 ValuesVar->setVisibility(Visibility); 1455 setGlobalVariableLargeSection(TT, *ValuesVar); 1456 ValuesVar->setSection( 1457 getInstrProfSectionName(IPSK_vals, TT.getObjectFormat())); 1458 ValuesVar->setAlignment(Align(8)); 1459 maybeSetComdat(ValuesVar, Fn, CntsVarName); 1460 ValuesPtrExpr = ValuesVar; 1461 } 1462 1463 uint64_t NumCounters = Inc->getNumCounters()->getZExtValue(); 1464 auto *CounterPtr = PD.RegionCounters; 1465 1466 uint64_t NumBitmapBytes = PD.NumBitmapBytes; 1467 1468 // Create data variable. 1469 auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext()); 1470 auto *Int16Ty = Type::getInt16Ty(Ctx); 1471 auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1); 1472 Type *DataTypes[] = { 1473 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType, 1474 #include "llvm/ProfileData/InstrProfData.inc" 1475 }; 1476 auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes)); 1477 1478 Constant *FunctionAddr = getFuncAddrForProfData(Fn); 1479 1480 Constant *Int16ArrayVals[IPVK_Last + 1]; 1481 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1482 Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]); 1483 1484 // If the data variable is not referenced by code (if we don't emit 1485 // @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the 1486 // data variable live under linker GC, the data variable can be private. This 1487 // optimization applies to ELF. 
1488 // 1489 // On COFF, a comdat leader cannot be local so we require DataReferencedByCode 1490 // to be false. 1491 // 1492 // If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees 1493 // that other copies must have the same CFG and cannot have value profiling. 1494 // If no hash suffix, other profd copies may be referenced by code. 1495 if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) && 1496 (TT.isOSBinFormatELF() || 1497 (!DataReferencedByCode && TT.isOSBinFormatCOFF()))) { 1498 Linkage = GlobalValue::PrivateLinkage; 1499 Visibility = GlobalValue::DefaultVisibility; 1500 } 1501 auto *Data = 1502 new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName); 1503 Constant *RelativeCounterPtr; 1504 GlobalVariable *BitmapPtr = PD.RegionBitmaps; 1505 Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0); 1506 InstrProfSectKind DataSectionKind; 1507 // With binary profile correlation, profile data is not loaded into memory. 1508 // profile data must reference profile counter with an absolute relocation. 1509 if (ProfileCorrelate == InstrProfCorrelator::BINARY) { 1510 DataSectionKind = IPSK_covdata; 1511 RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy); 1512 if (BitmapPtr != nullptr) 1513 RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy); 1514 } else { 1515 // Reference the counter variable with a label difference (link-time 1516 // constant). 
1517 DataSectionKind = IPSK_data; 1518 RelativeCounterPtr = 1519 ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy), 1520 ConstantExpr::getPtrToInt(Data, IntPtrTy)); 1521 if (BitmapPtr != nullptr) 1522 RelativeBitmapPtr = 1523 ConstantExpr::getSub(ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy), 1524 ConstantExpr::getPtrToInt(Data, IntPtrTy)); 1525 } 1526 1527 Constant *DataVals[] = { 1528 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init, 1529 #include "llvm/ProfileData/InstrProfData.inc" 1530 }; 1531 Data->setInitializer(ConstantStruct::get(DataTy, DataVals)); 1532 1533 Data->setVisibility(Visibility); 1534 Data->setSection( 1535 getInstrProfSectionName(DataSectionKind, TT.getObjectFormat())); 1536 Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT)); 1537 maybeSetComdat(Data, Fn, CntsVarName); 1538 1539 PD.DataVar = Data; 1540 1541 // Mark the data variable as used so that it isn't stripped out. 1542 CompilerUsedVars.push_back(Data); 1543 // Now that the linkage set by the FE has been passed to the data and counter 1544 // variables, reset Name variable's linkage and visibility to private so that 1545 // it can be removed later by the compiler. 1546 NamePtr->setLinkage(GlobalValue::PrivateLinkage); 1547 // Collect the referenced names to be used by emitNameData. 1548 ReferencedNames.push_back(NamePtr); 1549 } 1550 1551 void InstrLowerer::emitVNodes() { 1552 if (!ValueProfileStaticAlloc) 1553 return; 1554 1555 // For now only support this on platforms that do 1556 // not require runtime registration to discover 1557 // named section start/end. 
1558 if (needsRuntimeRegistrationOfSectionRange(TT)) 1559 return; 1560 1561 size_t TotalNS = 0; 1562 for (auto &PD : ProfileDataMap) { 1563 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 1564 TotalNS += PD.second.NumValueSites[Kind]; 1565 } 1566 1567 if (!TotalNS) 1568 return; 1569 1570 uint64_t NumCounters = TotalNS * NumCountersPerValueSite; 1571 // Heuristic for small programs with very few total value sites. 1572 // The default value of vp-counters-per-site is chosen based on 1573 // the observation that large apps usually have a low percentage 1574 // of value sites that actually have any profile data, and thus 1575 // the average number of counters per site is low. For small 1576 // apps with very few sites, this may not be true. Bump up the 1577 // number of counters in this case. 1578 #define INSTR_PROF_MIN_VAL_COUNTS 10 1579 if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS) 1580 NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2); 1581 1582 auto &Ctx = M.getContext(); 1583 Type *VNodeTypes[] = { 1584 #define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType, 1585 #include "llvm/ProfileData/InstrProfData.inc" 1586 }; 1587 auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes)); 1588 1589 ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters); 1590 auto *VNodesVar = new GlobalVariable( 1591 M, VNodesTy, false, GlobalValue::PrivateLinkage, 1592 Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName()); 1593 setGlobalVariableLargeSection(TT, *VNodesVar); 1594 VNodesVar->setSection( 1595 getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat())); 1596 VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy)); 1597 // VNodesVar is used by runtime but not referenced via relocation by other 1598 // sections. Conservatively make it linker retained. 
1599 UsedVars.push_back(VNodesVar); 1600 } 1601 1602 void InstrLowerer::emitNameData() { 1603 std::string UncompressedData; 1604 1605 if (ReferencedNames.empty()) 1606 return; 1607 1608 std::string CompressedNameStr; 1609 if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr, 1610 DoInstrProfNameCompression)) { 1611 report_fatal_error(Twine(toString(std::move(E))), false); 1612 } 1613 1614 auto &Ctx = M.getContext(); 1615 auto *NamesVal = 1616 ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false); 1617 NamesVar = new GlobalVariable(M, NamesVal->getType(), true, 1618 GlobalValue::PrivateLinkage, NamesVal, 1619 getInstrProfNamesVarName()); 1620 NamesSize = CompressedNameStr.size(); 1621 setGlobalVariableLargeSection(TT, *NamesVar); 1622 NamesVar->setSection( 1623 ProfileCorrelate == InstrProfCorrelator::BINARY 1624 ? getInstrProfSectionName(IPSK_covname, TT.getObjectFormat()) 1625 : getInstrProfSectionName(IPSK_name, TT.getObjectFormat())); 1626 // On COFF, it's important to reduce the alignment down to 1 to prevent the 1627 // linker from inserting padding before the start of the names section or 1628 // between names entries. 1629 NamesVar->setAlignment(Align(1)); 1630 // NamesVar is used by runtime but not referenced via relocation by other 1631 // sections. Conservatively make it linker retained. 1632 UsedVars.push_back(NamesVar); 1633 1634 for (auto *NamePtr : ReferencedNames) 1635 NamePtr->eraseFromParent(); 1636 } 1637 1638 void InstrLowerer::emitRegistration() { 1639 if (!needsRuntimeRegistrationOfSectionRange(TT)) 1640 return; 1641 1642 // Construct the function. 
1643 auto *VoidTy = Type::getVoidTy(M.getContext()); 1644 auto *VoidPtrTy = PointerType::getUnqual(M.getContext()); 1645 auto *Int64Ty = Type::getInt64Ty(M.getContext()); 1646 auto *RegisterFTy = FunctionType::get(VoidTy, false); 1647 auto *RegisterF = Function::Create(RegisterFTy, GlobalValue::InternalLinkage, 1648 getInstrProfRegFuncsName(), M); 1649 RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 1650 if (Options.NoRedZone) 1651 RegisterF->addFnAttr(Attribute::NoRedZone); 1652 1653 auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false); 1654 auto *RuntimeRegisterF = 1655 Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage, 1656 getInstrProfRegFuncName(), M); 1657 1658 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF)); 1659 for (Value *Data : CompilerUsedVars) 1660 if (!isa<Function>(Data)) 1661 IRB.CreateCall(RuntimeRegisterF, Data); 1662 for (Value *Data : UsedVars) 1663 if (Data != NamesVar && !isa<Function>(Data)) 1664 IRB.CreateCall(RuntimeRegisterF, Data); 1665 1666 if (NamesVar) { 1667 Type *ParamTypes[] = {VoidPtrTy, Int64Ty}; 1668 auto *NamesRegisterTy = 1669 FunctionType::get(VoidTy, ArrayRef(ParamTypes), false); 1670 auto *NamesRegisterF = 1671 Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage, 1672 getInstrProfNamesRegFuncName(), M); 1673 IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)}); 1674 } 1675 1676 IRB.CreateRetVoid(); 1677 } 1678 1679 bool InstrLowerer::emitRuntimeHook() { 1680 // We expect the linker to be invoked with -u<hook_var> flag for Linux 1681 // in which case there is no need to emit the external variable. 1682 if (TT.isOSLinux() || TT.isOSAIX()) 1683 return false; 1684 1685 // If the module's provided its own runtime, we don't need to do anything. 1686 if (M.getGlobalVariable(getInstrProfRuntimeHookVarName())) 1687 return false; 1688 1689 // Declare an external variable that will pull in the runtime initialization. 
1690 auto *Int32Ty = Type::getInt32Ty(M.getContext()); 1691 auto *Var = 1692 new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage, 1693 nullptr, getInstrProfRuntimeHookVarName()); 1694 Var->setVisibility(GlobalValue::HiddenVisibility); 1695 1696 if (TT.isOSBinFormatELF() && !TT.isPS()) { 1697 // Mark the user variable as used so that it isn't stripped out. 1698 CompilerUsedVars.push_back(Var); 1699 } else { 1700 // Make a function that uses it. 1701 auto *User = Function::Create(FunctionType::get(Int32Ty, false), 1702 GlobalValue::LinkOnceODRLinkage, 1703 getInstrProfRuntimeHookVarUseFuncName(), M); 1704 User->addFnAttr(Attribute::NoInline); 1705 if (Options.NoRedZone) 1706 User->addFnAttr(Attribute::NoRedZone); 1707 User->setVisibility(GlobalValue::HiddenVisibility); 1708 if (TT.supportsCOMDAT()) 1709 User->setComdat(M.getOrInsertComdat(User->getName())); 1710 1711 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User)); 1712 auto *Load = IRB.CreateLoad(Int32Ty, Var); 1713 IRB.CreateRet(Load); 1714 1715 // Mark the function as used so that it isn't stripped out. 1716 CompilerUsedVars.push_back(User); 1717 } 1718 return true; 1719 } 1720 1721 void InstrLowerer::emitUses() { 1722 // The metadata sections are parallel arrays. Optimizers (e.g. 1723 // GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so 1724 // we conservatively retain all unconditionally in the compiler. 1725 // 1726 // On ELF and Mach-O, the linker can guarantee the associated sections will be 1727 // retained or discarded as a unit, so llvm.compiler.used is sufficient. 1728 // Similarly on COFF, if prof data is not referenced by code we use one comdat 1729 // and ensure this GC property as well. Otherwise, we have to conservatively 1730 // make all of the sections retained by the linker. 
1731 if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() || 1732 (TT.isOSBinFormatCOFF() && !profDataReferencedByCode(M))) 1733 appendToCompilerUsed(M, CompilerUsedVars); 1734 else 1735 appendToUsed(M, CompilerUsedVars); 1736 1737 // We do not add proper references from used metadata sections to NamesVar and 1738 // VNodesVar, so we have to be conservative and place them in llvm.used 1739 // regardless of the target, 1740 appendToUsed(M, UsedVars); 1741 } 1742 1743 void InstrLowerer::emitInitialization() { 1744 // Create ProfileFileName variable. Don't don't this for the 1745 // context-sensitive instrumentation lowering: This lowering is after 1746 // LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should 1747 // have already create the variable before LTO/ThinLTO linking. 1748 if (!IsCS) 1749 createProfileFileNameVar(M, Options.InstrProfileOutput); 1750 Function *RegisterF = M.getFunction(getInstrProfRegFuncsName()); 1751 if (!RegisterF) 1752 return; 1753 1754 // Create the initialization function. 1755 auto *VoidTy = Type::getVoidTy(M.getContext()); 1756 auto *F = Function::Create(FunctionType::get(VoidTy, false), 1757 GlobalValue::InternalLinkage, 1758 getInstrProfInitFuncName(), M); 1759 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 1760 F->addFnAttr(Attribute::NoInline); 1761 if (Options.NoRedZone) 1762 F->addFnAttr(Attribute::NoRedZone); 1763 1764 // Add the basic block and the necessary calls. 1765 IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F)); 1766 IRB.CreateCall(RegisterF, {}); 1767 IRB.CreateRetVoid(); 1768 1769 appendToGlobalCtors(M, F, 0); 1770 } 1771