1 //===- LowerTypeTests.cpp - type metadata lowering pass -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass lowers type metadata and calls to the llvm.type.test intrinsic. 10 // It also ensures that globals are properly laid out for the 11 // llvm.icall.branch.funnel intrinsic. 12 // See http://llvm.org/docs/TypeMetadata.html for more information. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "llvm/Transforms/IPO/LowerTypeTests.h" 17 #include "llvm/ADT/APInt.h" 18 #include "llvm/ADT/ArrayRef.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/ADT/EquivalenceClasses.h" 21 #include "llvm/ADT/PointerUnion.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/Statistic.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/TinyPtrVector.h" 27 #include "llvm/Analysis/TargetTransformInfo.h" 28 #include "llvm/Analysis/TypeMetadataUtils.h" 29 #include "llvm/Analysis/ValueTracking.h" 30 #include "llvm/IR/Attributes.h" 31 #include "llvm/IR/BasicBlock.h" 32 #include "llvm/IR/Constant.h" 33 #include "llvm/IR/Constants.h" 34 #include "llvm/IR/DataLayout.h" 35 #include "llvm/IR/DerivedTypes.h" 36 #include "llvm/IR/Function.h" 37 #include "llvm/IR/GlobalAlias.h" 38 #include "llvm/IR/GlobalObject.h" 39 #include "llvm/IR/GlobalValue.h" 40 #include "llvm/IR/GlobalVariable.h" 41 #include "llvm/IR/IRBuilder.h" 42 #include "llvm/IR/InlineAsm.h" 43 #include "llvm/IR/Instruction.h" 44 #include "llvm/IR/Instructions.h" 45 #include "llvm/IR/IntrinsicInst.h" 46 #include "llvm/IR/Intrinsics.h" 47 #include "llvm/IR/LLVMContext.h" 48 #include "llvm/IR/Metadata.h" 49 #include "llvm/IR/Module.h" 50 
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ReplaceConstant.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <memory>
#include <set>
#include <string>
#include <system_error>
#include <utility>
#include <vector>

using namespace llvm;
using namespace lowertypetests;

#define DEBUG_TYPE "lowertypetests"

STATISTIC(ByteArraySizeBits, "Byte array size in bits");
STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered");
STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers");

static cl::opt<bool> AvoidReuse(
    "lowertypetests-avoid-reuse",
    cl::desc("Try to avoid reuse of byte array addresses using aliases"),
    cl::Hidden, cl::init(true));

static cl::opt<PassSummaryAction> ClSummaryAction(
    "lowertypetests-summary-action",
    cl::desc("What to do with the summary when running this pass"),
    cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
               clEnumValN(PassSummaryAction::Import, "import",
                          "Import typeid resolutions from summary and globals"),
               clEnumValN(PassSummaryAction::Export, "export",
                          "Export typeid resolutions to summary and globals")),
    cl::Hidden);

static cl::opt<std::string> ClReadSummary(
    "lowertypetests-read-summary",
    cl::desc("Read summary from given YAML file before running pass"),
    cl::Hidden);

static cl::opt<std::string> ClWriteSummary(
    "lowertypetests-write-summary",
    cl::desc("Write summary to given YAML file after running pass"),
    cl::Hidden);

static cl::opt<bool>
    ClDropTypeTests("lowertypetests-drop-type-tests",
                    cl::desc("Simply drop type test assume sequences"),
                    cl::Hidden, cl::init(false));

/// Returns true if the byte offset \p Offset is a member of this bit set.
///
/// The offset is a member only if it is at or above ByteOffset, lies a whole
/// multiple of 2^AlignLog2 bytes away from ByteOffset, maps to a bit index
/// below BitSize, and that bit index is present in Bits.
bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
  if (Offset < ByteOffset)
    return false;

  if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
    return false;

  uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
  if (BitOffset >= BitSize)
    return false;

  return Bits.count(BitOffset);
}

/// Writes a one-line, human-readable description of the bit set to \p OS:
/// the byte offset, size in bits and alignment in bytes, followed either by
/// "all-ones" or by the list of set bit indices.
void BitSetInfo::print(raw_ostream &OS) const {
  // Shift in 64 bits, exactly as containsGlobalOffset() does. A plain
  // `1 << AlignLog2` is evaluated in int and is undefined once AlignLog2
  // reaches the width of int.
  OS << "offset " << ByteOffset << " size " << BitSize << " align "
     << (uint64_t(1) << AlignLog2);

  if (isAllOnes()) {
    OS << " all-ones\n";
    return;
  }

  OS << " { ";
  for (uint64_t B : Bits)
    OS << B << ' ';
  OS << "}\n";
}

/// Builds a compressed BitSetInfo from the offsets collected so far.
///
/// The result records the minimum offset as ByteOffset, the common alignment
/// of all offsets relative to that minimum as AlignLog2, and one bit per
/// aligned position between the minimum and maximum offset. Note that this
/// rewrites the stored Offsets in place (they become relative to Min).
BitSetInfo BitSetBuilder::build() {
  // Min > Max means that Min was never lowered below Max; fall back to a zero
  // base so that the subtraction below cannot wrap.
  if (Min > Max)
    Min = 0;

  // Normalize each offset against the minimum observed offset, and compute
  // the bitwise OR of each of the offsets. The number of trailing zeros
  // in the mask gives us the log2 of the alignment of all offsets, which
  // allows us to compress the bitset by only storing one bit per aligned
  // address.
  uint64_t Mask = 0;
  for (uint64_t &Offset : Offsets) {
    Offset -= Min;
    Mask |= Offset;
  }

  BitSetInfo BSI;
  BSI.ByteOffset = Min;

  // A zero mask means every offset equals Min; keep byte granularity then.
  BSI.AlignLog2 = 0;
  if (Mask != 0)
    BSI.AlignLog2 = llvm::countr_zero(Mask);

  // Build the compressed bitset while normalizing the offsets against the
  // computed alignment.
  BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
  for (uint64_t Offset : Offsets) {
    Offset >>= BSI.AlignLog2;
    BSI.Bits.insert(Offset);
  }

  return BSI;
}

/// Adds the object indices in \p F to the layout as a new fragment, merging
/// in every existing fragment that already contains one of those indices.
///
/// A FragmentMap entry of 0 is treated as "this object has not been placed in
/// any fragment yet".
void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
  // Create a new fragment to hold the layout for F.
  Fragments.emplace_back();
  std::vector<uint64_t> &Fragment = Fragments.back();
  uint64_t FragmentIndex = Fragments.size() - 1;

  for (auto ObjIndex : F) {
    uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
    if (OldFragmentIndex == 0) {
      // We haven't seen this object index before, so just add it to the current
      // fragment.
      Fragment.push_back(ObjIndex);
    } else {
      // This index belongs to an existing fragment. Copy the elements of the
      // old fragment into this one and clear the old fragment. We don't update
      // the fragment map just yet, this ensures that any further references to
      // indices from the old fragment in this fragment do not insert any more
      // indices.
      std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
      llvm::append_range(Fragment, OldFragment);
      OldFragment.clear();
    }
  }

  // Update the fragment map to point our object indices to this fragment.
  for (uint64_t ObjIndex : Fragment)
    FragmentMap[ObjIndex] = FragmentIndex;
}

/// Reserves room for a bit set of \p BitSize entries in the shared byte array
/// and records its members.
///
/// The bit position (0 .. BitsPerByte-1) with the smallest allocation so far
/// is chosen. On return \p AllocByteOffset holds the byte offset at which the
/// bit set starts and \p AllocMask the single-bit mask selecting its bit
/// within each byte. For every index B in \p Bits, that bit is set in byte
/// AllocByteOffset + B.
void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
                                uint64_t BitSize, uint64_t &AllocByteOffset,
                                uint8_t &AllocMask) {
  // Find the smallest current allocation.
  unsigned Bit = 0;
  for (unsigned I = 1; I != BitsPerByte; ++I)
    if (BitAllocs[I] < BitAllocs[Bit])
      Bit = I;

  AllocByteOffset = BitAllocs[Bit];

  // Add our size to it. Keep the sum in 64 bits: both operands are uint64_t
  // and the result is written back to BitAllocs and used to size Bytes, so
  // narrowing it to unsigned would silently truncate large allocations.
  uint64_t ReqSize = AllocByteOffset + BitSize;
  BitAllocs[Bit] = ReqSize;
  if (Bytes.size() < ReqSize)
    Bytes.resize(ReqSize);

  // Set our bits.
  AllocMask = 1 << Bit;
  for (uint64_t B : Bits)
    Bytes[AllocByteOffset + B] |= AllocMask;
}

/// Returns true if the jump table entry for \p F is its canonical address.
///
/// Declarations (for the linker) never have a canonical jump table entry.
/// Otherwise the entry is canonical unless the "CFI Canonical Jump Tables"
/// module flag is present and zero, in which case only functions carrying the
/// "cfi-canonical-jump-table" attribute are canonical.
bool lowertypetests::isJumpTableCanonical(Function *F) {
  if (F->isDeclarationForLinker())
    return false;
  auto *CI = mdconst::extract_or_null<ConstantInt>(
      F->getParent()->getModuleFlag("CFI Canonical Jump Tables"));
  if (!CI || !CI->isZero())
    return true;
  return F->hasFnAttribute("cfi-canonical-jump-table");
}

namespace {

/// Bookkeeping for one bit set that is stored in the shared byte array.
struct ByteArrayInfo {
  /// Indices of the set bits.
  std::set<uint64_t> Bits;
  /// Number of entries in the bit set.
  uint64_t BitSize;
  /// Placeholder global standing in for the address of the byte array.
  GlobalVariable *ByteArray;
  /// Placeholder global standing in for the bit mask.
  GlobalVariable *MaskGlobal;
  /// If non-null, the final mask is also stored through this pointer once it
  /// has been allocated.
  uint8_t *MaskPtr = nullptr;
};

/// A POD-like structure that we use to store a global reference together with
/// its metadata types. In this pass we frequently need to query the set of
/// metadata types referenced by a global, which at the IR level is an expensive
/// operation involving a map lookup; this data structure helps to reduce the
/// number of times we need to do this lookup.
class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
  friend TrailingObjects;

  GlobalObject *GO;
  size_t NTypes;

  // For functions: true if the jump table is canonical. This essentially means
  // whether the canonical address (i.e. the symbol table entry) of the function
  // is provided by the local jump table. This is normally the same as whether
  // the function is defined locally, but if canonical jump tables are disabled
  // by the user then the jump table never provides a canonical definition.
  bool IsJumpTableCanonical;

  // For functions: true if this function is either defined or used in a thinlto
  // module and its jumptable entry needs to be exported to thinlto backends.
  bool IsExported;

  size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; }

public:
  /// Allocates a GlobalTypeMember from \p Alloc with room for a trailing copy
  /// of \p Types, and initializes it from the remaining arguments.
  static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
                                  bool IsJumpTableCanonical, bool IsExported,
                                  ArrayRef<MDNode *> Types) {
    auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
        totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
    GTM->GO = GO;
    GTM->NTypes = Types.size();
    GTM->IsJumpTableCanonical = IsJumpTableCanonical;
    GTM->IsExported = IsExported;
    std::uninitialized_copy(Types.begin(), Types.end(),
                            GTM->getTrailingObjects<MDNode *>());
    return GTM;
  }

  GlobalObject *getGlobal() const {
    return GO;
  }

  bool isJumpTableCanonical() const {
    return IsJumpTableCanonical;
  }

  bool isExported() const {
    return IsExported;
  }

  /// The type metadata nodes that were passed to create().
  ArrayRef<MDNode *> types() const {
    return ArrayRef(getTrailingObjects<MDNode *>(), NTypes);
  }
};

/// A call instruction together with a unique id and a trailing array of the
/// GlobalTypeMembers it targets.
struct ICallBranchFunnel final
    : TrailingObjects<ICallBranchFunnel, GlobalTypeMember *> {
  /// Allocates an ICallBranchFunnel from \p Alloc with room for a trailing
  /// copy of \p Targets.
  static ICallBranchFunnel *create(BumpPtrAllocator &Alloc, CallInst *CI,
                                   ArrayRef<GlobalTypeMember *> Targets,
                                   unsigned UniqueId) {
    auto *Call = static_cast<ICallBranchFunnel *>(
        Alloc.Allocate(totalSizeToAlloc<GlobalTypeMember *>(Targets.size()),
                       alignof(ICallBranchFunnel)));
    Call->CI = CI;
    Call->UniqueId = UniqueId;
    Call->NTargets = Targets.size();
    std::uninitialized_copy(Targets.begin(), Targets.end(),
                            Call->getTrailingObjects<GlobalTypeMember *>());
    return Call;
  }

  CallInst *CI;
  ArrayRef<GlobalTypeMember *> targets() const {
    return ArrayRef(getTrailingObjects<GlobalTypeMember *>(), NTargets);
  }

  unsigned UniqueId;

private:
  size_t NTargets;
};

/// RAII helper: on construction, removes llvm.used/llvm.compiler.used and
/// remembers which functions the module's aliases and ifunc resolvers refer
/// to; on destruction, restores the used lists and points the aliases and
/// ifuncs back at the remembered functions.
struct ScopedSaveAliaseesAndUsed {
  Module &M;
  SmallVector<GlobalValue *, 4> Used, CompilerUsed;
  std::vector<std::pair<GlobalAlias *, Function *>> FunctionAliases;
  std::vector<std::pair<GlobalIFunc *, Function *>> ResolverIFuncs;

  ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
    // The users of this class want to replace all function references except
    // for aliases and llvm.used/llvm.compiler.used with references to a jump
    // table. We avoid replacing aliases in order to avoid introducing a double
    // indirection (or an alias pointing to a declaration in ThinLTO mode), and
    // we avoid replacing llvm.used/llvm.compiler.used because these global
    // variables describe properties of the global, not the jump table (besides,
    // offseted references to the jump table in llvm.used are invalid).
    // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly
    // indirect) users", so what we do is save the list of globals referenced by
    // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW
    // replace the aliasees and then set them back to their original values at
    // the end.
    if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false))
      GV->eraseFromParent();
    if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true))
      GV->eraseFromParent();

    for (auto &GA : M.aliases()) {
      // FIXME: This should look past all aliases not just interposable ones,
      // see discussion on D65118.
      if (auto *F = dyn_cast<Function>(GA.getAliasee()->stripPointerCasts()))
        FunctionAliases.push_back({&GA, F});
    }

    for (auto &GI : M.ifuncs())
      if (auto *F = dyn_cast<Function>(GI.getResolver()->stripPointerCasts()))
        ResolverIFuncs.push_back({&GI, F});
  }

  ~ScopedSaveAliaseesAndUsed() {
    appendToUsed(M, Used);
    appendToCompilerUsed(M, CompilerUsed);

    for (auto P : FunctionAliases)
      P.first->setAliasee(
          ConstantExpr::getBitCast(P.second, P.first->getType()));

    for (auto P : ResolverIFuncs) {
      // This does not preserve pointer casts that may have been stripped by the
      // constructor, but the resolver's type is different from that of the
      // ifunc anyway.
      P.first->setResolver(P.second);
    }
  }
};

/// Per-module state and helpers used to lower type metadata and
/// llvm.type.test calls for a single Module.
class LowerTypeTestsModule {
  Module &M;

  ModuleSummaryIndex *ExportSummary;
  const ModuleSummaryIndex *ImportSummary;
  // Set when the client has invoked this to simply drop all type test assume
  // sequences.
  bool DropTypeTests;

  Triple::ArchType Arch;
  Triple::OSType OS;
  Triple::ObjectFormatType ObjectFormat;

  // Determines which kind of Thumb jump table we generate. If arch is
  // either 'arm' or 'thumb' we need to find this out, because
  // selectJumpTableArmEncoding may decide to use Thumb in either case.
  bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;

  // The jump table type we ended up deciding on. (Usually the same as
  // Arch, except that 'arm' and 'thumb' are often interchangeable.)
  Triple::ArchType JumpTableArch = Triple::UnknownArch;

  // Frequently used IR types, cached for the module's context.
  IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
  IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
  PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
  ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0);
  IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
  PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
  IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
  IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0);

  // Indirect function call index assignment counter for WebAssembly
  uint64_t IndirectIndex = 1;

  // Mapping from type identifiers to the call sites that test them, as well as
  // whether the type identifier needs to be exported to ThinLTO backends as
  // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId).
  struct TypeIdUserInfo {
    std::vector<CallInst *> CallSites;
    bool IsExported = false;
  };
  DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers;

  /// This structure describes how to lower type tests for a particular type
  /// identifier. It is either built directly from the global analysis (during
  /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
  /// identifier summaries and external symbol references (in ThinLTO backends).
  struct TypeIdLowering {
    TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;

    /// All except Unsat: the start address within the combined global.
    Constant *OffsetedGlobal;

    /// ByteArray, Inline, AllOnes: log2 of the required global alignment
    /// relative to the start address.
    Constant *AlignLog2;

    /// ByteArray, Inline, AllOnes: one less than the size of the memory region
    /// covering members of this type identifier as a multiple of 2^AlignLog2.
    Constant *SizeM1;

    /// ByteArray: the byte array to test the address against.
    Constant *TheByteArray;

    /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
    Constant *BitMask;

    /// Inline: the bit mask to test the address against.
    Constant *InlineBits;
  };

  std::vector<ByteArrayInfo> ByteArrayInfos;

  Function *WeakInitializerFn = nullptr;

  bool shouldExportConstantsAsAbsoluteSymbols();
  uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
  TypeIdLowering importTypeId(StringRef TypeId);
  void importTypeTest(CallInst *CI);
  void importFunction(Function *F, bool isJumpTableCanonical,
                      std::vector<GlobalAlias *> &AliasesToErase);

  BitSetInfo
  buildBitSet(Metadata *TypeId,
              const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
  ByteArrayInfo *createByteArray(BitSetInfo &BSI);
  void allocateByteArrays();
  Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
                          Value *BitOffset);
  void lowerTypeTestCalls(
      ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
      const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
  Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
                           const TypeIdLowering &TIL);

  void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
                                       ArrayRef<GlobalTypeMember *> Globals);
  Triple::ArchType
  selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
  unsigned getJumpTableEntrySize();
  Type *getJumpTableEntryType();
  void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
                            Triple::ArchType JumpTableArch,
                            SmallVectorImpl<Value *> &AsmArgs, Function *Dest);
  void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
  void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
                                 ArrayRef<GlobalTypeMember *> Functions);
  void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds,
                                       ArrayRef<GlobalTypeMember *> Functions);
  void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds,
                                     ArrayRef<GlobalTypeMember *> Functions);
  void
  buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
                              ArrayRef<GlobalTypeMember *> Globals,
                              ArrayRef<ICallBranchFunnel *> ICallBranchFunnels);

  void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT,
                                              bool IsJumpTableCanonical);
  void moveInitializerToModuleConstructor(GlobalVariable *GV);
  void findGlobalVariableUsersOf(Constant *C,
                                 SmallSetVector<GlobalVariable *, 8> &Out);

  void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);

  /// replaceCfiUses - Go through the uses list for this definition
  /// and make each use point to "V" instead of "this" when the use is outside
  /// the block. 'This's use list is expected to have at least one element.
  /// Unlike replaceAllUsesWith this function skips blockaddr and direct call
  /// uses.
  void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical);

  /// replaceDirectCalls - Go through the uses list for this definition and
  /// replace each use, which is a direct function call.
  void replaceDirectCalls(Value *Old, Value *New);

public:
  LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
                       ModuleSummaryIndex *ExportSummary,
                       const ModuleSummaryIndex *ImportSummary,
                       bool DropTypeTests);

  bool lower();

  // Lower the module using the action and summary passed as command line
  // arguments. For testing purposes only.
  static bool runForTesting(Module &M, ModuleAnalysisManager &AM);
};
} // end anonymous namespace

/// Build a bit set for TypeId using the object layouts in
/// GlobalLayout.
547 BitSetInfo LowerTypeTestsModule::buildBitSet( 548 Metadata *TypeId, 549 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) { 550 BitSetBuilder BSB; 551 552 // Compute the byte offset of each address associated with this type 553 // identifier. 554 for (const auto &GlobalAndOffset : GlobalLayout) { 555 for (MDNode *Type : GlobalAndOffset.first->types()) { 556 if (Type->getOperand(1) != TypeId) 557 continue; 558 uint64_t Offset = 559 cast<ConstantInt>( 560 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) 561 ->getZExtValue(); 562 BSB.addOffset(GlobalAndOffset.second + Offset); 563 } 564 } 565 566 return BSB.build(); 567 } 568 569 /// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in 570 /// Bits. This pattern matches to the bt instruction on x86. 571 static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits, 572 Value *BitOffset) { 573 auto BitsType = cast<IntegerType>(Bits->getType()); 574 unsigned BitWidth = BitsType->getBitWidth(); 575 576 BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType); 577 Value *BitIndex = 578 B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1)); 579 Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex); 580 Value *MaskedBits = B.CreateAnd(Bits, BitMask); 581 return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0)); 582 } 583 584 ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) { 585 // Create globals to stand in for byte arrays and masks. These never actually 586 // get initialized, we RAUW and erase them later in allocateByteArrays() once 587 // we know the offset and mask to use. 
588 auto ByteArrayGlobal = new GlobalVariable( 589 M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr); 590 auto MaskGlobal = new GlobalVariable(M, Int8Ty, /*isConstant=*/true, 591 GlobalValue::PrivateLinkage, nullptr); 592 593 ByteArrayInfos.emplace_back(); 594 ByteArrayInfo *BAI = &ByteArrayInfos.back(); 595 596 BAI->Bits = BSI.Bits; 597 BAI->BitSize = BSI.BitSize; 598 BAI->ByteArray = ByteArrayGlobal; 599 BAI->MaskGlobal = MaskGlobal; 600 return BAI; 601 } 602 603 void LowerTypeTestsModule::allocateByteArrays() { 604 llvm::stable_sort(ByteArrayInfos, 605 [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) { 606 return BAI1.BitSize > BAI2.BitSize; 607 }); 608 609 std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size()); 610 611 ByteArrayBuilder BAB; 612 for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { 613 ByteArrayInfo *BAI = &ByteArrayInfos[I]; 614 615 uint8_t Mask; 616 BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask); 617 618 BAI->MaskGlobal->replaceAllUsesWith( 619 ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy)); 620 BAI->MaskGlobal->eraseFromParent(); 621 if (BAI->MaskPtr) 622 *BAI->MaskPtr = Mask; 623 } 624 625 Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes); 626 auto ByteArray = 627 new GlobalVariable(M, ByteArrayConst->getType(), /*isConstant=*/true, 628 GlobalValue::PrivateLinkage, ByteArrayConst); 629 630 for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) { 631 ByteArrayInfo *BAI = &ByteArrayInfos[I]; 632 633 Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0), 634 ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])}; 635 Constant *GEP = ConstantExpr::getInBoundsGetElementPtr( 636 ByteArrayConst->getType(), ByteArray, Idxs); 637 638 // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures 639 // that the pc-relative displacement is folded into the lea instead of the 640 // test instruction getting another displacement. 
641 GlobalAlias *Alias = GlobalAlias::create( 642 Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M); 643 BAI->ByteArray->replaceAllUsesWith(Alias); 644 BAI->ByteArray->eraseFromParent(); 645 } 646 647 ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] + 648 BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] + 649 BAB.BitAllocs[6] + BAB.BitAllocs[7]; 650 ByteArraySizeBytes = BAB.Bytes.size(); 651 } 652 653 /// Build a test that bit BitOffset is set in the type identifier that was 654 /// lowered to TIL, which must be either an Inline or a ByteArray. 655 Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, 656 const TypeIdLowering &TIL, 657 Value *BitOffset) { 658 if (TIL.TheKind == TypeTestResolution::Inline) { 659 // If the bit set is sufficiently small, we can avoid a load by bit testing 660 // a constant. 661 return createMaskedBitTest(B, TIL.InlineBits, BitOffset); 662 } else { 663 Constant *ByteArray = TIL.TheByteArray; 664 if (AvoidReuse && !ImportSummary) { 665 // Each use of the byte array uses a different alias. This makes the 666 // backend less likely to reuse previously computed byte array addresses, 667 // improving the security of the CFI mechanism based on this pass. 668 // This won't work when importing because TheByteArray is external. 
669 ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage, 670 "bits_use", ByteArray, &M); 671 } 672 673 Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset); 674 Value *Byte = B.CreateLoad(Int8Ty, ByteAddr); 675 676 Value *ByteAndMask = 677 B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty)); 678 return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0)); 679 } 680 } 681 682 static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL, 683 Value *V, uint64_t COffset) { 684 if (auto GV = dyn_cast<GlobalObject>(V)) { 685 SmallVector<MDNode *, 2> Types; 686 GV->getMetadata(LLVMContext::MD_type, Types); 687 for (MDNode *Type : Types) { 688 if (Type->getOperand(1) != TypeId) 689 continue; 690 uint64_t Offset = 691 cast<ConstantInt>( 692 cast<ConstantAsMetadata>(Type->getOperand(0))->getValue()) 693 ->getZExtValue(); 694 if (COffset == Offset) 695 return true; 696 } 697 return false; 698 } 699 700 if (auto GEP = dyn_cast<GEPOperator>(V)) { 701 APInt APOffset(DL.getIndexSizeInBits(0), 0); 702 bool Result = GEP->accumulateConstantOffset(DL, APOffset); 703 if (!Result) 704 return false; 705 COffset += APOffset.getZExtValue(); 706 return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset); 707 } 708 709 if (auto Op = dyn_cast<Operator>(V)) { 710 if (Op->getOpcode() == Instruction::BitCast) 711 return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset); 712 713 if (Op->getOpcode() == Instruction::Select) 714 return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) && 715 isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset); 716 } 717 718 return false; 719 } 720 721 /// Lower a llvm.type.test call to its implementation. Returns the value to 722 /// replace the call with. 723 Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI, 724 const TypeIdLowering &TIL) { 725 // Delay lowering if the resolution is currently unknown. 
726 if (TIL.TheKind == TypeTestResolution::Unknown) 727 return nullptr; 728 if (TIL.TheKind == TypeTestResolution::Unsat) 729 return ConstantInt::getFalse(M.getContext()); 730 731 Value *Ptr = CI->getArgOperand(0); 732 const DataLayout &DL = M.getDataLayout(); 733 if (isKnownTypeIdMember(TypeId, DL, Ptr, 0)) 734 return ConstantInt::getTrue(M.getContext()); 735 736 BasicBlock *InitialBB = CI->getParent(); 737 738 IRBuilder<> B(CI); 739 740 Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy); 741 742 Constant *OffsetedGlobalAsInt = 743 ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy); 744 if (TIL.TheKind == TypeTestResolution::Single) 745 return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt); 746 747 Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt); 748 749 // We need to check that the offset both falls within our range and is 750 // suitably aligned. We can check both properties at the same time by 751 // performing a right rotate by log2(alignment) followed by an integer 752 // comparison against the bitset size. The rotate will move the lower 753 // order bits that need to be zero into the higher order bits of the 754 // result, causing the comparison to fail if they are nonzero. The rotate 755 // also conveniently gives us a bit offset to use during the load from 756 // the bitset. 757 Value *OffsetSHR = 758 B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy)); 759 Value *OffsetSHL = B.CreateShl( 760 PtrOffset, ConstantExpr::getZExt( 761 ConstantExpr::getSub( 762 ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)), 763 TIL.AlignLog2), 764 IntPtrTy)); 765 Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL); 766 767 Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1); 768 769 // If the bit set is all ones, testing against it is unnecessary. 
770 if (TIL.TheKind == TypeTestResolution::AllOnes) 771 return OffsetInRange; 772 773 // See if the intrinsic is used in the following common pattern: 774 // br(llvm.type.test(...), thenbb, elsebb) 775 // where nothing happens between the type test and the br. 776 // If so, create slightly simpler IR. 777 if (CI->hasOneUse()) 778 if (auto *Br = dyn_cast<BranchInst>(*CI->user_begin())) 779 if (CI->getNextNode() == Br) { 780 BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator()); 781 BasicBlock *Else = Br->getSuccessor(1); 782 BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange); 783 NewBr->setMetadata(LLVMContext::MD_prof, 784 Br->getMetadata(LLVMContext::MD_prof)); 785 ReplaceInstWithInst(InitialBB->getTerminator(), NewBr); 786 787 // Update phis in Else resulting from InitialBB being split 788 for (auto &Phi : Else->phis()) 789 Phi.addIncoming(Phi.getIncomingValueForBlock(Then), InitialBB); 790 791 IRBuilder<> ThenB(CI); 792 return createBitSetTest(ThenB, TIL, BitOffset); 793 } 794 795 IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false)); 796 797 // Now that we know that the offset is in range and aligned, load the 798 // appropriate bit from the bitset. 799 Value *Bit = createBitSetTest(ThenB, TIL, BitOffset); 800 801 // The value we want is 0 if we came directly from the initial block 802 // (having failed the range or alignment checks), or the loaded bit if 803 // we came from the block in which we loaded it. 804 B.SetInsertPoint(CI); 805 PHINode *P = B.CreatePHI(Int1Ty, 2); 806 P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB); 807 P->addIncoming(Bit, ThenB.GetInsertBlock()); 808 return P; 809 } 810 811 /// Given a disjoint set of type identifiers and globals, lay out the globals, 812 /// build the bit sets and lower the llvm.type.test calls. 
813 void LowerTypeTestsModule::buildBitSetsFromGlobalVariables( 814 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) { 815 // Build a new global with the combined contents of the referenced globals. 816 // This global is a struct whose even-indexed elements contain the original 817 // contents of the referenced globals and whose odd-indexed elements contain 818 // any padding required to align the next element to the next power of 2 plus 819 // any additional padding required to meet its alignment requirements. 820 std::vector<Constant *> GlobalInits; 821 const DataLayout &DL = M.getDataLayout(); 822 DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout; 823 Align MaxAlign; 824 uint64_t CurOffset = 0; 825 uint64_t DesiredPadding = 0; 826 for (GlobalTypeMember *G : Globals) { 827 auto *GV = cast<GlobalVariable>(G->getGlobal()); 828 Align Alignment = 829 DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType()); 830 MaxAlign = std::max(MaxAlign, Alignment); 831 uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, Alignment); 832 GlobalLayout[G] = GVOffset; 833 if (GVOffset != 0) { 834 uint64_t Padding = GVOffset - CurOffset; 835 GlobalInits.push_back( 836 ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding))); 837 } 838 839 GlobalInits.push_back(GV->getInitializer()); 840 uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType()); 841 CurOffset = GVOffset + InitSize; 842 843 // Compute the amount of padding that we'd like for the next element. 844 DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize; 845 846 // Experiments of different caps with Chromium on both x64 and ARM64 847 // have shown that the 32-byte cap generates the smallest binary on 848 // both platforms while different caps yield similar performance. 
849 // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html) 850 if (DesiredPadding > 32) 851 DesiredPadding = alignTo(InitSize, 32) - InitSize; 852 } 853 854 Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits); 855 auto *CombinedGlobal = 856 new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true, 857 GlobalValue::PrivateLinkage, NewInit); 858 CombinedGlobal->setAlignment(MaxAlign); 859 860 StructType *NewTy = cast<StructType>(NewInit->getType()); 861 lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout); 862 863 // Build aliases pointing to offsets into the combined global for each 864 // global from which we built the combined global, and replace references 865 // to the original globals with references to the aliases. 866 for (unsigned I = 0; I != Globals.size(); ++I) { 867 GlobalVariable *GV = cast<GlobalVariable>(Globals[I]->getGlobal()); 868 869 // Multiply by 2 to account for padding elements. 870 Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0), 871 ConstantInt::get(Int32Ty, I * 2)}; 872 Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr( 873 NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs); 874 assert(GV->getType()->getAddressSpace() == 0); 875 GlobalAlias *GAlias = 876 GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(), 877 "", CombinedGlobalElemPtr, &M); 878 GAlias->setVisibility(GV->getVisibility()); 879 GAlias->takeName(GV); 880 GV->replaceAllUsesWith(GAlias); 881 GV->eraseFromParent(); 882 } 883 } 884 885 bool LowerTypeTestsModule::shouldExportConstantsAsAbsoluteSymbols() { 886 return (Arch == Triple::x86 || Arch == Triple::x86_64) && 887 ObjectFormat == Triple::ELF; 888 } 889 890 /// Export the given type identifier so that ThinLTO backends may import it. 
891 /// Type identifiers are exported by adding coarse-grained information about how 892 /// to test the type identifier to the summary, and creating symbols in the 893 /// object file (aliases and absolute symbols) containing fine-grained 894 /// information about the type identifier. 895 /// 896 /// Returns a pointer to the location in which to store the bitmask, if 897 /// applicable. 898 uint8_t *LowerTypeTestsModule::exportTypeId(StringRef TypeId, 899 const TypeIdLowering &TIL) { 900 TypeTestResolution &TTRes = 901 ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes; 902 TTRes.TheKind = TIL.TheKind; 903 904 auto ExportGlobal = [&](StringRef Name, Constant *C) { 905 GlobalAlias *GA = 906 GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage, 907 "__typeid_" + TypeId + "_" + Name, C, &M); 908 GA->setVisibility(GlobalValue::HiddenVisibility); 909 }; 910 911 auto ExportConstant = [&](StringRef Name, uint64_t &Storage, Constant *C) { 912 if (shouldExportConstantsAsAbsoluteSymbols()) 913 ExportGlobal(Name, ConstantExpr::getIntToPtr(C, Int8PtrTy)); 914 else 915 Storage = cast<ConstantInt>(C)->getZExtValue(); 916 }; 917 918 if (TIL.TheKind != TypeTestResolution::Unsat) 919 ExportGlobal("global_addr", TIL.OffsetedGlobal); 920 921 if (TIL.TheKind == TypeTestResolution::ByteArray || 922 TIL.TheKind == TypeTestResolution::Inline || 923 TIL.TheKind == TypeTestResolution::AllOnes) { 924 ExportConstant("align", TTRes.AlignLog2, TIL.AlignLog2); 925 ExportConstant("size_m1", TTRes.SizeM1, TIL.SizeM1); 926 927 uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1; 928 if (TIL.TheKind == TypeTestResolution::Inline) 929 TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6; 930 else 931 TTRes.SizeM1BitWidth = (BitSize <= 128) ? 
7 : 32; 932 } 933 934 if (TIL.TheKind == TypeTestResolution::ByteArray) { 935 ExportGlobal("byte_array", TIL.TheByteArray); 936 if (shouldExportConstantsAsAbsoluteSymbols()) 937 ExportGlobal("bit_mask", TIL.BitMask); 938 else 939 return &TTRes.BitMask; 940 } 941 942 if (TIL.TheKind == TypeTestResolution::Inline) 943 ExportConstant("inline_bits", TTRes.InlineBits, TIL.InlineBits); 944 945 return nullptr; 946 } 947 948 LowerTypeTestsModule::TypeIdLowering 949 LowerTypeTestsModule::importTypeId(StringRef TypeId) { 950 const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId); 951 if (!TidSummary) 952 return {}; // Unsat: no globals match this type id. 953 const TypeTestResolution &TTRes = TidSummary->TTRes; 954 955 TypeIdLowering TIL; 956 TIL.TheKind = TTRes.TheKind; 957 958 auto ImportGlobal = [&](StringRef Name) { 959 // Give the global a type of length 0 so that it is not assumed not to alias 960 // with any other global. 961 Constant *C = M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(), 962 Int8Arr0Ty); 963 if (auto *GV = dyn_cast<GlobalVariable>(C)) 964 GV->setVisibility(GlobalValue::HiddenVisibility); 965 C = ConstantExpr::getBitCast(C, Int8PtrTy); 966 return C; 967 }; 968 969 auto ImportConstant = [&](StringRef Name, uint64_t Const, unsigned AbsWidth, 970 Type *Ty) { 971 if (!shouldExportConstantsAsAbsoluteSymbols()) { 972 Constant *C = 973 ConstantInt::get(isa<IntegerType>(Ty) ? 
Ty : Int64Ty, Const); 974 if (!isa<IntegerType>(Ty)) 975 C = ConstantExpr::getIntToPtr(C, Ty); 976 return C; 977 } 978 979 Constant *C = ImportGlobal(Name); 980 auto *GV = cast<GlobalVariable>(C->stripPointerCasts()); 981 if (isa<IntegerType>(Ty)) 982 C = ConstantExpr::getPtrToInt(C, Ty); 983 if (GV->getMetadata(LLVMContext::MD_absolute_symbol)) 984 return C; 985 986 auto SetAbsRange = [&](uint64_t Min, uint64_t Max) { 987 auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min)); 988 auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max)); 989 GV->setMetadata(LLVMContext::MD_absolute_symbol, 990 MDNode::get(M.getContext(), {MinC, MaxC})); 991 }; 992 if (AbsWidth == IntPtrTy->getBitWidth()) 993 SetAbsRange(~0ull, ~0ull); // Full set. 994 else 995 SetAbsRange(0, 1ull << AbsWidth); 996 return C; 997 }; 998 999 if (TIL.TheKind != TypeTestResolution::Unsat) 1000 TIL.OffsetedGlobal = ImportGlobal("global_addr"); 1001 1002 if (TIL.TheKind == TypeTestResolution::ByteArray || 1003 TIL.TheKind == TypeTestResolution::Inline || 1004 TIL.TheKind == TypeTestResolution::AllOnes) { 1005 TIL.AlignLog2 = ImportConstant("align", TTRes.AlignLog2, 8, Int8Ty); 1006 TIL.SizeM1 = 1007 ImportConstant("size_m1", TTRes.SizeM1, TTRes.SizeM1BitWidth, IntPtrTy); 1008 } 1009 1010 if (TIL.TheKind == TypeTestResolution::ByteArray) { 1011 TIL.TheByteArray = ImportGlobal("byte_array"); 1012 TIL.BitMask = ImportConstant("bit_mask", TTRes.BitMask, 8, Int8PtrTy); 1013 } 1014 1015 if (TIL.TheKind == TypeTestResolution::Inline) 1016 TIL.InlineBits = ImportConstant( 1017 "inline_bits", TTRes.InlineBits, 1 << TTRes.SizeM1BitWidth, 1018 TTRes.SizeM1BitWidth <= 5 ? 
Int32Ty : Int64Ty); 1019 1020 return TIL; 1021 } 1022 1023 void LowerTypeTestsModule::importTypeTest(CallInst *CI) { 1024 auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1)); 1025 if (!TypeIdMDVal) 1026 report_fatal_error("Second argument of llvm.type.test must be metadata"); 1027 1028 auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata()); 1029 // If this is a local unpromoted type, which doesn't have a metadata string, 1030 // treat as Unknown and delay lowering, so that we can still utilize it for 1031 // later optimizations. 1032 if (!TypeIdStr) 1033 return; 1034 1035 TypeIdLowering TIL = importTypeId(TypeIdStr->getString()); 1036 Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL); 1037 if (Lowered) { 1038 CI->replaceAllUsesWith(Lowered); 1039 CI->eraseFromParent(); 1040 } 1041 } 1042 1043 // ThinLTO backend: the function F has a jump table entry; update this module 1044 // accordingly. isJumpTableCanonical describes the type of the jump table entry. 1045 void LowerTypeTestsModule::importFunction( 1046 Function *F, bool isJumpTableCanonical, 1047 std::vector<GlobalAlias *> &AliasesToErase) { 1048 assert(F->getType()->getAddressSpace() == 0); 1049 1050 GlobalValue::VisibilityTypes Visibility = F->getVisibility(); 1051 std::string Name = std::string(F->getName()); 1052 1053 if (F->isDeclarationForLinker() && isJumpTableCanonical) { 1054 // Non-dso_local functions may be overriden at run time, 1055 // don't short curcuit them 1056 if (F->isDSOLocal()) { 1057 Function *RealF = Function::Create(F->getFunctionType(), 1058 GlobalValue::ExternalLinkage, 1059 F->getAddressSpace(), 1060 Name + ".cfi", &M); 1061 RealF->setVisibility(GlobalVariable::HiddenVisibility); 1062 replaceDirectCalls(F, RealF); 1063 } 1064 return; 1065 } 1066 1067 Function *FDecl; 1068 if (!isJumpTableCanonical) { 1069 // Either a declaration of an external function or a reference to a locally 1070 // defined jump table. 
1071 FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, 1072 F->getAddressSpace(), Name + ".cfi_jt", &M); 1073 FDecl->setVisibility(GlobalValue::HiddenVisibility); 1074 } else { 1075 F->setName(Name + ".cfi"); 1076 F->setLinkage(GlobalValue::ExternalLinkage); 1077 FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage, 1078 F->getAddressSpace(), Name, &M); 1079 FDecl->setVisibility(Visibility); 1080 Visibility = GlobalValue::HiddenVisibility; 1081 1082 // Delete aliases pointing to this function, they'll be re-created in the 1083 // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed 1084 // will want to reset the aliasees first. 1085 for (auto &U : F->uses()) { 1086 if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) { 1087 Function *AliasDecl = Function::Create( 1088 F->getFunctionType(), GlobalValue::ExternalLinkage, 1089 F->getAddressSpace(), "", &M); 1090 AliasDecl->takeName(A); 1091 A->replaceAllUsesWith(AliasDecl); 1092 AliasesToErase.push_back(A); 1093 } 1094 } 1095 } 1096 1097 if (F->hasExternalWeakLinkage()) 1098 replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical); 1099 else 1100 replaceCfiUses(F, FDecl, isJumpTableCanonical); 1101 1102 // Set visibility late because it's used in replaceCfiUses() to determine 1103 // whether uses need to to be replaced. 1104 F->setVisibility(Visibility); 1105 } 1106 1107 void LowerTypeTestsModule::lowerTypeTestCalls( 1108 ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr, 1109 const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) { 1110 CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy); 1111 1112 // For each type identifier in this disjoint set... 1113 for (Metadata *TypeId : TypeIds) { 1114 // Build the bitset. 
1115 BitSetInfo BSI = buildBitSet(TypeId, GlobalLayout); 1116 LLVM_DEBUG({ 1117 if (auto MDS = dyn_cast<MDString>(TypeId)) 1118 dbgs() << MDS->getString() << ": "; 1119 else 1120 dbgs() << "<unnamed>: "; 1121 BSI.print(dbgs()); 1122 }); 1123 1124 ByteArrayInfo *BAI = nullptr; 1125 TypeIdLowering TIL; 1126 TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr( 1127 Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)), 1128 TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2); 1129 TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1); 1130 if (BSI.isAllOnes()) { 1131 TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single 1132 : TypeTestResolution::AllOnes; 1133 } else if (BSI.BitSize <= 64) { 1134 TIL.TheKind = TypeTestResolution::Inline; 1135 uint64_t InlineBits = 0; 1136 for (auto Bit : BSI.Bits) 1137 InlineBits |= uint64_t(1) << Bit; 1138 if (InlineBits == 0) 1139 TIL.TheKind = TypeTestResolution::Unsat; 1140 else 1141 TIL.InlineBits = ConstantInt::get( 1142 (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits); 1143 } else { 1144 TIL.TheKind = TypeTestResolution::ByteArray; 1145 ++NumByteArraysCreated; 1146 BAI = createByteArray(BSI); 1147 TIL.TheByteArray = BAI->ByteArray; 1148 TIL.BitMask = BAI->MaskGlobal; 1149 } 1150 1151 TypeIdUserInfo &TIUI = TypeIdUsers[TypeId]; 1152 1153 if (TIUI.IsExported) { 1154 uint8_t *MaskPtr = exportTypeId(cast<MDString>(TypeId)->getString(), TIL); 1155 if (BAI) 1156 BAI->MaskPtr = MaskPtr; 1157 } 1158 1159 // Lower each call to llvm.type.test for this type identifier. 
1160 for (CallInst *CI : TIUI.CallSites) { 1161 ++NumTypeTestCallsLowered; 1162 Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL); 1163 if (Lowered) { 1164 CI->replaceAllUsesWith(Lowered); 1165 CI->eraseFromParent(); 1166 } 1167 } 1168 } 1169 } 1170 1171 void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) { 1172 if (Type->getNumOperands() != 2) 1173 report_fatal_error("All operands of type metadata must have 2 elements"); 1174 1175 if (GO->isThreadLocal()) 1176 report_fatal_error("Bit set element may not be thread-local"); 1177 if (isa<GlobalVariable>(GO) && GO->hasSection()) 1178 report_fatal_error( 1179 "A member of a type identifier may not have an explicit section"); 1180 1181 // FIXME: We previously checked that global var member of a type identifier 1182 // must be a definition, but the IR linker may leave type metadata on 1183 // declarations. We should restore this check after fixing PR31759. 1184 1185 auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0)); 1186 if (!OffsetConstMD) 1187 report_fatal_error("Type offset must be a constant"); 1188 auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue()); 1189 if (!OffsetInt) 1190 report_fatal_error("Type offset must be an integer constant"); 1191 } 1192 1193 static const unsigned kX86JumpTableEntrySize = 8; 1194 static const unsigned kX86IBTJumpTableEntrySize = 16; 1195 static const unsigned kARMJumpTableEntrySize = 4; 1196 static const unsigned kARMBTIJumpTableEntrySize = 8; 1197 static const unsigned kARMv6MJumpTableEntrySize = 16; 1198 static const unsigned kRISCVJumpTableEntrySize = 8; 1199 1200 unsigned LowerTypeTestsModule::getJumpTableEntrySize() { 1201 switch (JumpTableArch) { 1202 case Triple::x86: 1203 case Triple::x86_64: 1204 if (const auto *MD = mdconst::extract_or_null<ConstantInt>( 1205 M.getModuleFlag("cf-protection-branch"))) 1206 if (MD->getZExtValue()) 1207 return kX86IBTJumpTableEntrySize; 1208 return kX86JumpTableEntrySize; 1209 case 
Triple::arm: 1210 return kARMJumpTableEntrySize; 1211 case Triple::thumb: 1212 if (CanUseThumbBWJumpTable) 1213 return kARMJumpTableEntrySize; 1214 else 1215 return kARMv6MJumpTableEntrySize; 1216 case Triple::aarch64: 1217 if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( 1218 M.getModuleFlag("branch-target-enforcement"))) 1219 if (BTE->getZExtValue()) 1220 return kARMBTIJumpTableEntrySize; 1221 return kARMJumpTableEntrySize; 1222 case Triple::riscv32: 1223 case Triple::riscv64: 1224 return kRISCVJumpTableEntrySize; 1225 default: 1226 report_fatal_error("Unsupported architecture for jump tables"); 1227 } 1228 } 1229 1230 // Create a jump table entry for the target. This consists of an instruction 1231 // sequence containing a relative branch to Dest. Appends inline asm text, 1232 // constraints and arguments to AsmOS, ConstraintOS and AsmArgs. 1233 void LowerTypeTestsModule::createJumpTableEntry( 1234 raw_ostream &AsmOS, raw_ostream &ConstraintOS, 1235 Triple::ArchType JumpTableArch, SmallVectorImpl<Value *> &AsmArgs, 1236 Function *Dest) { 1237 unsigned ArgIndex = AsmArgs.size(); 1238 1239 if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) { 1240 bool Endbr = false; 1241 if (const auto *MD = mdconst::extract_or_null<ConstantInt>( 1242 Dest->getParent()->getModuleFlag("cf-protection-branch"))) 1243 Endbr = !MD->isZero(); 1244 if (Endbr) 1245 AsmOS << (JumpTableArch == Triple::x86 ? 
"endbr32\n" : "endbr64\n"); 1246 AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n"; 1247 if (Endbr) 1248 AsmOS << ".balign 16, 0xcc\n"; 1249 else 1250 AsmOS << "int3\nint3\nint3\n"; 1251 } else if (JumpTableArch == Triple::arm) { 1252 AsmOS << "b $" << ArgIndex << "\n"; 1253 } else if (JumpTableArch == Triple::aarch64) { 1254 if (const auto *BTE = mdconst::extract_or_null<ConstantInt>( 1255 Dest->getParent()->getModuleFlag("branch-target-enforcement"))) 1256 if (BTE->getZExtValue()) 1257 AsmOS << "bti c\n"; 1258 AsmOS << "b $" << ArgIndex << "\n"; 1259 } else if (JumpTableArch == Triple::thumb) { 1260 if (!CanUseThumbBWJumpTable) { 1261 // In Armv6-M, this sequence will generate a branch without corrupting 1262 // any registers. We use two stack words; in the second, we construct the 1263 // address we'll pop into pc, and the first is used to save and restore 1264 // r0 which we use as a temporary register. 1265 // 1266 // To support position-independent use cases, the offset of the target 1267 // function is stored as a relative offset (which will expand into an 1268 // R_ARM_REL32 relocation in ELF, and presumably the equivalent in other 1269 // object file types), and added to pc after we load it. (The alternative 1270 // B.W is automatically pc-relative.) 1271 // 1272 // There are five 16-bit Thumb instructions here, so the .balign 4 adds a 1273 // sixth halfword of padding, and then the offset consumes a further 4 1274 // bytes, for a total of 16, which is very convenient since entries in 1275 // this jump table need to have power-of-two size. 
1276 AsmOS << "push {r0,r1}\n" 1277 << "ldr r0, 1f\n" 1278 << "0: add r0, r0, pc\n" 1279 << "str r0, [sp, #4]\n" 1280 << "pop {r0,pc}\n" 1281 << ".balign 4\n" 1282 << "1: .word $" << ArgIndex << " - (0b + 4)\n"; 1283 } else { 1284 AsmOS << "b.w $" << ArgIndex << "\n"; 1285 } 1286 } else if (JumpTableArch == Triple::riscv32 || 1287 JumpTableArch == Triple::riscv64) { 1288 AsmOS << "tail $" << ArgIndex << "@plt\n"; 1289 } else { 1290 report_fatal_error("Unsupported architecture for jump tables"); 1291 } 1292 1293 ConstraintOS << (ArgIndex > 0 ? ",s" : "s"); 1294 AsmArgs.push_back(Dest); 1295 } 1296 1297 Type *LowerTypeTestsModule::getJumpTableEntryType() { 1298 return ArrayType::get(Int8Ty, getJumpTableEntrySize()); 1299 } 1300 1301 /// Given a disjoint set of type identifiers and functions, build the bit sets 1302 /// and lower the llvm.type.test calls, architecture dependently. 1303 void LowerTypeTestsModule::buildBitSetsFromFunctions( 1304 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) { 1305 if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || 1306 Arch == Triple::thumb || Arch == Triple::aarch64 || 1307 Arch == Triple::riscv32 || Arch == Triple::riscv64) 1308 buildBitSetsFromFunctionsNative(TypeIds, Functions); 1309 else if (Arch == Triple::wasm32 || Arch == Triple::wasm64) 1310 buildBitSetsFromFunctionsWASM(TypeIds, Functions); 1311 else 1312 report_fatal_error("Unsupported architecture for jump tables"); 1313 } 1314 1315 void LowerTypeTestsModule::moveInitializerToModuleConstructor( 1316 GlobalVariable *GV) { 1317 if (WeakInitializerFn == nullptr) { 1318 WeakInitializerFn = Function::Create( 1319 FunctionType::get(Type::getVoidTy(M.getContext()), 1320 /* IsVarArg */ false), 1321 GlobalValue::InternalLinkage, 1322 M.getDataLayout().getProgramAddressSpace(), 1323 "__cfi_global_var_init", &M); 1324 BasicBlock *BB = 1325 BasicBlock::Create(M.getContext(), "entry", WeakInitializerFn); 1326 
ReturnInst::Create(M.getContext(), BB); 1327 WeakInitializerFn->setSection( 1328 ObjectFormat == Triple::MachO 1329 ? "__TEXT,__StaticInit,regular,pure_instructions" 1330 : ".text.startup"); 1331 // This code is equivalent to relocation application, and should run at the 1332 // earliest possible time (i.e. with the highest priority). 1333 appendToGlobalCtors(M, WeakInitializerFn, /* Priority */ 0); 1334 } 1335 1336 IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator()); 1337 GV->setConstant(false); 1338 IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlign()); 1339 GV->setInitializer(Constant::getNullValue(GV->getValueType())); 1340 } 1341 1342 void LowerTypeTestsModule::findGlobalVariableUsersOf( 1343 Constant *C, SmallSetVector<GlobalVariable *, 8> &Out) { 1344 for (auto *U : C->users()){ 1345 if (auto *GV = dyn_cast<GlobalVariable>(U)) 1346 Out.insert(GV); 1347 else if (auto *C2 = dyn_cast<Constant>(U)) 1348 findGlobalVariableUsersOf(C2, Out); 1349 } 1350 } 1351 1352 // Replace all uses of F with (F ? JT : 0). 1353 void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr( 1354 Function *F, Constant *JT, bool IsJumpTableCanonical) { 1355 // The target expression can not appear in a constant initializer on most 1356 // (all?) targets. Switch to a runtime initializer. 1357 SmallSetVector<GlobalVariable *, 8> GlobalVarUsers; 1358 findGlobalVariableUsersOf(F, GlobalVarUsers); 1359 for (auto *GV : GlobalVarUsers) 1360 moveInitializerToModuleConstructor(GV); 1361 1362 // Can not RAUW F with an expression that uses F. Replace with a temporary 1363 // placeholder first. 1364 Function *PlaceholderFn = 1365 Function::Create(cast<FunctionType>(F->getValueType()), 1366 GlobalValue::ExternalWeakLinkage, 1367 F->getAddressSpace(), "", &M); 1368 replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical); 1369 1370 convertUsersOfConstantsToInstructions(PlaceholderFn); 1371 // Don't use range based loop, because use list will be modified. 
1372 while (!PlaceholderFn->use_empty()) { 1373 Use &U = *PlaceholderFn->use_begin(); 1374 auto *InsertPt = dyn_cast<Instruction>(U.getUser()); 1375 assert(InsertPt && "Non-instruction users should have been eliminated"); 1376 auto *PN = dyn_cast<PHINode>(InsertPt); 1377 if (PN) 1378 InsertPt = PN->getIncomingBlock(U)->getTerminator(); 1379 IRBuilder Builder(InsertPt); 1380 Value *ICmp = Builder.CreateICmp(CmpInst::ICMP_NE, F, 1381 Constant::getNullValue(F->getType())); 1382 Value *Select = Builder.CreateSelect(ICmp, JT, 1383 Constant::getNullValue(F->getType())); 1384 // For phi nodes, we need to update the incoming value for all operands 1385 // with the same predecessor. 1386 if (PN) 1387 PN->setIncomingValueForBlock(InsertPt->getParent(), Select); 1388 else 1389 U.set(Select); 1390 } 1391 PlaceholderFn->eraseFromParent(); 1392 } 1393 1394 static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) { 1395 Attribute TFAttr = F->getFnAttribute("target-features"); 1396 if (TFAttr.isValid()) { 1397 SmallVector<StringRef, 6> Features; 1398 TFAttr.getValueAsString().split(Features, ','); 1399 for (StringRef Feature : Features) { 1400 if (Feature == "-thumb-mode") 1401 return false; 1402 else if (Feature == "+thumb-mode") 1403 return true; 1404 } 1405 } 1406 1407 return ModuleArch == Triple::thumb; 1408 } 1409 1410 // Each jump table must be either ARM or Thumb as a whole for the bit-test math 1411 // to work. Pick one that matches the majority of members to minimize interop 1412 // veneers inserted by the linker. 1413 Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding( 1414 ArrayRef<GlobalTypeMember *> Functions) { 1415 if (Arch != Triple::arm && Arch != Triple::thumb) 1416 return Arch; 1417 1418 if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) { 1419 // In architectures that provide Arm and Thumb-1 but not Thumb-2, 1420 // we should always prefer the Arm jump table format, because the 1421 // Thumb-1 one is larger and slower. 
1422 return Triple::arm; 1423 } 1424 1425 // Otherwise, go with majority vote. 1426 unsigned ArmCount = 0, ThumbCount = 0; 1427 for (const auto GTM : Functions) { 1428 if (!GTM->isJumpTableCanonical()) { 1429 // PLT stubs are always ARM. 1430 // FIXME: This is the wrong heuristic for non-canonical jump tables. 1431 ++ArmCount; 1432 continue; 1433 } 1434 1435 Function *F = cast<Function>(GTM->getGlobal()); 1436 ++(isThumbFunction(F, Arch) ? ThumbCount : ArmCount); 1437 } 1438 1439 return ArmCount > ThumbCount ? Triple::arm : Triple::thumb; 1440 } 1441 1442 void LowerTypeTestsModule::createJumpTable( 1443 Function *F, ArrayRef<GlobalTypeMember *> Functions) { 1444 std::string AsmStr, ConstraintStr; 1445 raw_string_ostream AsmOS(AsmStr), ConstraintOS(ConstraintStr); 1446 SmallVector<Value *, 16> AsmArgs; 1447 AsmArgs.reserve(Functions.size() * 2); 1448 1449 for (GlobalTypeMember *GTM : Functions) 1450 createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs, 1451 cast<Function>(GTM->getGlobal())); 1452 1453 // Align the whole table by entry size. 1454 F->setAlignment(Align(getJumpTableEntrySize())); 1455 // Skip prologue. 1456 // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3. 1457 // Luckily, this function does not get any prologue even without the 1458 // attribute. 1459 if (OS != Triple::Win32) 1460 F->addFnAttr(Attribute::Naked); 1461 if (JumpTableArch == Triple::arm) 1462 F->addFnAttr("target-features", "-thumb-mode"); 1463 if (JumpTableArch == Triple::thumb) { 1464 F->addFnAttr("target-features", "+thumb-mode"); 1465 if (CanUseThumbBWJumpTable) { 1466 // Thumb jump table assembly needs Thumb2. The following attribute is 1467 // added by Clang for -march=armv7. 1468 F->addFnAttr("target-cpu", "cortex-a8"); 1469 } 1470 } 1471 // When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI 1472 // for the function to avoid double BTI. This is a no-op without 1473 // -mbranch-protection=. 
1474 if (JumpTableArch == Triple::aarch64) { 1475 F->addFnAttr("branch-target-enforcement", "false"); 1476 F->addFnAttr("sign-return-address", "none"); 1477 } 1478 if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) { 1479 // Make sure the jump table assembly is not modified by the assembler or 1480 // the linker. 1481 F->addFnAttr("target-features", "-c,-relax"); 1482 } 1483 // When -fcf-protection= is used, the inline asm adds an ENDBR. Suppress ENDBR 1484 // for the function to avoid double ENDBR. This is a no-op without 1485 // -fcf-protection=. 1486 if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) 1487 F->addFnAttr(Attribute::NoCfCheck); 1488 // Make sure we don't emit .eh_frame for this function. 1489 F->addFnAttr(Attribute::NoUnwind); 1490 1491 BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F); 1492 IRBuilder<> IRB(BB); 1493 1494 SmallVector<Type *, 16> ArgTypes; 1495 ArgTypes.reserve(AsmArgs.size()); 1496 for (const auto &Arg : AsmArgs) 1497 ArgTypes.push_back(Arg->getType()); 1498 InlineAsm *JumpTableAsm = 1499 InlineAsm::get(FunctionType::get(IRB.getVoidTy(), ArgTypes, false), 1500 AsmOS.str(), ConstraintOS.str(), 1501 /*hasSideEffects=*/true); 1502 1503 IRB.CreateCall(JumpTableAsm, AsmArgs); 1504 IRB.CreateUnreachable(); 1505 } 1506 1507 /// Given a disjoint set of type identifiers and functions, build a jump table 1508 /// for the functions, build the bit sets and lower the llvm.type.test calls. 1509 void LowerTypeTestsModule::buildBitSetsFromFunctionsNative( 1510 ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) { 1511 // Unlike the global bitset builder, the function bitset builder cannot 1512 // re-arrange functions in a particular order and base its calculations on the 1513 // layout of the functions' entry points, as we have no idea how large a 1514 // particular function will end up being (the size could even depend on what 1515 // this pass does!) 
Instead, we build a jump table, which is a block of code 1516 // consisting of one branch instruction for each of the functions in the bit 1517 // set that branches to the target function, and redirect any taken function 1518 // addresses to the corresponding jump table entry. In the object file's 1519 // symbol table, the symbols for the target functions also refer to the jump 1520 // table entries, so that addresses taken outside the module will pass any 1521 // verification done inside the module. 1522 // 1523 // In more concrete terms, suppose we have three functions f, g, h which are 1524 // of the same type, and a function foo that returns their addresses: 1525 // 1526 // f: 1527 // mov 0, %eax 1528 // ret 1529 // 1530 // g: 1531 // mov 1, %eax 1532 // ret 1533 // 1534 // h: 1535 // mov 2, %eax 1536 // ret 1537 // 1538 // foo: 1539 // mov f, %eax 1540 // mov g, %edx 1541 // mov h, %ecx 1542 // ret 1543 // 1544 // We output the jump table as module-level inline asm string. The end result 1545 // will (conceptually) look like this: 1546 // 1547 // f = .cfi.jumptable 1548 // g = .cfi.jumptable + 4 1549 // h = .cfi.jumptable + 8 1550 // .cfi.jumptable: 1551 // jmp f.cfi ; 5 bytes 1552 // int3 ; 1 byte 1553 // int3 ; 1 byte 1554 // int3 ; 1 byte 1555 // jmp g.cfi ; 5 bytes 1556 // int3 ; 1 byte 1557 // int3 ; 1 byte 1558 // int3 ; 1 byte 1559 // jmp h.cfi ; 5 bytes 1560 // int3 ; 1 byte 1561 // int3 ; 1 byte 1562 // int3 ; 1 byte 1563 // 1564 // f.cfi: 1565 // mov 0, %eax 1566 // ret 1567 // 1568 // g.cfi: 1569 // mov 1, %eax 1570 // ret 1571 // 1572 // h.cfi: 1573 // mov 2, %eax 1574 // ret 1575 // 1576 // foo: 1577 // mov f, %eax 1578 // mov g, %edx 1579 // mov h, %ecx 1580 // ret 1581 // 1582 // Because the addresses of f, g, h are evenly spaced at a power of 2, in the 1583 // normal case the check can be carried out using the same kind of simple 1584 // arithmetic that we normally use for globals. 
1585 1586 // FIXME: find a better way to represent the jumptable in the IR. 1587 assert(!Functions.empty()); 1588 1589 // Decide on the jump table encoding, so that we know how big the 1590 // entries will be. 1591 JumpTableArch = selectJumpTableArmEncoding(Functions); 1592 1593 // Build a simple layout based on the regular layout of jump tables. 1594 DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout; 1595 unsigned EntrySize = getJumpTableEntrySize(); 1596 for (unsigned I = 0; I != Functions.size(); ++I) 1597 GlobalLayout[Functions[I]] = I * EntrySize; 1598 1599 Function *JumpTableFn = 1600 Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()), 1601 /* IsVarArg */ false), 1602 GlobalValue::PrivateLinkage, 1603 M.getDataLayout().getProgramAddressSpace(), 1604 ".cfi.jumptable", &M); 1605 ArrayType *JumpTableType = 1606 ArrayType::get(getJumpTableEntryType(), Functions.size()); 1607 auto JumpTable = 1608 ConstantExpr::getPointerCast(JumpTableFn, JumpTableType->getPointerTo(0)); 1609 1610 lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout); 1611 1612 { 1613 ScopedSaveAliaseesAndUsed S(M); 1614 1615 // Build aliases pointing to offsets into the jump table, and replace 1616 // references to the original functions with references to the aliases. 1617 for (unsigned I = 0; I != Functions.size(); ++I) { 1618 Function *F = cast<Function>(Functions[I]->getGlobal()); 1619 bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical(); 1620 1621 Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast( 1622 ConstantExpr::getInBoundsGetElementPtr( 1623 JumpTableType, JumpTable, 1624 ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0), 1625 ConstantInt::get(IntPtrTy, I)}), 1626 F->getType()); 1627 1628 const bool IsExported = Functions[I]->isExported(); 1629 if (!IsJumpTableCanonical) { 1630 GlobalValue::LinkageTypes LT = IsExported 1631 ? 
GlobalValue::ExternalLinkage 1632 : GlobalValue::InternalLinkage; 1633 GlobalAlias *JtAlias = GlobalAlias::create(F->getValueType(), 0, LT, 1634 F->getName() + ".cfi_jt", 1635 CombinedGlobalElemPtr, &M); 1636 if (IsExported) 1637 JtAlias->setVisibility(GlobalValue::HiddenVisibility); 1638 else 1639 appendToUsed(M, {JtAlias}); 1640 } 1641 1642 if (IsExported) { 1643 if (IsJumpTableCanonical) 1644 ExportSummary->cfiFunctionDefs().insert(std::string(F->getName())); 1645 else 1646 ExportSummary->cfiFunctionDecls().insert(std::string(F->getName())); 1647 } 1648 1649 if (!IsJumpTableCanonical) { 1650 if (F->hasExternalWeakLinkage()) 1651 replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr, 1652 IsJumpTableCanonical); 1653 else 1654 replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical); 1655 } else { 1656 assert(F->getType()->getAddressSpace() == 0); 1657 1658 GlobalAlias *FAlias = 1659 GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "", 1660 CombinedGlobalElemPtr, &M); 1661 FAlias->setVisibility(F->getVisibility()); 1662 FAlias->takeName(F); 1663 if (FAlias->hasName()) 1664 F->setName(FAlias->getName() + ".cfi"); 1665 replaceCfiUses(F, FAlias, IsJumpTableCanonical); 1666 if (!F->hasLocalLinkage()) 1667 F->setVisibility(GlobalVariable::HiddenVisibility); 1668 } 1669 } 1670 } 1671 1672 createJumpTable(JumpTableFn, Functions); 1673 } 1674 1675 /// Assign a dummy layout using an incrementing counter, tag each function 1676 /// with its index represented as metadata, and lower each type test to an 1677 /// integer range comparison. During generation of the indirect function call 1678 /// table in the backend, it will assign the given indexes. 1679 /// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet 1680 /// been finalized. 
void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM(
    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
  assert(!Functions.empty());

  // Build consecutive monotonic integer ranges for each call target set
  DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;

  for (GlobalTypeMember *GTM : Functions) {
    Function *F = cast<Function>(GTM->getGlobal());

    // Skip functions that are not address taken, to avoid bloating the table
    if (!F->hasAddressTaken())
      continue;

    // Store metadata with the index for each function
    MDNode *MD = MDNode::get(F->getContext(),
                             ArrayRef<Metadata *>(ConstantAsMetadata::get(
                                 ConstantInt::get(Int64Ty, IndirectIndex))));
    F->setMetadata("wasm.index", MD);

    // Assign the counter value. IndirectIndex is a member counter, so indices
    // keep increasing across successive calls to this function.
    GlobalLayout[GTM] = IndirectIndex++;
  }

  // The indirect function table index space starts at zero, so pass a NULL
  // pointer as the subtracted "jump table" offset.
  lowerTypeTestCalls(TypeIds, ConstantPointerNull::get(Int32PtrTy),
                     GlobalLayout);
}

/// Compute a layout for the members of one disjoint set and build its bitsets.
///
/// \param TypeIds            type identifiers belonging to the set.
/// \param Globals            global type members belonging to the set.
/// \param ICallBranchFunnels branch funnels belonging to the set; each one
///                           contributes a layout fragment made of its targets.
///
/// Members are grouped into index sets (one per type identifier and one per
/// branch funnel), handed to a GlobalLayoutBuilder smallest-first, and the
/// resulting order is passed to buildBitSetsFromGlobalVariables or
/// buildBitSetsFromFunctions depending on the kind of the first global.
/// Reports a fatal error if the set mixes global variables and functions.
void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
    ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals,
    ArrayRef<ICallBranchFunnel *> ICallBranchFunnels) {
  // Map each type identifier to its position in TypeIds.
  DenseMap<Metadata *, uint64_t> TypeIdIndices;
  for (unsigned I = 0; I != TypeIds.size(); ++I)
    TypeIdIndices[TypeIds[I]] = I;

  // For each type identifier, build a set of indices that refer to members of
  // the type identifier.
  std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size());
  unsigned GlobalIndex = 0;
  DenseMap<GlobalTypeMember *, uint64_t> GlobalIndices;
  for (GlobalTypeMember *GTM : Globals) {
    for (MDNode *Type : GTM->types()) {
      // Type = { offset, type identifier }
      auto I = TypeIdIndices.find(Type->getOperand(1));
      if (I != TypeIdIndices.end())
        TypeMembers[I->second].insert(GlobalIndex);
    }
    GlobalIndices[GTM] = GlobalIndex;
    GlobalIndex++;
  }

  // Each branch funnel adds one more index set containing its targets.
  // NOTE(review): GlobalIndices[T] default-inserts 0 for a target that is not
  // in Globals; presumably every target is a member of this set -- confirm.
  for (ICallBranchFunnel *JT : ICallBranchFunnels) {
    TypeMembers.emplace_back();
    std::set<uint64_t> &TMSet = TypeMembers.back();
    for (GlobalTypeMember *T : JT->targets())
      TMSet.insert(GlobalIndices[T]);
  }

  // Order the sets of indices by size. The GlobalLayoutBuilder works best
  // when given small index sets first.
  llvm::stable_sort(TypeMembers, [](const std::set<uint64_t> &O1,
                                    const std::set<uint64_t> &O2) {
    return O1.size() < O2.size();
  });

  // Create a GlobalLayoutBuilder and provide it with index sets as layout
  // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
  // close together as possible.
  GlobalLayoutBuilder GLB(Globals.size());
  for (auto &&MemSet : TypeMembers)
    GLB.addFragment(MemSet);

  // Build a vector of globals with the computed layout.
  // The kind of the first global decides which builder is used below; an empty
  // set is treated as a set of global variables.
  bool IsGlobalSet =
      Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal());
  std::vector<GlobalTypeMember *> OrderedGTMs(Globals.size());
  auto OGTMI = OrderedGTMs.begin();
  for (auto &&F : GLB.Fragments) {
    for (auto &&Offset : F) {
      if (IsGlobalSet != isa<GlobalVariable>(Globals[Offset]->getGlobal()))
        report_fatal_error("Type identifier may not contain both global "
                           "variables and functions");
      *OGTMI++ = Globals[Offset];
    }
  }

  // Build the bitsets from this disjoint set.
  if (IsGlobalSet)
    buildBitSetsFromGlobalVariables(TypeIds, OrderedGTMs);
  else
    buildBitSetsFromFunctions(TypeIds, OrderedGTMs);
}

/// Lower all type tests in this module.
///
/// The constructor only records state; the lowering itself is done by lower().
/// It stores the module, the (mutually exclusive) export/import summaries and
/// the drop-type-tests setting (the argument OR'ed with ClDropTypeTests), and
/// caches the target architecture, OS and object format from the module's
/// triple. On arm/thumb it also queries each function's TargetTransformInfo to
/// decide which jump table encodings may be used.
LowerTypeTestsModule::LowerTypeTestsModule(
    Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary,
    const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
    : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
      DropTypeTests(DropTypeTests || ClDropTypeTests) {
  // At most one of the two summaries may be supplied.
  assert(!(ExportSummary && ImportSummary));
  Triple TargetTriple(M.getTargetTriple());
  Arch = TargetTriple.getArch();
  // The ARM jump table is always allowed when the module triple is arm.
  if (Arch == Triple::arm)
    CanUseArmJumpTable = true;
  if (Arch == Triple::arm || Arch == Triple::thumb) {
    auto &FAM =
        AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
    // A single function reporting wide-branch support is enough to enable the
    // corresponding encoding for the whole module.
    for (Function &F : M) {
      auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
      if (TTI.hasArmWideBranch(false))
        CanUseArmJumpTable = true;
      if (TTI.hasArmWideBranch(true))
        CanUseThumbBWJumpTable = true;
    }
  }
  OS = TargetTriple.getOS();
  ObjectFormat = TargetTriple.getObjectFormat();
}

/// Testing entry point driven by the pass's command-line options.
///
/// Optionally reads a YAML summary from ClReadSummary, runs the lowering with
/// that summary used as the export or import summary according to
/// ClSummaryAction, and optionally writes the summary as YAML to
/// ClWriteSummary. File and parse errors terminate the process via ExitOnError.
///
/// \returns the result of LowerTypeTestsModule::lower().
bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) {
  ModuleSummaryIndex Summary(/*HaveGVs=*/false);

  // Handle the command-line summary arguments. This code is for testing
  // purposes only, so we handle errors directly.
  if (!ClReadSummary.empty()) {
    ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
                          ": ");
    auto ReadSummaryFile =
        ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));

    yaml::Input In(ReadSummaryFile->getBuffer());
    In >> Summary;
    ExitOnErr(errorCodeToError(In.error()));
  }

  // The same Summary object serves as export or import summary (or neither),
  // depending on the selected action.
  bool Changed =
      LowerTypeTestsModule(
          M, AM,
          ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
          ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
          /*DropTypeTests*/ false)
          .lower();

  if (!ClWriteSummary.empty()) {
    ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
                          ": ");
    std::error_code EC;
    raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
    ExitOnErr(errorCodeToError(EC));

    yaml::Output Out(OS);
    Out << Summary;
  }

  return Changed;
}

/// Return true if \p U is the callee operand of a CallInst. Users that are not
/// CallInst (for example other kinds of call-like instructions) yield false.
static bool isDirectCall(Use& U) {
  auto *Usr = dyn_cast<CallInst>(U.getUser());
  if (Usr) {
    auto *CB = dyn_cast<CallBase>(Usr);
    if (CB && CB->isCallee(&U))
      return true;
  }
  return false;
}

/// Redirect uses of \p Old to \p New, leaving alone the uses that must keep
/// referring to \p Old itself.
///
/// Uses by BlockAddress and NoCFIValue are never rewritten. Direct calls are
/// left untouched when \p Old is dso_local or \p IsJumpTableCanonical is false.
/// Users that are non-GlobalValue constants are collected and rewritten once
/// each through handleOperandChange after the use list has been walked.
void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
                                          bool IsJumpTableCanonical) {
  SmallSetVector<Constant *, 4> Constants;
  // Early-inc iteration: U.set() below removes U from Old's use list.
  for (Use &U : llvm::make_early_inc_range(Old->uses())) {
    // Skip block addresses and no_cfi values, which refer to the function
    // body instead of the jump table.
    if (isa<BlockAddress, NoCFIValue>(U.getUser()))
      continue;

    // Skip direct calls to externally defined or non-dso_local functions
    // NOTE(review): the condition skips direct calls when Old *is* dso_local
    // or is not jump-table canonical; the "non-dso_local" wording above looks
    // inverted relative to the code -- confirm the intended wording.
    if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical))
      continue;

    // Must handle Constants specially, we cannot call replaceUsesOfWith on a
    // constant because they are uniqued.
    if (auto *C = dyn_cast<Constant>(U.getUser())) {
      if (!isa<GlobalValue>(C)) {
        // Save unique users to avoid processing operand replacement
        // more than once.
        Constants.insert(C);
        continue;
      }
    }

    U.set(New);
  }

  // Process operand replacement of saved constants.
  for (auto *C : Constants)
    C->handleOperandChange(Old, New);
}

/// Replace with \p New every use of \p Old for which isDirectCall() holds.
void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
  Old->replaceUsesWithIf(New, isDirectCall);
}

/// Erase every call to \p TypeTestFunc in \p M.
///
/// llvm.assume users of each call are erased as well; any remaining users are
/// expected to be phi nodes and get the constant "true" in place of the call.
/// Every user of \p TypeTestFunc must be a CallInst (enforced by cast<>).
static void dropTypeTests(Module &M, Function &TypeTestFunc) {
  for (Use &U : llvm::make_early_inc_range(TypeTestFunc.uses())) {
    auto *CI = cast<CallInst>(U.getUser());
    // Find and erase llvm.assume intrinsics for this llvm.type.test call.
    for (Use &CIU : llvm::make_early_inc_range(CI->uses()))
      if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser()))
        Assume->eraseFromParent();
    // If the assume was merged with another assume, we might have a use on a
    // phi (which will feed the assume). Simply replace the use on the phi
    // with "true" and leave the merged assume.
    if (!CI->use_empty()) {
      assert(
          all_of(CI->users(), [](User *U) -> bool { return isa<PHINode>(U); }));
      CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
    }
    CI->eraseFromParent();
  }
}

/// Main driver of the pass for one module.
///
/// Depending on the configuration this either drops all type tests
/// (DropTypeTests), resolves them from an import summary (ImportSummary), or
/// performs the full lowering: collect type metadata for globals, partition
/// type identifiers and their globals into disjoint sets, and build bitsets for
/// each set. When an export summary is present, "cfi.functions", "aliases" and
/// "symvers" named metadata are consumed as well.
///
/// \returns false when the function bails out early (nothing to drop, partially
/// split LTO units, nothing to lower, or no equivalence classes); true
/// otherwise.
bool LowerTypeTestsModule::lower() {
  Function *TypeTestFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::type_test));

  if (DropTypeTests) {
    if (TypeTestFunc)
      dropTypeTests(M, *TypeTestFunc);
    // Normally we'd have already removed all @llvm.public.type.test calls,
    // except for in the case where we originally were performing ThinLTO but
    // decided not to in the backend.
    Function *PublicTypeTestFunc =
        M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
    if (PublicTypeTestFunc)
      dropTypeTests(M, *PublicTypeTestFunc);
    if (TypeTestFunc || PublicTypeTestFunc) {
      // We have deleted the type intrinsics, so we no longer have enough
      // information to reason about the liveness of virtual function pointers
      // in GlobalDCE.
      for (GlobalVariable &GV : M.globals())
        GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
      return true;
    }
    return false;
  }

  // If only some of the modules were split, we cannot correctly perform
  // this transformation. We already checked for the presence of type tests
  // with partially split modules during the thin link, and would have emitted
  // an error if any were found, so here we can simply return.
  if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
      (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
    return false;

  // Nothing to do when neither intrinsic is used and no summary is involved.
  Function *ICallBranchFunnelFunc =
      M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel));
  if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
      (!ICallBranchFunnelFunc || ICallBranchFunnelFunc->use_empty()) &&
      !ExportSummary && !ImportSummary)
    return false;

  // Import phase: resolve type tests and CFI functions from the summary and
  // return without doing any layout work.
  if (ImportSummary) {
    if (TypeTestFunc)
      for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses()))
        importTypeTest(cast<CallInst>(U.getUser()));

    if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty())
      report_fatal_error(
          "unexpected call to llvm.icall.branch.funnel during import phase");

    // Collect the functions first; they are imported in a second step below.
    SmallVector<Function *, 8> Defs;
    SmallVector<Function *, 8> Decls;
    for (auto &F : M) {
      // CFI functions are either external, or promoted. A local function may
      // have the same name, but it's not the one we are looking for.
      if (F.hasLocalLinkage())
        continue;
      if (ImportSummary->cfiFunctionDefs().count(std::string(F.getName())))
        Defs.push_back(&F);
      else if (ImportSummary->cfiFunctionDecls().count(
                   std::string(F.getName())))
        Decls.push_back(&F);
    }

    // Aliases queued by importFunction are erased only after the
    // ScopedSaveAliaseesAndUsed scope has ended.
    std::vector<GlobalAlias *> AliasesToErase;
    {
      ScopedSaveAliaseesAndUsed S(M);
      for (auto *F : Defs)
        importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase);
      for (auto *F : Decls)
        importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase);
    }
    for (GlobalAlias *GA : AliasesToErase)
      GA->eraseFromParent();

    return true;
  }

  // Equivalence class set containing type identifiers and the globals that
  // reference them. This is used to partition the set of type identifiers in
  // the module into disjoint sets.
  using GlobalClassesTy = EquivalenceClasses<
      PointerUnion<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>;
  GlobalClassesTy GlobalClasses;

  // Verify the type metadata and build a few data structures to let us
  // efficiently enumerate the type identifiers associated with a global:
  // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector
  // of associated type metadata) and a mapping from type identifiers to their
  // list of GlobalTypeMembers and last observed index in the list of globals.
  // The indices will be used later to deterministically order the list of type
  // identifiers.
  BumpPtrAllocator Alloc;
  struct TIInfo {
    unsigned UniqueId;
    std::vector<GlobalTypeMember *> RefGlobals;
  };
  DenseMap<Metadata *, TIInfo> TypeIdInfo;
  unsigned CurUniqueId = 0;
  SmallVector<MDNode *, 2> Types;

  // Cross-DSO CFI emits jumptable entries for exported functions as well as
  // address taken functions in case they are address taken in other modules.
  const bool CrossDsoCfi = M.getModuleFlag("Cross-DSO CFI") != nullptr;

  struct ExportedFunctionInfo {
    CfiFunctionLinkage Linkage;
    MDNode *FuncMD; // {name, linkage, type[, type...]}
  };
  DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions;
  if (ExportSummary) {
    // A set of all functions that are address taken by a live global object.
    DenseSet<GlobalValue::GUID> AddressTaken;
    for (auto &I : *ExportSummary)
      for (auto &GVS : I.second.SummaryList)
        if (GVS->isLive())
          for (const auto &Ref : GVS->refs())
            AddressTaken.insert(Ref.getGUID());

    NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
    if (CfiFunctionsMD) {
      // First pass: decide which "cfi.functions" entries need a jump table
      // entry and record them by name.
      for (auto *FuncMD : CfiFunctionsMD->operands()) {
        assert(FuncMD->getNumOperands() >= 2);
        StringRef FunctionName =
            cast<MDString>(FuncMD->getOperand(0))->getString();
        CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>(
            cast<ConstantAsMetadata>(FuncMD->getOperand(1))
                ->getValue()
                ->getUniqueInteger()
                .getZExtValue());
        const GlobalValue::GUID GUID = GlobalValue::getGUID(
            GlobalValue::dropLLVMManglingEscape(FunctionName));
        // Do not emit jumptable entries for functions that are not-live and
        // have no live references (and are not exported with cross-DSO CFI.)
        if (!ExportSummary->isGUIDLive(GUID))
          continue;
        if (!AddressTaken.count(GUID)) {
          if (!CrossDsoCfi || Linkage != CFL_Definition)
            continue;

          // With cross-DSO CFI, a definition that is not address taken is
          // still kept if some live summary for it has non-local linkage.
          bool Exported = false;
          if (auto VI = ExportSummary->getValueInfo(GUID))
            for (const auto &GVS : VI.getSummaryList())
              if (GVS->isLive() && !GlobalValue::isLocalLinkage(GVS->linkage()))
                Exported = true;

          if (!Exported)
            continue;
        }
        // On a duplicate name, an already recorded definition wins; otherwise
        // the later entry replaces the earlier one.
        auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}});
        if (!P.second && P.first->second.Linkage != CFL_Definition)
          P.first->second = {Linkage, FuncMD};
      }

      // Second pass: make sure a suitable Function exists in this module for
      // every recorded name and fix up its linkage and type metadata.
      for (const auto &P : ExportedFunctions) {
        StringRef FunctionName = P.first;
        CfiFunctionLinkage Linkage = P.second.Linkage;
        MDNode *FuncMD = P.second.FuncMD;
        Function *F = M.getFunction(FunctionName);
        if (F && F->hasLocalLinkage()) {
          // Locally defined function that happens to have the same name as a
          // function defined in a ThinLTO module. Rename it to move it out of
          // the way of the external reference that we're about to create.
          // Note that setName will find a unique name for the function, so even
          // if there is an existing function with the suffix there won't be a
          // name collision.
          F->setName(F->getName() + ".1");
          F = nullptr;
        }

        // No usable function with this name: create an external declaration.
        if (!F)
          F = Function::Create(
              FunctionType::get(Type::getVoidTy(M.getContext()), false),
              GlobalVariable::ExternalLinkage,
              M.getDataLayout().getProgramAddressSpace(), FunctionName, &M);

        // If the function is available_externally, remove its definition so
        // that it is handled the same way as a declaration. Later we will try
        // to create an alias using this function's linkage, which will fail if
        // the linkage is available_externally. This will also result in us
        // following the code path below to replace the type metadata.
        if (F->hasAvailableExternallyLinkage()) {
          F->setLinkage(GlobalValue::ExternalLinkage);
          F->deleteBody();
          F->setComdat(nullptr);
          F->clearMetadata();
        }

        // Update the linkage for extern_weak declarations when a definition
        // exists.
        if (Linkage == CFL_Definition && F->hasExternalWeakLinkage())
          F->setLinkage(GlobalValue::ExternalLinkage);

        // If the function in the full LTO module is a declaration, replace its
        // type metadata with the type metadata we found in cfi.functions. That
        // metadata is presumed to be more accurate than the metadata attached
        // to the declaration.
        if (F->isDeclaration()) {
          if (Linkage == CFL_WeakDeclaration)
            F->setLinkage(GlobalValue::ExternalWeakLinkage);

          F->eraseMetadata(LLVMContext::MD_type);
          // Operands 0 and 1 are the name and linkage; the rest are types.
          for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
            F->addMetadata(LLVMContext::MD_type,
                           *cast<MDNode>(FuncMD->getOperand(I).get()));
        }
      }
    }
  }

  // Create a GlobalTypeMember for every relevant global object and record, per
  // type identifier, the members that reference it.
  DenseMap<GlobalObject *, GlobalTypeMember *> GlobalTypeMembers;
  for (GlobalObject &GO : M.global_objects()) {
    if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
      continue;

    Types.clear();
    GO.getMetadata(LLVMContext::MD_type, Types);

    bool IsJumpTableCanonical = false;
    bool IsExported = false;
    if (Function *F = dyn_cast<Function>(&GO)) {
      IsJumpTableCanonical = isJumpTableCanonical(F);
      if (ExportedFunctions.count(F->getName())) {
        IsJumpTableCanonical |=
            ExportedFunctions[F->getName()].Linkage == CFL_Definition;
        IsExported = true;
      // TODO: The logic here checks only that the function is address taken,
      // not that the address takers are live. This can be updated to check
      // their liveness and emit fewer jumptable entries once monolithic LTO
      // builds also emit summaries.
      } else if (!F->hasAddressTaken()) {
        // Functions that are not address taken are kept only for cross-DSO
        // CFI, and only if they are jump-table canonical and non-local.
        if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage())
          continue;
      }
    }

    auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical,
                                         IsExported, Types);
    GlobalTypeMembers[&GO] = GTM;
    for (MDNode *Type : Types) {
      verifyTypeMDNode(&GO, Type);
      auto &Info = TypeIdInfo[Type->getOperand(1)];
      // UniqueId is overwritten on every reference, so it ends up holding the
      // id assigned at the last global seen for this type identifier.
      Info.UniqueId = ++CurUniqueId;
      Info.RefGlobals.push_back(GTM);
    }
  }

  auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & {
    // Add the call site to the list of call sites for this type identifier. We
    // also use TypeIdUsers to keep track of whether we have seen this type
    // identifier before. If we have, we don't need to re-add the referenced
    // globals to the equivalence class.
    auto Ins = TypeIdUsers.insert({TypeId, {}});
    if (Ins.second) {
      // Add the type identifier to the equivalence class.
      GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId);
      GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);

      // Add the referenced globals to the type identifier's equivalence class.
      for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals)
        CurSet = GlobalClasses.unionSets(
            CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
    }

    return Ins.first->second;
  };

  if (TypeTestFunc) {
    for (const Use &U : TypeTestFunc->uses()) {
      auto CI = cast<CallInst>(U.getUser());
      // If this type test is only used by llvm.assume instructions, it
      // was used for whole program devirtualization, and is being kept
      // for use by other optimization passes. We do not need or want to
      // lower it here. We also don't want to rewrite any associated globals
      // unnecessarily. These will be removed by a subsequent LTT invocation
      // with the DropTypeTests flag set.
      // A call without any uses is not treated as assume-only and is lowered.
      bool OnlyAssumeUses = !CI->use_empty();
      for (const Use &CIU : CI->uses()) {
        if (isa<AssumeInst>(CIU.getUser()))
          continue;
        OnlyAssumeUses = false;
        break;
      }
      if (OnlyAssumeUses)
        continue;

      auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
      if (!TypeIdMDVal)
        report_fatal_error("Second argument of llvm.type.test must be metadata");
      auto TypeId = TypeIdMDVal->getMetadata();
      AddTypeIdUse(TypeId).CallSites.push_back(CI);
    }
  }

  if (ICallBranchFunnelFunc) {
    for (const Use &U : ICallBranchFunnelFunc->uses()) {
      if (Arch != Triple::x86_64)
        report_fatal_error(
            "llvm.icall.branch.funnel not supported on this target");

      auto CI = cast<CallInst>(U.getUser());

      std::vector<GlobalTypeMember *> Targets;
      if (CI->arg_size() % 2 != 1)
        report_fatal_error("number of arguments should be odd");

      // Visit every second argument starting at index 1 and merge the
      // equivalence classes of all the globals they are based on.
      GlobalClassesTy::member_iterator CurSet;
      for (unsigned I = 1; I != CI->arg_size(); I += 2) {
        int64_t Offset;
        auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
            CI->getOperand(I), Offset, M.getDataLayout()));
        if (!Base)
          report_fatal_error(
              "Expected branch funnel operand to be global value");

        // NOTE(review): operator[] yields a null GlobalTypeMember if Base was
        // skipped by the global_objects() loop above -- confirm that cannot
        // happen for branch funnel operands.
        GlobalTypeMember *GTM = GlobalTypeMembers[Base];
        Targets.push_back(GTM);
        GlobalClassesTy::member_iterator NewSet =
            GlobalClasses.findLeader(GlobalClasses.insert(GTM));
        if (I == 1)
          CurSet = NewSet;
        else
          CurSet = GlobalClasses.unionSets(CurSet, NewSet);
      }

      // Finally add the funnel itself to the merged class.
      GlobalClasses.unionSets(
          CurSet, GlobalClasses.findLeader(
                      GlobalClasses.insert(ICallBranchFunnel::create(
                          Alloc, CI, Targets, ++CurUniqueId))));
    }
  }

  if (ExportSummary) {
    // Index the string type identifiers seen in this module by GUID so they
    // can be matched against the type tests recorded in function summaries.
    DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
    for (auto &P : TypeIdInfo) {
      if (auto *TypeId = dyn_cast<MDString>(P.first))
        MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
            TypeId);
    }

    // Mark as exported every type identifier tested by a live function summary.
    for (auto &P : *ExportSummary) {
      for (auto &S : P.second.SummaryList) {
        if (!ExportSummary->isGlobalValueLive(S.get()))
          continue;
        if (auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()))
          for (GlobalValue::GUID G : FS->type_tests())
            for (Metadata *MD : MetadataByGUID[G])
              AddTypeIdUse(MD).IsExported = true;
      }
    }
  }

  if (GlobalClasses.empty())
    return false;

  // Build a list of disjoint sets ordered by their maximum global index for
  // determinism.
  std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
  for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
                                 E = GlobalClasses.end();
       I != E; ++I) {
    if (!I->isLeader())
      continue;
    ++NumTypeIdDisjointSets;

    // The sort key of a set is the largest unique id among its type
    // identifiers and branch funnels; GlobalTypeMember entries are ignored.
    unsigned MaxUniqueId = 0;
    for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
         MI != GlobalClasses.member_end(); ++MI) {
      if (auto *MD = dyn_cast_if_present<Metadata *>(*MI))
        MaxUniqueId = std::max(MaxUniqueId, TypeIdInfo[MD].UniqueId);
      else if (auto *BF = dyn_cast_if_present<ICallBranchFunnel *>(*MI))
        MaxUniqueId = std::max(MaxUniqueId, BF->UniqueId);
    }
    Sets.emplace_back(I, MaxUniqueId);
  }
  llvm::sort(Sets, llvm::less_second());

  // For each disjoint set we found...
  for (const auto &S : Sets) {
    // Build the list of type identifiers in this disjoint set.
    std::vector<Metadata *> TypeIds;
    std::vector<GlobalTypeMember *> Globals;
    std::vector<ICallBranchFunnel *> ICallBranchFunnels;
    for (GlobalClassesTy::member_iterator MI =
             GlobalClasses.member_begin(S.first);
         MI != GlobalClasses.member_end(); ++MI) {
      if (isa<Metadata *>(*MI))
        TypeIds.push_back(cast<Metadata *>(*MI));
      else if (isa<GlobalTypeMember *>(*MI))
        Globals.push_back(cast<GlobalTypeMember *>(*MI));
      else
        ICallBranchFunnels.push_back(cast<ICallBranchFunnel *>(*MI));
    }

    // Order type identifiers by unique ID for determinism. This ordering is
    // stable as there is a one-to-one mapping between metadata and unique IDs.
    llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) {
      return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId;
    });

    // Same for the branch funnels.
    llvm::sort(ICallBranchFunnels,
               [&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) {
                 return F1->UniqueId < F2->UniqueId;
               });

    // Build bitsets for this disjoint set.
    buildBitSetsFromDisjointSet(TypeIds, Globals, ICallBranchFunnels);
  }

  allocateByteArrays();

  // Parse alias data to replace stand-in function declarations for aliases
  // with an alias to the intended target.
  if (ExportSummary) {
    if (NamedMDNode *AliasesMD = M.getNamedMetadata("aliases")) {
      for (auto *AliasMD : AliasesMD->operands()) {
        // Operands: {alias name, aliasee name, visibility, weak flag}.
        assert(AliasMD->getNumOperands() >= 4);
        StringRef AliasName =
            cast<MDString>(AliasMD->getOperand(0))->getString();
        StringRef Aliasee = cast<MDString>(AliasMD->getOperand(1))->getString();

        // Only handle aliasees that were exported as definitions and for which
        // a GlobalAlias of that name exists in the module.
        if (!ExportedFunctions.count(Aliasee) ||
            ExportedFunctions[Aliasee].Linkage != CFL_Definition ||
            !M.getNamedAlias(Aliasee))
          continue;

        GlobalValue::VisibilityTypes Visibility =
            static_cast<GlobalValue::VisibilityTypes>(
                cast<ConstantAsMetadata>(AliasMD->getOperand(2))
                    ->getValue()
                    ->getUniqueInteger()
                    .getZExtValue());
        bool Weak =
            static_cast<bool>(cast<ConstantAsMetadata>(AliasMD->getOperand(3))
                                  ->getValue()
                                  ->getUniqueInteger()
                                  .getZExtValue());

        auto *Alias = GlobalAlias::create("", M.getNamedAlias(Aliasee));
        Alias->setVisibility(Visibility);
        if (Weak)
          Alias->setLinkage(GlobalValue::WeakAnyLinkage);

        // If a function with the alias name exists, the new alias takes over
        // its name and uses and the function is erased.
        if (auto *F = M.getFunction(AliasName)) {
          Alias->takeName(F);
          F->replaceAllUsesWith(Alias);
          F->eraseFromParent();
        } else {
          Alias->setName(AliasName);
        }
      }
    }
  }

  // Emit .symver directives for exported functions, if they exist.
  if (ExportSummary) {
    if (NamedMDNode *SymversMD = M.getNamedMetadata("symvers")) {
      for (auto *Symver : SymversMD->operands()) {
        // Operands: {symbol name, alias}.
        assert(Symver->getNumOperands() >= 2);
        StringRef SymbolName =
            cast<MDString>(Symver->getOperand(0))->getString();
        StringRef Alias = cast<MDString>(Symver->getOperand(1))->getString();

        if (!ExportedFunctions.count(SymbolName))
          continue;

        M.appendModuleInlineAsm(
            (llvm::Twine(".symver ") + SymbolName + ", " + Alias).str());
      }
    }
  }

  return true;
}

/// New pass manager entry point.
///
/// Runs the command-line driven testing path when UseCommandLine is set,
/// otherwise lowers with the summaries and DropTypeTests setting stored in the
/// pass. Reports all analyses preserved when nothing changed and none
/// preserved otherwise.
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
                                          ModuleAnalysisManager &AM) {
  bool Changed;
  if (UseCommandLine)
    Changed = LowerTypeTestsModule::runForTesting(M, AM);
  else
    Changed =
        LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests)
            .lower();
  if (!Changed)
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}