1 //===- LowerTypeTests.cpp - type metadata lowering pass -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass lowers type metadata and calls to the llvm.type.test intrinsic.
10 // It also ensures that globals are properly laid out for the
11 // llvm.icall.branch.funnel intrinsic.
12 // See http://llvm.org/docs/TypeMetadata.html for more information.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "llvm/Transforms/IPO/LowerTypeTests.h"
17 #include "llvm/ADT/APInt.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/EquivalenceClasses.h"
21 #include "llvm/ADT/PointerUnion.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/Statistic.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/TinyPtrVector.h"
27 #include "llvm/Analysis/TargetTransformInfo.h"
28 #include "llvm/Analysis/TypeMetadataUtils.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/IR/Attributes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constant.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalAlias.h"
38 #include "llvm/IR/GlobalObject.h"
39 #include "llvm/IR/GlobalValue.h"
40 #include "llvm/IR/GlobalVariable.h"
41 #include "llvm/IR/IRBuilder.h"
42 #include "llvm/IR/InlineAsm.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/IR/LLVMContext.h"
48 #include "llvm/IR/Metadata.h"
49 #include "llvm/IR/Module.h"
50 #include "llvm/IR/ModuleSummaryIndex.h"
51 #include "llvm/IR/ModuleSummaryIndexYAML.h"
52 #include "llvm/IR/Operator.h"
53 #include "llvm/IR/PassManager.h"
54 #include "llvm/IR/ReplaceConstant.h"
55 #include "llvm/IR/Type.h"
56 #include "llvm/IR/Use.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/Support/Allocator.h"
60 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/CommandLine.h"
62 #include "llvm/Support/Debug.h"
63 #include "llvm/Support/Error.h"
64 #include "llvm/Support/ErrorHandling.h"
65 #include "llvm/Support/FileSystem.h"
66 #include "llvm/Support/MathExtras.h"
67 #include "llvm/Support/MemoryBuffer.h"
68 #include "llvm/Support/TrailingObjects.h"
69 #include "llvm/Support/YAMLTraits.h"
70 #include "llvm/Support/raw_ostream.h"
71 #include "llvm/TargetParser/Triple.h"
72 #include "llvm/Transforms/IPO.h"
73 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
74 #include "llvm/Transforms/Utils/ModuleUtils.h"
75 #include <algorithm>
76 #include <cassert>
77 #include <cstdint>
78 #include <memory>
79 #include <set>
80 #include <string>
81 #include <system_error>
82 #include <utility>
83 #include <vector>
84 
85 using namespace llvm;
86 using namespace lowertypetests;
87 
88 #define DEBUG_TYPE "lowertypetests"
89 
90 STATISTIC(ByteArraySizeBits, "Byte array size in bits");
91 STATISTIC(ByteArraySizeBytes, "Byte array size in bytes");
92 STATISTIC(NumByteArraysCreated, "Number of byte arrays created");
93 STATISTIC(NumTypeTestCallsLowered, "Number of type test calls lowered");
94 STATISTIC(NumTypeIdDisjointSets, "Number of disjoint sets of type identifiers");
95 
96 static cl::opt<bool> AvoidReuse(
97     "lowertypetests-avoid-reuse",
98     cl::desc("Try to avoid reuse of byte array addresses using aliases"),
99     cl::Hidden, cl::init(true));
100 
101 static cl::opt<PassSummaryAction> ClSummaryAction(
102     "lowertypetests-summary-action",
103     cl::desc("What to do with the summary when running this pass"),
104     cl::values(clEnumValN(PassSummaryAction::None, "none", "Do nothing"),
105                clEnumValN(PassSummaryAction::Import, "import",
106                           "Import typeid resolutions from summary and globals"),
107                clEnumValN(PassSummaryAction::Export, "export",
108                           "Export typeid resolutions to summary and globals")),
109     cl::Hidden);
110 
111 static cl::opt<std::string> ClReadSummary(
112     "lowertypetests-read-summary",
113     cl::desc("Read summary from given YAML file before running pass"),
114     cl::Hidden);
115 
116 static cl::opt<std::string> ClWriteSummary(
117     "lowertypetests-write-summary",
118     cl::desc("Write summary to given YAML file after running pass"),
119     cl::Hidden);
120 
121 static cl::opt<bool>
122     ClDropTypeTests("lowertypetests-drop-type-tests",
123                     cl::desc("Simply drop type test assume sequences"),
124                     cl::Hidden, cl::init(false));
125 
126 bool BitSetInfo::containsGlobalOffset(uint64_t Offset) const {
127   if (Offset < ByteOffset)
128     return false;
129 
130   if ((Offset - ByteOffset) % (uint64_t(1) << AlignLog2) != 0)
131     return false;
132 
133   uint64_t BitOffset = (Offset - ByteOffset) >> AlignLog2;
134   if (BitOffset >= BitSize)
135     return false;
136 
137   return Bits.count(BitOffset);
138 }
139 
140 void BitSetInfo::print(raw_ostream &OS) const {
141   OS << "offset " << ByteOffset << " size " << BitSize << " align "
142      << (1 << AlignLog2);
143 
144   if (isAllOnes()) {
145     OS << " all-ones\n";
146     return;
147   }
148 
149   OS << " { ";
150   for (uint64_t B : Bits)
151     OS << B << ' ';
152   OS << "}\n";
153 }
154 
155 BitSetInfo BitSetBuilder::build() {
156   if (Min > Max)
157     Min = 0;
158 
159   // Normalize each offset against the minimum observed offset, and compute
160   // the bitwise OR of each of the offsets. The number of trailing zeros
161   // in the mask gives us the log2 of the alignment of all offsets, which
162   // allows us to compress the bitset by only storing one bit per aligned
163   // address.
164   uint64_t Mask = 0;
165   for (uint64_t &Offset : Offsets) {
166     Offset -= Min;
167     Mask |= Offset;
168   }
169 
170   BitSetInfo BSI;
171   BSI.ByteOffset = Min;
172 
173   BSI.AlignLog2 = 0;
174   if (Mask != 0)
175     BSI.AlignLog2 = llvm::countr_zero(Mask);
176 
177   // Build the compressed bitset while normalizing the offsets against the
178   // computed alignment.
179   BSI.BitSize = ((Max - Min) >> BSI.AlignLog2) + 1;
180   for (uint64_t Offset : Offsets) {
181     Offset >>= BSI.AlignLog2;
182     BSI.Bits.insert(Offset);
183   }
184 
185   return BSI;
186 }
187 
188 void GlobalLayoutBuilder::addFragment(const std::set<uint64_t> &F) {
189   // Create a new fragment to hold the layout for F.
190   Fragments.emplace_back();
191   std::vector<uint64_t> &Fragment = Fragments.back();
192   uint64_t FragmentIndex = Fragments.size() - 1;
193 
194   for (auto ObjIndex : F) {
195     uint64_t OldFragmentIndex = FragmentMap[ObjIndex];
196     if (OldFragmentIndex == 0) {
197       // We haven't seen this object index before, so just add it to the current
198       // fragment.
199       Fragment.push_back(ObjIndex);
200     } else {
201       // This index belongs to an existing fragment. Copy the elements of the
202       // old fragment into this one and clear the old fragment. We don't update
203       // the fragment map just yet, this ensures that any further references to
204       // indices from the old fragment in this fragment do not insert any more
205       // indices.
206       std::vector<uint64_t> &OldFragment = Fragments[OldFragmentIndex];
207       llvm::append_range(Fragment, OldFragment);
208       OldFragment.clear();
209     }
210   }
211 
212   // Update the fragment map to point our object indices to this fragment.
213   for (uint64_t ObjIndex : Fragment)
214     FragmentMap[ObjIndex] = FragmentIndex;
215 }
216 
217 void ByteArrayBuilder::allocate(const std::set<uint64_t> &Bits,
218                                 uint64_t BitSize, uint64_t &AllocByteOffset,
219                                 uint8_t &AllocMask) {
220   // Find the smallest current allocation.
221   unsigned Bit = 0;
222   for (unsigned I = 1; I != BitsPerByte; ++I)
223     if (BitAllocs[I] < BitAllocs[Bit])
224       Bit = I;
225 
226   AllocByteOffset = BitAllocs[Bit];
227 
228   // Add our size to it.
229   unsigned ReqSize = AllocByteOffset + BitSize;
230   BitAllocs[Bit] = ReqSize;
231   if (Bytes.size() < ReqSize)
232     Bytes.resize(ReqSize);
233 
234   // Set our bits.
235   AllocMask = 1 << Bit;
236   for (uint64_t B : Bits)
237     Bytes[AllocByteOffset + B] |= AllocMask;
238 }
239 
240 bool lowertypetests::isJumpTableCanonical(Function *F) {
241   if (F->isDeclarationForLinker())
242     return false;
243   auto *CI = mdconst::extract_or_null<ConstantInt>(
244       F->getParent()->getModuleFlag("CFI Canonical Jump Tables"));
245   if (!CI || !CI->isZero())
246     return true;
247   return F->hasFnAttribute("cfi-canonical-jump-table");
248 }
249 
250 namespace {
251 
252 struct ByteArrayInfo {
253   std::set<uint64_t> Bits;
254   uint64_t BitSize;
255   GlobalVariable *ByteArray;
256   GlobalVariable *MaskGlobal;
257   uint8_t *MaskPtr = nullptr;
258 };
259 
260 /// A POD-like structure that we use to store a global reference together with
261 /// its metadata types. In this pass we frequently need to query the set of
262 /// metadata types referenced by a global, which at the IR level is an expensive
263 /// operation involving a map lookup; this data structure helps to reduce the
264 /// number of times we need to do this lookup.
265 class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
266   friend TrailingObjects;
267 
268   GlobalObject *GO;
269   size_t NTypes;
270 
271   // For functions: true if the jump table is canonical. This essentially means
272   // whether the canonical address (i.e. the symbol table entry) of the function
273   // is provided by the local jump table. This is normally the same as whether
274   // the function is defined locally, but if canonical jump tables are disabled
275   // by the user then the jump table never provides a canonical definition.
276   bool IsJumpTableCanonical;
277 
278   // For functions: true if this function is either defined or used in a thinlto
279   // module and its jumptable entry needs to be exported to thinlto backends.
280   bool IsExported;
281 
282   size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; }
283 
284 public:
285   static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
286                                   bool IsJumpTableCanonical, bool IsExported,
287                                   ArrayRef<MDNode *> Types) {
288     auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
289         totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
290     GTM->GO = GO;
291     GTM->NTypes = Types.size();
292     GTM->IsJumpTableCanonical = IsJumpTableCanonical;
293     GTM->IsExported = IsExported;
294     std::uninitialized_copy(Types.begin(), Types.end(),
295                             GTM->getTrailingObjects<MDNode *>());
296     return GTM;
297   }
298 
299   GlobalObject *getGlobal() const {
300     return GO;
301   }
302 
303   bool isJumpTableCanonical() const {
304     return IsJumpTableCanonical;
305   }
306 
307   bool isExported() const {
308     return IsExported;
309   }
310 
311   ArrayRef<MDNode *> types() const {
312     return ArrayRef(getTrailingObjects<MDNode *>(), NTypes);
313   }
314 };
315 
316 struct ICallBranchFunnel final
317     : TrailingObjects<ICallBranchFunnel, GlobalTypeMember *> {
318   static ICallBranchFunnel *create(BumpPtrAllocator &Alloc, CallInst *CI,
319                                    ArrayRef<GlobalTypeMember *> Targets,
320                                    unsigned UniqueId) {
321     auto *Call = static_cast<ICallBranchFunnel *>(
322         Alloc.Allocate(totalSizeToAlloc<GlobalTypeMember *>(Targets.size()),
323                        alignof(ICallBranchFunnel)));
324     Call->CI = CI;
325     Call->UniqueId = UniqueId;
326     Call->NTargets = Targets.size();
327     std::uninitialized_copy(Targets.begin(), Targets.end(),
328                             Call->getTrailingObjects<GlobalTypeMember *>());
329     return Call;
330   }
331 
332   CallInst *CI;
333   ArrayRef<GlobalTypeMember *> targets() const {
334     return ArrayRef(getTrailingObjects<GlobalTypeMember *>(), NTargets);
335   }
336 
337   unsigned UniqueId;
338 
339 private:
340   size_t NTargets;
341 };
342 
343 struct ScopedSaveAliaseesAndUsed {
344   Module &M;
345   SmallVector<GlobalValue *, 4> Used, CompilerUsed;
346   std::vector<std::pair<GlobalAlias *, Function *>> FunctionAliases;
347   std::vector<std::pair<GlobalIFunc *, Function *>> ResolverIFuncs;
348 
349   ScopedSaveAliaseesAndUsed(Module &M) : M(M) {
350     // The users of this class want to replace all function references except
351     // for aliases and llvm.used/llvm.compiler.used with references to a jump
352     // table. We avoid replacing aliases in order to avoid introducing a double
353     // indirection (or an alias pointing to a declaration in ThinLTO mode), and
354     // we avoid replacing llvm.used/llvm.compiler.used because these global
355     // variables describe properties of the global, not the jump table (besides,
356     // offseted references to the jump table in llvm.used are invalid).
357     // Unfortunately, LLVM doesn't have a "RAUW except for these (possibly
358     // indirect) users", so what we do is save the list of globals referenced by
359     // llvm.used/llvm.compiler.used and aliases, erase the used lists, let RAUW
360     // replace the aliasees and then set them back to their original values at
361     // the end.
362     if (GlobalVariable *GV = collectUsedGlobalVariables(M, Used, false))
363       GV->eraseFromParent();
364     if (GlobalVariable *GV = collectUsedGlobalVariables(M, CompilerUsed, true))
365       GV->eraseFromParent();
366 
367     for (auto &GA : M.aliases()) {
368       // FIXME: This should look past all aliases not just interposable ones,
369       // see discussion on D65118.
370       if (auto *F = dyn_cast<Function>(GA.getAliasee()->stripPointerCasts()))
371         FunctionAliases.push_back({&GA, F});
372     }
373 
374     for (auto &GI : M.ifuncs())
375       if (auto *F = dyn_cast<Function>(GI.getResolver()->stripPointerCasts()))
376         ResolverIFuncs.push_back({&GI, F});
377   }
378 
379   ~ScopedSaveAliaseesAndUsed() {
380     appendToUsed(M, Used);
381     appendToCompilerUsed(M, CompilerUsed);
382 
383     for (auto P : FunctionAliases)
384       P.first->setAliasee(
385           ConstantExpr::getBitCast(P.second, P.first->getType()));
386 
387     for (auto P : ResolverIFuncs) {
388       // This does not preserve pointer casts that may have been stripped by the
389       // constructor, but the resolver's type is different from that of the
390       // ifunc anyway.
391       P.first->setResolver(P.second);
392     }
393   }
394 };
395 
396 class LowerTypeTestsModule {
397   Module &M;
398 
399   ModuleSummaryIndex *ExportSummary;
400   const ModuleSummaryIndex *ImportSummary;
401   // Set when the client has invoked this to simply drop all type test assume
402   // sequences.
403   bool DropTypeTests;
404 
405   Triple::ArchType Arch;
406   Triple::OSType OS;
407   Triple::ObjectFormatType ObjectFormat;
408 
409   // Determines which kind of Thumb jump table we generate. If arch is
410   // either 'arm' or 'thumb' we need to find this out, because
411   // selectJumpTableArmEncoding may decide to use Thumb in either case.
412   bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;
413 
414   // The jump table type we ended up deciding on. (Usually the same as
415   // Arch, except that 'arm' and 'thumb' are often interchangeable.)
416   Triple::ArchType JumpTableArch = Triple::UnknownArch;
417 
418   IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
419   IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
420   PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
421   ArrayType *Int8Arr0Ty = ArrayType::get(Type::getInt8Ty(M.getContext()), 0);
422   IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
423   PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
424   IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
425   IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0);
426 
427   // Indirect function call index assignment counter for WebAssembly
428   uint64_t IndirectIndex = 1;
429 
430   // Mapping from type identifiers to the call sites that test them, as well as
431   // whether the type identifier needs to be exported to ThinLTO backends as
432   // part of the regular LTO phase of the ThinLTO pipeline (see exportTypeId).
433   struct TypeIdUserInfo {
434     std::vector<CallInst *> CallSites;
435     bool IsExported = false;
436   };
437   DenseMap<Metadata *, TypeIdUserInfo> TypeIdUsers;
438 
439   /// This structure describes how to lower type tests for a particular type
440   /// identifier. It is either built directly from the global analysis (during
441   /// regular LTO or the regular LTO phase of ThinLTO), or indirectly using type
442   /// identifier summaries and external symbol references (in ThinLTO backends).
443   struct TypeIdLowering {
444     TypeTestResolution::Kind TheKind = TypeTestResolution::Unsat;
445 
446     /// All except Unsat: the start address within the combined global.
447     Constant *OffsetedGlobal;
448 
449     /// ByteArray, Inline, AllOnes: log2 of the required global alignment
450     /// relative to the start address.
451     Constant *AlignLog2;
452 
453     /// ByteArray, Inline, AllOnes: one less than the size of the memory region
454     /// covering members of this type identifier as a multiple of 2^AlignLog2.
455     Constant *SizeM1;
456 
457     /// ByteArray: the byte array to test the address against.
458     Constant *TheByteArray;
459 
460     /// ByteArray: the bit mask to apply to bytes loaded from the byte array.
461     Constant *BitMask;
462 
463     /// Inline: the bit mask to test the address against.
464     Constant *InlineBits;
465   };
466 
467   std::vector<ByteArrayInfo> ByteArrayInfos;
468 
469   Function *WeakInitializerFn = nullptr;
470 
471   bool shouldExportConstantsAsAbsoluteSymbols();
472   uint8_t *exportTypeId(StringRef TypeId, const TypeIdLowering &TIL);
473   TypeIdLowering importTypeId(StringRef TypeId);
474   void importTypeTest(CallInst *CI);
475   void importFunction(Function *F, bool isJumpTableCanonical,
476                       std::vector<GlobalAlias *> &AliasesToErase);
477 
478   BitSetInfo
479   buildBitSet(Metadata *TypeId,
480               const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
481   ByteArrayInfo *createByteArray(BitSetInfo &BSI);
482   void allocateByteArrays();
483   Value *createBitSetTest(IRBuilder<> &B, const TypeIdLowering &TIL,
484                           Value *BitOffset);
485   void lowerTypeTestCalls(
486       ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
487       const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
488   Value *lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
489                            const TypeIdLowering &TIL);
490 
491   void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
492                                        ArrayRef<GlobalTypeMember *> Globals);
493   Triple::ArchType
494   selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
495   unsigned getJumpTableEntrySize();
496   Type *getJumpTableEntryType();
497   void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
498                             Triple::ArchType JumpTableArch,
499                             SmallVectorImpl<Value *> &AsmArgs, Function *Dest);
500   void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
501   void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
502                                  ArrayRef<GlobalTypeMember *> Functions);
503   void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds,
504                                        ArrayRef<GlobalTypeMember *> Functions);
505   void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds,
506                                      ArrayRef<GlobalTypeMember *> Functions);
507   void
508   buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
509                               ArrayRef<GlobalTypeMember *> Globals,
510                               ArrayRef<ICallBranchFunnel *> ICallBranchFunnels);
511 
512   void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT,
513                                               bool IsJumpTableCanonical);
514   void moveInitializerToModuleConstructor(GlobalVariable *GV);
515   void findGlobalVariableUsersOf(Constant *C,
516                                  SmallSetVector<GlobalVariable *, 8> &Out);
517 
518   void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);
519 
520   /// replaceCfiUses - Go through the uses list for this definition
521   /// and make each use point to "V" instead of "this" when the use is outside
522   /// the block. 'This's use list is expected to have at least one element.
523   /// Unlike replaceAllUsesWith this function skips blockaddr and direct call
524   /// uses.
525   void replaceCfiUses(Function *Old, Value *New, bool IsJumpTableCanonical);
526 
527   /// replaceDirectCalls - Go through the uses list for this definition and
528   /// replace each use, which is a direct function call.
529   void replaceDirectCalls(Value *Old, Value *New);
530 
531 public:
532   LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
533                        ModuleSummaryIndex *ExportSummary,
534                        const ModuleSummaryIndex *ImportSummary,
535                        bool DropTypeTests);
536 
537   bool lower();
538 
539   // Lower the module using the action and summary passed as command line
540   // arguments. For testing purposes only.
541   static bool runForTesting(Module &M, ModuleAnalysisManager &AM);
542 };
543 } // end anonymous namespace
544 
545 /// Build a bit set for TypeId using the object layouts in
546 /// GlobalLayout.
547 BitSetInfo LowerTypeTestsModule::buildBitSet(
548     Metadata *TypeId,
549     const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
550   BitSetBuilder BSB;
551 
552   // Compute the byte offset of each address associated with this type
553   // identifier.
554   for (const auto &GlobalAndOffset : GlobalLayout) {
555     for (MDNode *Type : GlobalAndOffset.first->types()) {
556       if (Type->getOperand(1) != TypeId)
557         continue;
558       uint64_t Offset =
559           cast<ConstantInt>(
560               cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
561               ->getZExtValue();
562       BSB.addOffset(GlobalAndOffset.second + Offset);
563     }
564   }
565 
566   return BSB.build();
567 }
568 
569 /// Build a test that bit BitOffset mod sizeof(Bits)*8 is set in
570 /// Bits. This pattern matches to the bt instruction on x86.
571 static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
572                                   Value *BitOffset) {
573   auto BitsType = cast<IntegerType>(Bits->getType());
574   unsigned BitWidth = BitsType->getBitWidth();
575 
576   BitOffset = B.CreateZExtOrTrunc(BitOffset, BitsType);
577   Value *BitIndex =
578       B.CreateAnd(BitOffset, ConstantInt::get(BitsType, BitWidth - 1));
579   Value *BitMask = B.CreateShl(ConstantInt::get(BitsType, 1), BitIndex);
580   Value *MaskedBits = B.CreateAnd(Bits, BitMask);
581   return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
582 }
583 
584 ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
585   // Create globals to stand in for byte arrays and masks. These never actually
586   // get initialized, we RAUW and erase them later in allocateByteArrays() once
587   // we know the offset and mask to use.
588   auto ByteArrayGlobal = new GlobalVariable(
589       M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
590   auto MaskGlobal = new GlobalVariable(M, Int8Ty, /*isConstant=*/true,
591                                        GlobalValue::PrivateLinkage, nullptr);
592 
593   ByteArrayInfos.emplace_back();
594   ByteArrayInfo *BAI = &ByteArrayInfos.back();
595 
596   BAI->Bits = BSI.Bits;
597   BAI->BitSize = BSI.BitSize;
598   BAI->ByteArray = ByteArrayGlobal;
599   BAI->MaskGlobal = MaskGlobal;
600   return BAI;
601 }
602 
603 void LowerTypeTestsModule::allocateByteArrays() {
604   llvm::stable_sort(ByteArrayInfos,
605                     [](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
606                       return BAI1.BitSize > BAI2.BitSize;
607                     });
608 
609   std::vector<uint64_t> ByteArrayOffsets(ByteArrayInfos.size());
610 
611   ByteArrayBuilder BAB;
612   for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
613     ByteArrayInfo *BAI = &ByteArrayInfos[I];
614 
615     uint8_t Mask;
616     BAB.allocate(BAI->Bits, BAI->BitSize, ByteArrayOffsets[I], Mask);
617 
618     BAI->MaskGlobal->replaceAllUsesWith(
619         ConstantExpr::getIntToPtr(ConstantInt::get(Int8Ty, Mask), Int8PtrTy));
620     BAI->MaskGlobal->eraseFromParent();
621     if (BAI->MaskPtr)
622       *BAI->MaskPtr = Mask;
623   }
624 
625   Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
626   auto ByteArray =
627       new GlobalVariable(M, ByteArrayConst->getType(), /*isConstant=*/true,
628                          GlobalValue::PrivateLinkage, ByteArrayConst);
629 
630   for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
631     ByteArrayInfo *BAI = &ByteArrayInfos[I];
632 
633     Constant *Idxs[] = {ConstantInt::get(IntPtrTy, 0),
634                         ConstantInt::get(IntPtrTy, ByteArrayOffsets[I])};
635     Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(
636         ByteArrayConst->getType(), ByteArray, Idxs);
637 
638     // Create an alias instead of RAUW'ing the gep directly. On x86 this ensures
639     // that the pc-relative displacement is folded into the lea instead of the
640     // test instruction getting another displacement.
641     GlobalAlias *Alias = GlobalAlias::create(
642         Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M);
643     BAI->ByteArray->replaceAllUsesWith(Alias);
644     BAI->ByteArray->eraseFromParent();
645   }
646 
647   ByteArraySizeBits = BAB.BitAllocs[0] + BAB.BitAllocs[1] + BAB.BitAllocs[2] +
648                       BAB.BitAllocs[3] + BAB.BitAllocs[4] + BAB.BitAllocs[5] +
649                       BAB.BitAllocs[6] + BAB.BitAllocs[7];
650   ByteArraySizeBytes = BAB.Bytes.size();
651 }
652 
653 /// Build a test that bit BitOffset is set in the type identifier that was
654 /// lowered to TIL, which must be either an Inline or a ByteArray.
655 Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B,
656                                               const TypeIdLowering &TIL,
657                                               Value *BitOffset) {
658   if (TIL.TheKind == TypeTestResolution::Inline) {
659     // If the bit set is sufficiently small, we can avoid a load by bit testing
660     // a constant.
661     return createMaskedBitTest(B, TIL.InlineBits, BitOffset);
662   } else {
663     Constant *ByteArray = TIL.TheByteArray;
664     if (AvoidReuse && !ImportSummary) {
665       // Each use of the byte array uses a different alias. This makes the
666       // backend less likely to reuse previously computed byte array addresses,
667       // improving the security of the CFI mechanism based on this pass.
668       // This won't work when importing because TheByteArray is external.
669       ByteArray = GlobalAlias::create(Int8Ty, 0, GlobalValue::PrivateLinkage,
670                                       "bits_use", ByteArray, &M);
671     }
672 
673     Value *ByteAddr = B.CreateGEP(Int8Ty, ByteArray, BitOffset);
674     Value *Byte = B.CreateLoad(Int8Ty, ByteAddr);
675 
676     Value *ByteAndMask =
677         B.CreateAnd(Byte, ConstantExpr::getPtrToInt(TIL.BitMask, Int8Ty));
678     return B.CreateICmpNE(ByteAndMask, ConstantInt::get(Int8Ty, 0));
679   }
680 }
681 
682 static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
683                                 Value *V, uint64_t COffset) {
684   if (auto GV = dyn_cast<GlobalObject>(V)) {
685     SmallVector<MDNode *, 2> Types;
686     GV->getMetadata(LLVMContext::MD_type, Types);
687     for (MDNode *Type : Types) {
688       if (Type->getOperand(1) != TypeId)
689         continue;
690       uint64_t Offset =
691           cast<ConstantInt>(
692               cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
693               ->getZExtValue();
694       if (COffset == Offset)
695         return true;
696     }
697     return false;
698   }
699 
700   if (auto GEP = dyn_cast<GEPOperator>(V)) {
701     APInt APOffset(DL.getIndexSizeInBits(0), 0);
702     bool Result = GEP->accumulateConstantOffset(DL, APOffset);
703     if (!Result)
704       return false;
705     COffset += APOffset.getZExtValue();
706     return isKnownTypeIdMember(TypeId, DL, GEP->getPointerOperand(), COffset);
707   }
708 
709   if (auto Op = dyn_cast<Operator>(V)) {
710     if (Op->getOpcode() == Instruction::BitCast)
711       return isKnownTypeIdMember(TypeId, DL, Op->getOperand(0), COffset);
712 
713     if (Op->getOpcode() == Instruction::Select)
714       return isKnownTypeIdMember(TypeId, DL, Op->getOperand(1), COffset) &&
715              isKnownTypeIdMember(TypeId, DL, Op->getOperand(2), COffset);
716   }
717 
718   return false;
719 }
720 
721 /// Lower a llvm.type.test call to its implementation. Returns the value to
722 /// replace the call with.
723 Value *LowerTypeTestsModule::lowerTypeTestCall(Metadata *TypeId, CallInst *CI,
724                                                const TypeIdLowering &TIL) {
725   // Delay lowering if the resolution is currently unknown.
726   if (TIL.TheKind == TypeTestResolution::Unknown)
727     return nullptr;
728   if (TIL.TheKind == TypeTestResolution::Unsat)
729     return ConstantInt::getFalse(M.getContext());
730 
731   Value *Ptr = CI->getArgOperand(0);
732   const DataLayout &DL = M.getDataLayout();
733   if (isKnownTypeIdMember(TypeId, DL, Ptr, 0))
734     return ConstantInt::getTrue(M.getContext());
735 
736   BasicBlock *InitialBB = CI->getParent();
737 
738   IRBuilder<> B(CI);
739 
740   Value *PtrAsInt = B.CreatePtrToInt(Ptr, IntPtrTy);
741 
742   Constant *OffsetedGlobalAsInt =
743       ConstantExpr::getPtrToInt(TIL.OffsetedGlobal, IntPtrTy);
744   if (TIL.TheKind == TypeTestResolution::Single)
745     return B.CreateICmpEQ(PtrAsInt, OffsetedGlobalAsInt);
746 
747   Value *PtrOffset = B.CreateSub(PtrAsInt, OffsetedGlobalAsInt);
748 
749   // We need to check that the offset both falls within our range and is
750   // suitably aligned. We can check both properties at the same time by
751   // performing a right rotate by log2(alignment) followed by an integer
752   // comparison against the bitset size. The rotate will move the lower
753   // order bits that need to be zero into the higher order bits of the
754   // result, causing the comparison to fail if they are nonzero. The rotate
755   // also conveniently gives us a bit offset to use during the load from
756   // the bitset.
757   Value *OffsetSHR =
758       B.CreateLShr(PtrOffset, ConstantExpr::getZExt(TIL.AlignLog2, IntPtrTy));
759   Value *OffsetSHL = B.CreateShl(
760       PtrOffset, ConstantExpr::getZExt(
761                      ConstantExpr::getSub(
762                          ConstantInt::get(Int8Ty, DL.getPointerSizeInBits(0)),
763                          TIL.AlignLog2),
764                      IntPtrTy));
765   Value *BitOffset = B.CreateOr(OffsetSHR, OffsetSHL);
766 
767   Value *OffsetInRange = B.CreateICmpULE(BitOffset, TIL.SizeM1);
768 
769   // If the bit set is all ones, testing against it is unnecessary.
770   if (TIL.TheKind == TypeTestResolution::AllOnes)
771     return OffsetInRange;
772 
773   // See if the intrinsic is used in the following common pattern:
774   //   br(llvm.type.test(...), thenbb, elsebb)
775   // where nothing happens between the type test and the br.
776   // If so, create slightly simpler IR.
777   if (CI->hasOneUse())
778     if (auto *Br = dyn_cast<BranchInst>(*CI->user_begin()))
779       if (CI->getNextNode() == Br) {
780         BasicBlock *Then = InitialBB->splitBasicBlock(CI->getIterator());
781         BasicBlock *Else = Br->getSuccessor(1);
782         BranchInst *NewBr = BranchInst::Create(Then, Else, OffsetInRange);
783         NewBr->setMetadata(LLVMContext::MD_prof,
784                            Br->getMetadata(LLVMContext::MD_prof));
785         ReplaceInstWithInst(InitialBB->getTerminator(), NewBr);
786 
787         // Update phis in Else resulting from InitialBB being split
788         for (auto &Phi : Else->phis())
789           Phi.addIncoming(Phi.getIncomingValueForBlock(Then), InitialBB);
790 
791         IRBuilder<> ThenB(CI);
792         return createBitSetTest(ThenB, TIL, BitOffset);
793       }
794 
795   IRBuilder<> ThenB(SplitBlockAndInsertIfThen(OffsetInRange, CI, false));
796 
797   // Now that we know that the offset is in range and aligned, load the
798   // appropriate bit from the bitset.
799   Value *Bit = createBitSetTest(ThenB, TIL, BitOffset);
800 
801   // The value we want is 0 if we came directly from the initial block
802   // (having failed the range or alignment checks), or the loaded bit if
803   // we came from the block in which we loaded it.
804   B.SetInsertPoint(CI);
805   PHINode *P = B.CreatePHI(Int1Ty, 2);
806   P->addIncoming(ConstantInt::get(Int1Ty, 0), InitialBB);
807   P->addIncoming(Bit, ThenB.GetInsertBlock());
808   return P;
809 }
810 
811 /// Given a disjoint set of type identifiers and globals, lay out the globals,
812 /// build the bit sets and lower the llvm.type.test calls.
813 void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
814     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
815   // Build a new global with the combined contents of the referenced globals.
816   // This global is a struct whose even-indexed elements contain the original
817   // contents of the referenced globals and whose odd-indexed elements contain
818   // any padding required to align the next element to the next power of 2 plus
819   // any additional padding required to meet its alignment requirements.
820   std::vector<Constant *> GlobalInits;
821   const DataLayout &DL = M.getDataLayout();
822   DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
823   Align MaxAlign;
824   uint64_t CurOffset = 0;
825   uint64_t DesiredPadding = 0;
826   for (GlobalTypeMember *G : Globals) {
827     auto *GV = cast<GlobalVariable>(G->getGlobal());
828     Align Alignment =
829         DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
830     MaxAlign = std::max(MaxAlign, Alignment);
831     uint64_t GVOffset = alignTo(CurOffset + DesiredPadding, Alignment);
832     GlobalLayout[G] = GVOffset;
833     if (GVOffset != 0) {
834       uint64_t Padding = GVOffset - CurOffset;
835       GlobalInits.push_back(
836           ConstantAggregateZero::get(ArrayType::get(Int8Ty, Padding)));
837     }
838 
839     GlobalInits.push_back(GV->getInitializer());
840     uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
841     CurOffset = GVOffset + InitSize;
842 
843     // Compute the amount of padding that we'd like for the next element.
844     DesiredPadding = NextPowerOf2(InitSize - 1) - InitSize;
845 
846     // Experiments of different caps with Chromium on both x64 and ARM64
847     // have shown that the 32-byte cap generates the smallest binary on
848     // both platforms while different caps yield similar performance.
849     // (see https://lists.llvm.org/pipermail/llvm-dev/2018-July/124694.html)
850     if (DesiredPadding > 32)
851       DesiredPadding = alignTo(InitSize, 32) - InitSize;
852   }
853 
854   Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
855   auto *CombinedGlobal =
856       new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
857                          GlobalValue::PrivateLinkage, NewInit);
858   CombinedGlobal->setAlignment(MaxAlign);
859 
860   StructType *NewTy = cast<StructType>(NewInit->getType());
861   lowerTypeTestCalls(TypeIds, CombinedGlobal, GlobalLayout);
862 
863   // Build aliases pointing to offsets into the combined global for each
864   // global from which we built the combined global, and replace references
865   // to the original globals with references to the aliases.
866   for (unsigned I = 0; I != Globals.size(); ++I) {
867     GlobalVariable *GV = cast<GlobalVariable>(Globals[I]->getGlobal());
868 
869     // Multiply by 2 to account for padding elements.
870     Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
871                                       ConstantInt::get(Int32Ty, I * 2)};
872     Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
873         NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
874     assert(GV->getType()->getAddressSpace() == 0);
875     GlobalAlias *GAlias =
876         GlobalAlias::create(NewTy->getElementType(I * 2), 0, GV->getLinkage(),
877                             "", CombinedGlobalElemPtr, &M);
878     GAlias->setVisibility(GV->getVisibility());
879     GAlias->takeName(GV);
880     GV->replaceAllUsesWith(GAlias);
881     GV->eraseFromParent();
882   }
883 }
884 
885 bool LowerTypeTestsModule::shouldExportConstantsAsAbsoluteSymbols() {
886   return (Arch == Triple::x86 || Arch == Triple::x86_64) &&
887          ObjectFormat == Triple::ELF;
888 }
889 
890 /// Export the given type identifier so that ThinLTO backends may import it.
891 /// Type identifiers are exported by adding coarse-grained information about how
892 /// to test the type identifier to the summary, and creating symbols in the
893 /// object file (aliases and absolute symbols) containing fine-grained
894 /// information about the type identifier.
895 ///
896 /// Returns a pointer to the location in which to store the bitmask, if
897 /// applicable.
898 uint8_t *LowerTypeTestsModule::exportTypeId(StringRef TypeId,
899                                             const TypeIdLowering &TIL) {
900   TypeTestResolution &TTRes =
901       ExportSummary->getOrInsertTypeIdSummary(TypeId).TTRes;
902   TTRes.TheKind = TIL.TheKind;
903 
904   auto ExportGlobal = [&](StringRef Name, Constant *C) {
905     GlobalAlias *GA =
906         GlobalAlias::create(Int8Ty, 0, GlobalValue::ExternalLinkage,
907                             "__typeid_" + TypeId + "_" + Name, C, &M);
908     GA->setVisibility(GlobalValue::HiddenVisibility);
909   };
910 
911   auto ExportConstant = [&](StringRef Name, uint64_t &Storage, Constant *C) {
912     if (shouldExportConstantsAsAbsoluteSymbols())
913       ExportGlobal(Name, ConstantExpr::getIntToPtr(C, Int8PtrTy));
914     else
915       Storage = cast<ConstantInt>(C)->getZExtValue();
916   };
917 
918   if (TIL.TheKind != TypeTestResolution::Unsat)
919     ExportGlobal("global_addr", TIL.OffsetedGlobal);
920 
921   if (TIL.TheKind == TypeTestResolution::ByteArray ||
922       TIL.TheKind == TypeTestResolution::Inline ||
923       TIL.TheKind == TypeTestResolution::AllOnes) {
924     ExportConstant("align", TTRes.AlignLog2, TIL.AlignLog2);
925     ExportConstant("size_m1", TTRes.SizeM1, TIL.SizeM1);
926 
927     uint64_t BitSize = cast<ConstantInt>(TIL.SizeM1)->getZExtValue() + 1;
928     if (TIL.TheKind == TypeTestResolution::Inline)
929       TTRes.SizeM1BitWidth = (BitSize <= 32) ? 5 : 6;
930     else
931       TTRes.SizeM1BitWidth = (BitSize <= 128) ? 7 : 32;
932   }
933 
934   if (TIL.TheKind == TypeTestResolution::ByteArray) {
935     ExportGlobal("byte_array", TIL.TheByteArray);
936     if (shouldExportConstantsAsAbsoluteSymbols())
937       ExportGlobal("bit_mask", TIL.BitMask);
938     else
939       return &TTRes.BitMask;
940   }
941 
942   if (TIL.TheKind == TypeTestResolution::Inline)
943     ExportConstant("inline_bits", TTRes.InlineBits, TIL.InlineBits);
944 
945   return nullptr;
946 }
947 
948 LowerTypeTestsModule::TypeIdLowering
949 LowerTypeTestsModule::importTypeId(StringRef TypeId) {
950   const TypeIdSummary *TidSummary = ImportSummary->getTypeIdSummary(TypeId);
951   if (!TidSummary)
952     return {}; // Unsat: no globals match this type id.
953   const TypeTestResolution &TTRes = TidSummary->TTRes;
954 
955   TypeIdLowering TIL;
956   TIL.TheKind = TTRes.TheKind;
957 
958   auto ImportGlobal = [&](StringRef Name) {
959     // Give the global a type of length 0 so that it is not assumed not to alias
960     // with any other global.
961     Constant *C = M.getOrInsertGlobal(("__typeid_" + TypeId + "_" + Name).str(),
962                                       Int8Arr0Ty);
963     if (auto *GV = dyn_cast<GlobalVariable>(C))
964       GV->setVisibility(GlobalValue::HiddenVisibility);
965     C = ConstantExpr::getBitCast(C, Int8PtrTy);
966     return C;
967   };
968 
969   auto ImportConstant = [&](StringRef Name, uint64_t Const, unsigned AbsWidth,
970                             Type *Ty) {
971     if (!shouldExportConstantsAsAbsoluteSymbols()) {
972       Constant *C =
973           ConstantInt::get(isa<IntegerType>(Ty) ? Ty : Int64Ty, Const);
974       if (!isa<IntegerType>(Ty))
975         C = ConstantExpr::getIntToPtr(C, Ty);
976       return C;
977     }
978 
979     Constant *C = ImportGlobal(Name);
980     auto *GV = cast<GlobalVariable>(C->stripPointerCasts());
981     if (isa<IntegerType>(Ty))
982       C = ConstantExpr::getPtrToInt(C, Ty);
983     if (GV->getMetadata(LLVMContext::MD_absolute_symbol))
984       return C;
985 
986     auto SetAbsRange = [&](uint64_t Min, uint64_t Max) {
987       auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Min));
988       auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntPtrTy, Max));
989       GV->setMetadata(LLVMContext::MD_absolute_symbol,
990                       MDNode::get(M.getContext(), {MinC, MaxC}));
991     };
992     if (AbsWidth == IntPtrTy->getBitWidth())
993       SetAbsRange(~0ull, ~0ull); // Full set.
994     else
995       SetAbsRange(0, 1ull << AbsWidth);
996     return C;
997   };
998 
999   if (TIL.TheKind != TypeTestResolution::Unsat)
1000     TIL.OffsetedGlobal = ImportGlobal("global_addr");
1001 
1002   if (TIL.TheKind == TypeTestResolution::ByteArray ||
1003       TIL.TheKind == TypeTestResolution::Inline ||
1004       TIL.TheKind == TypeTestResolution::AllOnes) {
1005     TIL.AlignLog2 = ImportConstant("align", TTRes.AlignLog2, 8, Int8Ty);
1006     TIL.SizeM1 =
1007         ImportConstant("size_m1", TTRes.SizeM1, TTRes.SizeM1BitWidth, IntPtrTy);
1008   }
1009 
1010   if (TIL.TheKind == TypeTestResolution::ByteArray) {
1011     TIL.TheByteArray = ImportGlobal("byte_array");
1012     TIL.BitMask = ImportConstant("bit_mask", TTRes.BitMask, 8, Int8PtrTy);
1013   }
1014 
1015   if (TIL.TheKind == TypeTestResolution::Inline)
1016     TIL.InlineBits = ImportConstant(
1017         "inline_bits", TTRes.InlineBits, 1 << TTRes.SizeM1BitWidth,
1018         TTRes.SizeM1BitWidth <= 5 ? Int32Ty : Int64Ty);
1019 
1020   return TIL;
1021 }
1022 
1023 void LowerTypeTestsModule::importTypeTest(CallInst *CI) {
1024   auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
1025   if (!TypeIdMDVal)
1026     report_fatal_error("Second argument of llvm.type.test must be metadata");
1027 
1028   auto TypeIdStr = dyn_cast<MDString>(TypeIdMDVal->getMetadata());
1029   // If this is a local unpromoted type, which doesn't have a metadata string,
1030   // treat as Unknown and delay lowering, so that we can still utilize it for
1031   // later optimizations.
1032   if (!TypeIdStr)
1033     return;
1034 
1035   TypeIdLowering TIL = importTypeId(TypeIdStr->getString());
1036   Value *Lowered = lowerTypeTestCall(TypeIdStr, CI, TIL);
1037   if (Lowered) {
1038     CI->replaceAllUsesWith(Lowered);
1039     CI->eraseFromParent();
1040   }
1041 }
1042 
1043 // ThinLTO backend: the function F has a jump table entry; update this module
1044 // accordingly. isJumpTableCanonical describes the type of the jump table entry.
1045 void LowerTypeTestsModule::importFunction(
1046     Function *F, bool isJumpTableCanonical,
1047     std::vector<GlobalAlias *> &AliasesToErase) {
1048   assert(F->getType()->getAddressSpace() == 0);
1049 
1050   GlobalValue::VisibilityTypes Visibility = F->getVisibility();
1051   std::string Name = std::string(F->getName());
1052 
1053   if (F->isDeclarationForLinker() && isJumpTableCanonical) {
1054     // Non-dso_local functions may be overriden at run time,
1055     // don't short curcuit them
1056     if (F->isDSOLocal()) {
1057       Function *RealF = Function::Create(F->getFunctionType(),
1058                                          GlobalValue::ExternalLinkage,
1059                                          F->getAddressSpace(),
1060                                          Name + ".cfi", &M);
1061       RealF->setVisibility(GlobalVariable::HiddenVisibility);
1062       replaceDirectCalls(F, RealF);
1063     }
1064     return;
1065   }
1066 
1067   Function *FDecl;
1068   if (!isJumpTableCanonical) {
1069     // Either a declaration of an external function or a reference to a locally
1070     // defined jump table.
1071     FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
1072                              F->getAddressSpace(), Name + ".cfi_jt", &M);
1073     FDecl->setVisibility(GlobalValue::HiddenVisibility);
1074   } else {
1075     F->setName(Name + ".cfi");
1076     F->setLinkage(GlobalValue::ExternalLinkage);
1077     FDecl = Function::Create(F->getFunctionType(), GlobalValue::ExternalLinkage,
1078                              F->getAddressSpace(), Name, &M);
1079     FDecl->setVisibility(Visibility);
1080     Visibility = GlobalValue::HiddenVisibility;
1081 
1082     // Delete aliases pointing to this function, they'll be re-created in the
1083     // merged output. Don't do it yet though because ScopedSaveAliaseesAndUsed
1084     // will want to reset the aliasees first.
1085     for (auto &U : F->uses()) {
1086       if (auto *A = dyn_cast<GlobalAlias>(U.getUser())) {
1087         Function *AliasDecl = Function::Create(
1088             F->getFunctionType(), GlobalValue::ExternalLinkage,
1089             F->getAddressSpace(), "", &M);
1090         AliasDecl->takeName(A);
1091         A->replaceAllUsesWith(AliasDecl);
1092         AliasesToErase.push_back(A);
1093       }
1094     }
1095   }
1096 
1097   if (F->hasExternalWeakLinkage())
1098     replaceWeakDeclarationWithJumpTablePtr(F, FDecl, isJumpTableCanonical);
1099   else
1100     replaceCfiUses(F, FDecl, isJumpTableCanonical);
1101 
1102   // Set visibility late because it's used in replaceCfiUses() to determine
1103   // whether uses need to to be replaced.
1104   F->setVisibility(Visibility);
1105 }
1106 
1107 void LowerTypeTestsModule::lowerTypeTestCalls(
1108     ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
1109     const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
1110   CombinedGlobalAddr = ConstantExpr::getBitCast(CombinedGlobalAddr, Int8PtrTy);
1111 
1112   // For each type identifier in this disjoint set...
1113   for (Metadata *TypeId : TypeIds) {
1114     // Build the bitset.
1115     BitSetInfo BSI = buildBitSet(TypeId, GlobalLayout);
1116     LLVM_DEBUG({
1117       if (auto MDS = dyn_cast<MDString>(TypeId))
1118         dbgs() << MDS->getString() << ": ";
1119       else
1120         dbgs() << "<unnamed>: ";
1121       BSI.print(dbgs());
1122     });
1123 
1124     ByteArrayInfo *BAI = nullptr;
1125     TypeIdLowering TIL;
1126     TIL.OffsetedGlobal = ConstantExpr::getGetElementPtr(
1127         Int8Ty, CombinedGlobalAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset)),
1128     TIL.AlignLog2 = ConstantInt::get(Int8Ty, BSI.AlignLog2);
1129     TIL.SizeM1 = ConstantInt::get(IntPtrTy, BSI.BitSize - 1);
1130     if (BSI.isAllOnes()) {
1131       TIL.TheKind = (BSI.BitSize == 1) ? TypeTestResolution::Single
1132                                        : TypeTestResolution::AllOnes;
1133     } else if (BSI.BitSize <= 64) {
1134       TIL.TheKind = TypeTestResolution::Inline;
1135       uint64_t InlineBits = 0;
1136       for (auto Bit : BSI.Bits)
1137         InlineBits |= uint64_t(1) << Bit;
1138       if (InlineBits == 0)
1139         TIL.TheKind = TypeTestResolution::Unsat;
1140       else
1141         TIL.InlineBits = ConstantInt::get(
1142             (BSI.BitSize <= 32) ? Int32Ty : Int64Ty, InlineBits);
1143     } else {
1144       TIL.TheKind = TypeTestResolution::ByteArray;
1145       ++NumByteArraysCreated;
1146       BAI = createByteArray(BSI);
1147       TIL.TheByteArray = BAI->ByteArray;
1148       TIL.BitMask = BAI->MaskGlobal;
1149     }
1150 
1151     TypeIdUserInfo &TIUI = TypeIdUsers[TypeId];
1152 
1153     if (TIUI.IsExported) {
1154       uint8_t *MaskPtr = exportTypeId(cast<MDString>(TypeId)->getString(), TIL);
1155       if (BAI)
1156         BAI->MaskPtr = MaskPtr;
1157     }
1158 
1159     // Lower each call to llvm.type.test for this type identifier.
1160     for (CallInst *CI : TIUI.CallSites) {
1161       ++NumTypeTestCallsLowered;
1162       Value *Lowered = lowerTypeTestCall(TypeId, CI, TIL);
1163       if (Lowered) {
1164         CI->replaceAllUsesWith(Lowered);
1165         CI->eraseFromParent();
1166       }
1167     }
1168   }
1169 }
1170 
1171 void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
1172   if (Type->getNumOperands() != 2)
1173     report_fatal_error("All operands of type metadata must have 2 elements");
1174 
1175   if (GO->isThreadLocal())
1176     report_fatal_error("Bit set element may not be thread-local");
1177   if (isa<GlobalVariable>(GO) && GO->hasSection())
1178     report_fatal_error(
1179         "A member of a type identifier may not have an explicit section");
1180 
1181   // FIXME: We previously checked that global var member of a type identifier
1182   // must be a definition, but the IR linker may leave type metadata on
1183   // declarations. We should restore this check after fixing PR31759.
1184 
1185   auto OffsetConstMD = dyn_cast<ConstantAsMetadata>(Type->getOperand(0));
1186   if (!OffsetConstMD)
1187     report_fatal_error("Type offset must be a constant");
1188   auto OffsetInt = dyn_cast<ConstantInt>(OffsetConstMD->getValue());
1189   if (!OffsetInt)
1190     report_fatal_error("Type offset must be an integer constant");
1191 }
1192 
1193 static const unsigned kX86JumpTableEntrySize = 8;
1194 static const unsigned kX86IBTJumpTableEntrySize = 16;
1195 static const unsigned kARMJumpTableEntrySize = 4;
1196 static const unsigned kARMBTIJumpTableEntrySize = 8;
1197 static const unsigned kARMv6MJumpTableEntrySize = 16;
1198 static const unsigned kRISCVJumpTableEntrySize = 8;
1199 
1200 unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
1201   switch (JumpTableArch) {
1202   case Triple::x86:
1203   case Triple::x86_64:
1204     if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
1205             M.getModuleFlag("cf-protection-branch")))
1206       if (MD->getZExtValue())
1207         return kX86IBTJumpTableEntrySize;
1208     return kX86JumpTableEntrySize;
1209   case Triple::arm:
1210     return kARMJumpTableEntrySize;
1211   case Triple::thumb:
1212     if (CanUseThumbBWJumpTable)
1213       return kARMJumpTableEntrySize;
1214     else
1215       return kARMv6MJumpTableEntrySize;
1216   case Triple::aarch64:
1217     if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
1218             M.getModuleFlag("branch-target-enforcement")))
1219       if (BTE->getZExtValue())
1220         return kARMBTIJumpTableEntrySize;
1221     return kARMJumpTableEntrySize;
1222   case Triple::riscv32:
1223   case Triple::riscv64:
1224     return kRISCVJumpTableEntrySize;
1225   default:
1226     report_fatal_error("Unsupported architecture for jump tables");
1227   }
1228 }
1229 
1230 // Create a jump table entry for the target. This consists of an instruction
1231 // sequence containing a relative branch to Dest. Appends inline asm text,
1232 // constraints and arguments to AsmOS, ConstraintOS and AsmArgs.
1233 void LowerTypeTestsModule::createJumpTableEntry(
1234     raw_ostream &AsmOS, raw_ostream &ConstraintOS,
1235     Triple::ArchType JumpTableArch, SmallVectorImpl<Value *> &AsmArgs,
1236     Function *Dest) {
1237   unsigned ArgIndex = AsmArgs.size();
1238 
1239   if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64) {
1240     bool Endbr = false;
1241     if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
1242           Dest->getParent()->getModuleFlag("cf-protection-branch")))
1243       Endbr = !MD->isZero();
1244     if (Endbr)
1245       AsmOS << (JumpTableArch == Triple::x86 ? "endbr32\n" : "endbr64\n");
1246     AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n";
1247     if (Endbr)
1248       AsmOS << ".balign 16, 0xcc\n";
1249     else
1250       AsmOS << "int3\nint3\nint3\n";
1251   } else if (JumpTableArch == Triple::arm) {
1252     AsmOS << "b $" << ArgIndex << "\n";
1253   } else if (JumpTableArch == Triple::aarch64) {
1254     if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
1255           Dest->getParent()->getModuleFlag("branch-target-enforcement")))
1256       if (BTE->getZExtValue())
1257         AsmOS << "bti c\n";
1258     AsmOS << "b $" << ArgIndex << "\n";
1259   } else if (JumpTableArch == Triple::thumb) {
1260     if (!CanUseThumbBWJumpTable) {
1261       // In Armv6-M, this sequence will generate a branch without corrupting
1262       // any registers. We use two stack words; in the second, we construct the
1263       // address we'll pop into pc, and the first is used to save and restore
1264       // r0 which we use as a temporary register.
1265       //
1266       // To support position-independent use cases, the offset of the target
1267       // function is stored as a relative offset (which will expand into an
1268       // R_ARM_REL32 relocation in ELF, and presumably the equivalent in other
1269       // object file types), and added to pc after we load it. (The alternative
1270       // B.W is automatically pc-relative.)
1271       //
1272       // There are five 16-bit Thumb instructions here, so the .balign 4 adds a
1273       // sixth halfword of padding, and then the offset consumes a further 4
1274       // bytes, for a total of 16, which is very convenient since entries in
1275       // this jump table need to have power-of-two size.
1276       AsmOS << "push {r0,r1}\n"
1277             << "ldr r0, 1f\n"
1278             << "0: add r0, r0, pc\n"
1279             << "str r0, [sp, #4]\n"
1280             << "pop {r0,pc}\n"
1281             << ".balign 4\n"
1282             << "1: .word $" << ArgIndex << " - (0b + 4)\n";
1283     } else {
1284       AsmOS << "b.w $" << ArgIndex << "\n";
1285     }
1286   } else if (JumpTableArch == Triple::riscv32 ||
1287              JumpTableArch == Triple::riscv64) {
1288     AsmOS << "tail $" << ArgIndex << "@plt\n";
1289   } else {
1290     report_fatal_error("Unsupported architecture for jump tables");
1291   }
1292 
1293   ConstraintOS << (ArgIndex > 0 ? ",s" : "s");
1294   AsmArgs.push_back(Dest);
1295 }
1296 
1297 Type *LowerTypeTestsModule::getJumpTableEntryType() {
1298   return ArrayType::get(Int8Ty, getJumpTableEntrySize());
1299 }
1300 
1301 /// Given a disjoint set of type identifiers and functions, build the bit sets
1302 /// and lower the llvm.type.test calls, architecture dependently.
1303 void LowerTypeTestsModule::buildBitSetsFromFunctions(
1304     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1305   if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
1306       Arch == Triple::thumb || Arch == Triple::aarch64 ||
1307       Arch == Triple::riscv32 || Arch == Triple::riscv64)
1308     buildBitSetsFromFunctionsNative(TypeIds, Functions);
1309   else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
1310     buildBitSetsFromFunctionsWASM(TypeIds, Functions);
1311   else
1312     report_fatal_error("Unsupported architecture for jump tables");
1313 }
1314 
1315 void LowerTypeTestsModule::moveInitializerToModuleConstructor(
1316     GlobalVariable *GV) {
1317   if (WeakInitializerFn == nullptr) {
1318     WeakInitializerFn = Function::Create(
1319         FunctionType::get(Type::getVoidTy(M.getContext()),
1320                           /* IsVarArg */ false),
1321         GlobalValue::InternalLinkage,
1322         M.getDataLayout().getProgramAddressSpace(),
1323         "__cfi_global_var_init", &M);
1324     BasicBlock *BB =
1325         BasicBlock::Create(M.getContext(), "entry", WeakInitializerFn);
1326     ReturnInst::Create(M.getContext(), BB);
1327     WeakInitializerFn->setSection(
1328         ObjectFormat == Triple::MachO
1329             ? "__TEXT,__StaticInit,regular,pure_instructions"
1330             : ".text.startup");
1331     // This code is equivalent to relocation application, and should run at the
1332     // earliest possible time (i.e. with the highest priority).
1333     appendToGlobalCtors(M, WeakInitializerFn, /* Priority */ 0);
1334   }
1335 
1336   IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator());
1337   GV->setConstant(false);
1338   IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlign());
1339   GV->setInitializer(Constant::getNullValue(GV->getValueType()));
1340 }
1341 
1342 void LowerTypeTestsModule::findGlobalVariableUsersOf(
1343     Constant *C, SmallSetVector<GlobalVariable *, 8> &Out) {
1344   for (auto *U : C->users()){
1345     if (auto *GV = dyn_cast<GlobalVariable>(U))
1346       Out.insert(GV);
1347     else if (auto *C2 = dyn_cast<Constant>(U))
1348       findGlobalVariableUsersOf(C2, Out);
1349   }
1350 }
1351 
1352 // Replace all uses of F with (F ? JT : 0).
1353 void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
1354     Function *F, Constant *JT, bool IsJumpTableCanonical) {
1355   // The target expression can not appear in a constant initializer on most
1356   // (all?) targets. Switch to a runtime initializer.
1357   SmallSetVector<GlobalVariable *, 8> GlobalVarUsers;
1358   findGlobalVariableUsersOf(F, GlobalVarUsers);
1359   for (auto *GV : GlobalVarUsers)
1360     moveInitializerToModuleConstructor(GV);
1361 
1362   // Can not RAUW F with an expression that uses F. Replace with a temporary
1363   // placeholder first.
1364   Function *PlaceholderFn =
1365       Function::Create(cast<FunctionType>(F->getValueType()),
1366                        GlobalValue::ExternalWeakLinkage,
1367                        F->getAddressSpace(), "", &M);
1368   replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical);
1369 
1370   convertUsersOfConstantsToInstructions(PlaceholderFn);
1371   // Don't use range based loop, because use list will be modified.
1372   while (!PlaceholderFn->use_empty()) {
1373     Use &U = *PlaceholderFn->use_begin();
1374     auto *InsertPt = dyn_cast<Instruction>(U.getUser());
1375     assert(InsertPt && "Non-instruction users should have been eliminated");
1376     auto *PN = dyn_cast<PHINode>(InsertPt);
1377     if (PN)
1378       InsertPt = PN->getIncomingBlock(U)->getTerminator();
1379     IRBuilder Builder(InsertPt);
1380     Value *ICmp = Builder.CreateICmp(CmpInst::ICMP_NE, F,
1381                                      Constant::getNullValue(F->getType()));
1382     Value *Select = Builder.CreateSelect(ICmp, JT,
1383                                          Constant::getNullValue(F->getType()));
1384     // For phi nodes, we need to update the incoming value for all operands
1385     // with the same predecessor.
1386     if (PN)
1387       PN->setIncomingValueForBlock(InsertPt->getParent(), Select);
1388     else
1389       U.set(Select);
1390   }
1391   PlaceholderFn->eraseFromParent();
1392 }
1393 
1394 static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
1395   Attribute TFAttr = F->getFnAttribute("target-features");
1396   if (TFAttr.isValid()) {
1397     SmallVector<StringRef, 6> Features;
1398     TFAttr.getValueAsString().split(Features, ',');
1399     for (StringRef Feature : Features) {
1400       if (Feature == "-thumb-mode")
1401         return false;
1402       else if (Feature == "+thumb-mode")
1403         return true;
1404     }
1405   }
1406 
1407   return ModuleArch == Triple::thumb;
1408 }
1409 
1410 // Each jump table must be either ARM or Thumb as a whole for the bit-test math
1411 // to work. Pick one that matches the majority of members to minimize interop
1412 // veneers inserted by the linker.
1413 Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding(
1414     ArrayRef<GlobalTypeMember *> Functions) {
1415   if (Arch != Triple::arm && Arch != Triple::thumb)
1416     return Arch;
1417 
1418   if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) {
1419     // In architectures that provide Arm and Thumb-1 but not Thumb-2,
1420     // we should always prefer the Arm jump table format, because the
1421     // Thumb-1 one is larger and slower.
1422     return Triple::arm;
1423   }
1424 
1425   // Otherwise, go with majority vote.
1426   unsigned ArmCount = 0, ThumbCount = 0;
1427   for (const auto GTM : Functions) {
1428     if (!GTM->isJumpTableCanonical()) {
1429       // PLT stubs are always ARM.
1430       // FIXME: This is the wrong heuristic for non-canonical jump tables.
1431       ++ArmCount;
1432       continue;
1433     }
1434 
1435     Function *F = cast<Function>(GTM->getGlobal());
1436     ++(isThumbFunction(F, Arch) ? ThumbCount : ArmCount);
1437   }
1438 
1439   return ArmCount > ThumbCount ? Triple::arm : Triple::thumb;
1440 }
1441 
1442 void LowerTypeTestsModule::createJumpTable(
1443     Function *F, ArrayRef<GlobalTypeMember *> Functions) {
1444   std::string AsmStr, ConstraintStr;
1445   raw_string_ostream AsmOS(AsmStr), ConstraintOS(ConstraintStr);
1446   SmallVector<Value *, 16> AsmArgs;
1447   AsmArgs.reserve(Functions.size() * 2);
1448 
1449   for (GlobalTypeMember *GTM : Functions)
1450     createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs,
1451                          cast<Function>(GTM->getGlobal()));
1452 
1453   // Align the whole table by entry size.
1454   F->setAlignment(Align(getJumpTableEntrySize()));
1455   // Skip prologue.
1456   // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3.
1457   // Luckily, this function does not get any prologue even without the
1458   // attribute.
1459   if (OS != Triple::Win32)
1460     F->addFnAttr(Attribute::Naked);
1461   if (JumpTableArch == Triple::arm)
1462     F->addFnAttr("target-features", "-thumb-mode");
1463   if (JumpTableArch == Triple::thumb) {
1464     F->addFnAttr("target-features", "+thumb-mode");
1465     if (CanUseThumbBWJumpTable) {
1466       // Thumb jump table assembly needs Thumb2. The following attribute is
1467       // added by Clang for -march=armv7.
1468       F->addFnAttr("target-cpu", "cortex-a8");
1469     }
1470   }
1471   // When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI
1472   // for the function to avoid double BTI. This is a no-op without
1473   // -mbranch-protection=.
1474   if (JumpTableArch == Triple::aarch64) {
1475     F->addFnAttr("branch-target-enforcement", "false");
1476     F->addFnAttr("sign-return-address", "none");
1477   }
1478   if (JumpTableArch == Triple::riscv32 || JumpTableArch == Triple::riscv64) {
1479     // Make sure the jump table assembly is not modified by the assembler or
1480     // the linker.
1481     F->addFnAttr("target-features", "-c,-relax");
1482   }
1483   // When -fcf-protection= is used, the inline asm adds an ENDBR. Suppress ENDBR
1484   // for the function to avoid double ENDBR. This is a no-op without
1485   // -fcf-protection=.
1486   if (JumpTableArch == Triple::x86 || JumpTableArch == Triple::x86_64)
1487     F->addFnAttr(Attribute::NoCfCheck);
1488   // Make sure we don't emit .eh_frame for this function.
1489   F->addFnAttr(Attribute::NoUnwind);
1490 
1491   BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F);
1492   IRBuilder<> IRB(BB);
1493 
1494   SmallVector<Type *, 16> ArgTypes;
1495   ArgTypes.reserve(AsmArgs.size());
1496   for (const auto &Arg : AsmArgs)
1497     ArgTypes.push_back(Arg->getType());
1498   InlineAsm *JumpTableAsm =
1499       InlineAsm::get(FunctionType::get(IRB.getVoidTy(), ArgTypes, false),
1500                      AsmOS.str(), ConstraintOS.str(),
1501                      /*hasSideEffects=*/true);
1502 
1503   IRB.CreateCall(JumpTableAsm, AsmArgs);
1504   IRB.CreateUnreachable();
1505 }
1506 
1507 /// Given a disjoint set of type identifiers and functions, build a jump table
1508 /// for the functions, build the bit sets and lower the llvm.type.test calls.
1509 void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
1510     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1511   // Unlike the global bitset builder, the function bitset builder cannot
1512   // re-arrange functions in a particular order and base its calculations on the
1513   // layout of the functions' entry points, as we have no idea how large a
1514   // particular function will end up being (the size could even depend on what
1515   // this pass does!) Instead, we build a jump table, which is a block of code
1516   // consisting of one branch instruction for each of the functions in the bit
1517   // set that branches to the target function, and redirect any taken function
1518   // addresses to the corresponding jump table entry. In the object file's
1519   // symbol table, the symbols for the target functions also refer to the jump
1520   // table entries, so that addresses taken outside the module will pass any
1521   // verification done inside the module.
1522   //
1523   // In more concrete terms, suppose we have three functions f, g, h which are
1524   // of the same type, and a function foo that returns their addresses:
1525   //
1526   // f:
1527   // mov 0, %eax
1528   // ret
1529   //
1530   // g:
1531   // mov 1, %eax
1532   // ret
1533   //
1534   // h:
1535   // mov 2, %eax
1536   // ret
1537   //
1538   // foo:
1539   // mov f, %eax
1540   // mov g, %edx
1541   // mov h, %ecx
1542   // ret
1543   //
1544   // We output the jump table as module-level inline asm string. The end result
1545   // will (conceptually) look like this:
1546   //
1547   // f = .cfi.jumptable
1548   // g = .cfi.jumptable + 4
1549   // h = .cfi.jumptable + 8
1550   // .cfi.jumptable:
1551   // jmp f.cfi  ; 5 bytes
1552   // int3       ; 1 byte
1553   // int3       ; 1 byte
1554   // int3       ; 1 byte
1555   // jmp g.cfi  ; 5 bytes
1556   // int3       ; 1 byte
1557   // int3       ; 1 byte
1558   // int3       ; 1 byte
1559   // jmp h.cfi  ; 5 bytes
1560   // int3       ; 1 byte
1561   // int3       ; 1 byte
1562   // int3       ; 1 byte
1563   //
1564   // f.cfi:
1565   // mov 0, %eax
1566   // ret
1567   //
1568   // g.cfi:
1569   // mov 1, %eax
1570   // ret
1571   //
1572   // h.cfi:
1573   // mov 2, %eax
1574   // ret
1575   //
1576   // foo:
1577   // mov f, %eax
1578   // mov g, %edx
1579   // mov h, %ecx
1580   // ret
1581   //
1582   // Because the addresses of f, g, h are evenly spaced at a power of 2, in the
1583   // normal case the check can be carried out using the same kind of simple
1584   // arithmetic that we normally use for globals.
1585 
1586   // FIXME: find a better way to represent the jumptable in the IR.
1587   assert(!Functions.empty());
1588 
1589   // Decide on the jump table encoding, so that we know how big the
1590   // entries will be.
1591   JumpTableArch = selectJumpTableArmEncoding(Functions);
1592 
1593   // Build a simple layout based on the regular layout of jump tables.
1594   DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
1595   unsigned EntrySize = getJumpTableEntrySize();
1596   for (unsigned I = 0; I != Functions.size(); ++I)
1597     GlobalLayout[Functions[I]] = I * EntrySize;
1598 
1599   Function *JumpTableFn =
1600       Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()),
1601                                          /* IsVarArg */ false),
1602                        GlobalValue::PrivateLinkage,
1603                        M.getDataLayout().getProgramAddressSpace(),
1604                        ".cfi.jumptable", &M);
1605   ArrayType *JumpTableType =
1606       ArrayType::get(getJumpTableEntryType(), Functions.size());
1607   auto JumpTable =
1608       ConstantExpr::getPointerCast(JumpTableFn, JumpTableType->getPointerTo(0));
1609 
1610   lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout);
1611 
1612   {
1613     ScopedSaveAliaseesAndUsed S(M);
1614 
1615     // Build aliases pointing to offsets into the jump table, and replace
1616     // references to the original functions with references to the aliases.
1617     for (unsigned I = 0; I != Functions.size(); ++I) {
1618       Function *F = cast<Function>(Functions[I]->getGlobal());
1619       bool IsJumpTableCanonical = Functions[I]->isJumpTableCanonical();
1620 
1621       Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
1622           ConstantExpr::getInBoundsGetElementPtr(
1623               JumpTableType, JumpTable,
1624               ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
1625                                    ConstantInt::get(IntPtrTy, I)}),
1626           F->getType());
1627 
1628       const bool IsExported = Functions[I]->isExported();
1629       if (!IsJumpTableCanonical) {
1630         GlobalValue::LinkageTypes LT = IsExported
1631                                            ? GlobalValue::ExternalLinkage
1632                                            : GlobalValue::InternalLinkage;
1633         GlobalAlias *JtAlias = GlobalAlias::create(F->getValueType(), 0, LT,
1634                                                    F->getName() + ".cfi_jt",
1635                                                    CombinedGlobalElemPtr, &M);
1636         if (IsExported)
1637           JtAlias->setVisibility(GlobalValue::HiddenVisibility);
1638         else
1639           appendToUsed(M, {JtAlias});
1640       }
1641 
1642       if (IsExported) {
1643         if (IsJumpTableCanonical)
1644           ExportSummary->cfiFunctionDefs().insert(std::string(F->getName()));
1645         else
1646           ExportSummary->cfiFunctionDecls().insert(std::string(F->getName()));
1647       }
1648 
1649       if (!IsJumpTableCanonical) {
1650         if (F->hasExternalWeakLinkage())
1651           replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr,
1652                                                  IsJumpTableCanonical);
1653         else
1654           replaceCfiUses(F, CombinedGlobalElemPtr, IsJumpTableCanonical);
1655       } else {
1656         assert(F->getType()->getAddressSpace() == 0);
1657 
1658         GlobalAlias *FAlias =
1659             GlobalAlias::create(F->getValueType(), 0, F->getLinkage(), "",
1660                                 CombinedGlobalElemPtr, &M);
1661         FAlias->setVisibility(F->getVisibility());
1662         FAlias->takeName(F);
1663         if (FAlias->hasName())
1664           F->setName(FAlias->getName() + ".cfi");
1665         replaceCfiUses(F, FAlias, IsJumpTableCanonical);
1666         if (!F->hasLocalLinkage())
1667           F->setVisibility(GlobalVariable::HiddenVisibility);
1668       }
1669     }
1670   }
1671 
1672   createJumpTable(JumpTableFn, Functions);
1673 }
1674 
1675 /// Assign a dummy layout using an incrementing counter, tag each function
1676 /// with its index represented as metadata, and lower each type test to an
1677 /// integer range comparison. During generation of the indirect function call
1678 /// table in the backend, it will assign the given indexes.
1679 /// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet
1680 /// been finalized.
1681 void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM(
1682     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
1683   assert(!Functions.empty());
1684 
1685   // Build consecutive monotonic integer ranges for each call target set
1686   DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
1687 
1688   for (GlobalTypeMember *GTM : Functions) {
1689     Function *F = cast<Function>(GTM->getGlobal());
1690 
1691     // Skip functions that are not address taken, to avoid bloating the table
1692     if (!F->hasAddressTaken())
1693       continue;
1694 
1695     // Store metadata with the index for each function
1696     MDNode *MD = MDNode::get(F->getContext(),
1697                              ArrayRef<Metadata *>(ConstantAsMetadata::get(
1698                                  ConstantInt::get(Int64Ty, IndirectIndex))));
1699     F->setMetadata("wasm.index", MD);
1700 
1701     // Assign the counter value
1702     GlobalLayout[GTM] = IndirectIndex++;
1703   }
1704 
1705   // The indirect function table index space starts at zero, so pass a NULL
1706   // pointer as the subtracted "jump table" offset.
1707   lowerTypeTestCalls(TypeIds, ConstantPointerNull::get(Int32PtrTy),
1708                      GlobalLayout);
1709 }
1710 
1711 void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
1712     ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals,
1713     ArrayRef<ICallBranchFunnel *> ICallBranchFunnels) {
1714   DenseMap<Metadata *, uint64_t> TypeIdIndices;
1715   for (unsigned I = 0; I != TypeIds.size(); ++I)
1716     TypeIdIndices[TypeIds[I]] = I;
1717 
1718   // For each type identifier, build a set of indices that refer to members of
1719   // the type identifier.
1720   std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size());
1721   unsigned GlobalIndex = 0;
1722   DenseMap<GlobalTypeMember *, uint64_t> GlobalIndices;
1723   for (GlobalTypeMember *GTM : Globals) {
1724     for (MDNode *Type : GTM->types()) {
1725       // Type = { offset, type identifier }
1726       auto I = TypeIdIndices.find(Type->getOperand(1));
1727       if (I != TypeIdIndices.end())
1728         TypeMembers[I->second].insert(GlobalIndex);
1729     }
1730     GlobalIndices[GTM] = GlobalIndex;
1731     GlobalIndex++;
1732   }
1733 
1734   for (ICallBranchFunnel *JT : ICallBranchFunnels) {
1735     TypeMembers.emplace_back();
1736     std::set<uint64_t> &TMSet = TypeMembers.back();
1737     for (GlobalTypeMember *T : JT->targets())
1738       TMSet.insert(GlobalIndices[T]);
1739   }
1740 
1741   // Order the sets of indices by size. The GlobalLayoutBuilder works best
1742   // when given small index sets first.
1743   llvm::stable_sort(TypeMembers, [](const std::set<uint64_t> &O1,
1744                                     const std::set<uint64_t> &O2) {
1745     return O1.size() < O2.size();
1746   });
1747 
1748   // Create a GlobalLayoutBuilder and provide it with index sets as layout
1749   // fragments. The GlobalLayoutBuilder tries to lay out members of fragments as
1750   // close together as possible.
1751   GlobalLayoutBuilder GLB(Globals.size());
1752   for (auto &&MemSet : TypeMembers)
1753     GLB.addFragment(MemSet);
1754 
1755   // Build a vector of globals with the computed layout.
1756   bool IsGlobalSet =
1757       Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal());
1758   std::vector<GlobalTypeMember *> OrderedGTMs(Globals.size());
1759   auto OGTMI = OrderedGTMs.begin();
1760   for (auto &&F : GLB.Fragments) {
1761     for (auto &&Offset : F) {
1762       if (IsGlobalSet != isa<GlobalVariable>(Globals[Offset]->getGlobal()))
1763         report_fatal_error("Type identifier may not contain both global "
1764                            "variables and functions");
1765       *OGTMI++ = Globals[Offset];
1766     }
1767   }
1768 
1769   // Build the bitsets from this disjoint set.
1770   if (IsGlobalSet)
1771     buildBitSetsFromGlobalVariables(TypeIds, OrderedGTMs);
1772   else
1773     buildBitSetsFromFunctions(TypeIds, OrderedGTMs);
1774 }
1775 
1776 /// Lower all type tests in this module.
1777 LowerTypeTestsModule::LowerTypeTestsModule(
1778     Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary,
1779     const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
1780     : M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
1781       DropTypeTests(DropTypeTests || ClDropTypeTests) {
1782   assert(!(ExportSummary && ImportSummary));
1783   Triple TargetTriple(M.getTargetTriple());
1784   Arch = TargetTriple.getArch();
1785   if (Arch == Triple::arm)
1786     CanUseArmJumpTable = true;
1787   if (Arch == Triple::arm || Arch == Triple::thumb) {
1788     auto &FAM =
1789         AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1790     for (Function &F : M) {
1791       auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
1792       if (TTI.hasArmWideBranch(false))
1793         CanUseArmJumpTable = true;
1794       if (TTI.hasArmWideBranch(true))
1795         CanUseThumbBWJumpTable = true;
1796     }
1797   }
1798   OS = TargetTriple.getOS();
1799   ObjectFormat = TargetTriple.getObjectFormat();
1800 }
1801 
1802 bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) {
1803   ModuleSummaryIndex Summary(/*HaveGVs=*/false);
1804 
1805   // Handle the command-line summary arguments. This code is for testing
1806   // purposes only, so we handle errors directly.
1807   if (!ClReadSummary.empty()) {
1808     ExitOnError ExitOnErr("-lowertypetests-read-summary: " + ClReadSummary +
1809                           ": ");
1810     auto ReadSummaryFile =
1811         ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(ClReadSummary)));
1812 
1813     yaml::Input In(ReadSummaryFile->getBuffer());
1814     In >> Summary;
1815     ExitOnErr(errorCodeToError(In.error()));
1816   }
1817 
1818   bool Changed =
1819       LowerTypeTestsModule(
1820           M, AM,
1821           ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
1822           ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
1823           /*DropTypeTests*/ false)
1824           .lower();
1825 
1826   if (!ClWriteSummary.empty()) {
1827     ExitOnError ExitOnErr("-lowertypetests-write-summary: " + ClWriteSummary +
1828                           ": ");
1829     std::error_code EC;
1830     raw_fd_ostream OS(ClWriteSummary, EC, sys::fs::OF_TextWithCRLF);
1831     ExitOnErr(errorCodeToError(EC));
1832 
1833     yaml::Output Out(OS);
1834     Out << Summary;
1835   }
1836 
1837   return Changed;
1838 }
1839 
1840 static bool isDirectCall(Use& U) {
1841   auto *Usr = dyn_cast<CallInst>(U.getUser());
1842   if (Usr) {
1843     auto *CB = dyn_cast<CallBase>(Usr);
1844     if (CB && CB->isCallee(&U))
1845       return true;
1846   }
1847   return false;
1848 }
1849 
1850 void LowerTypeTestsModule::replaceCfiUses(Function *Old, Value *New,
1851                                           bool IsJumpTableCanonical) {
1852   SmallSetVector<Constant *, 4> Constants;
1853   for (Use &U : llvm::make_early_inc_range(Old->uses())) {
1854     // Skip block addresses and no_cfi values, which refer to the function
1855     // body instead of the jump table.
1856     if (isa<BlockAddress, NoCFIValue>(U.getUser()))
1857       continue;
1858 
1859     // Skip direct calls to externally defined or non-dso_local functions
1860     if (isDirectCall(U) && (Old->isDSOLocal() || !IsJumpTableCanonical))
1861       continue;
1862 
1863     // Must handle Constants specially, we cannot call replaceUsesOfWith on a
1864     // constant because they are uniqued.
1865     if (auto *C = dyn_cast<Constant>(U.getUser())) {
1866       if (!isa<GlobalValue>(C)) {
1867         // Save unique users to avoid processing operand replacement
1868         // more than once.
1869         Constants.insert(C);
1870         continue;
1871       }
1872     }
1873 
1874     U.set(New);
1875   }
1876 
1877   // Process operand replacement of saved constants.
1878   for (auto *C : Constants)
1879     C->handleOperandChange(Old, New);
1880 }
1881 
1882 void LowerTypeTestsModule::replaceDirectCalls(Value *Old, Value *New) {
1883   Old->replaceUsesWithIf(New, isDirectCall);
1884 }
1885 
1886 static void dropTypeTests(Module &M, Function &TypeTestFunc) {
1887   for (Use &U : llvm::make_early_inc_range(TypeTestFunc.uses())) {
1888     auto *CI = cast<CallInst>(U.getUser());
1889     // Find and erase llvm.assume intrinsics for this llvm.type.test call.
1890     for (Use &CIU : llvm::make_early_inc_range(CI->uses()))
1891       if (auto *Assume = dyn_cast<AssumeInst>(CIU.getUser()))
1892         Assume->eraseFromParent();
1893     // If the assume was merged with another assume, we might have a use on a
1894     // phi (which will feed the assume). Simply replace the use on the phi
1895     // with "true" and leave the merged assume.
1896     if (!CI->use_empty()) {
1897       assert(
1898           all_of(CI->users(), [](User *U) -> bool { return isa<PHINode>(U); }));
1899       CI->replaceAllUsesWith(ConstantInt::getTrue(M.getContext()));
1900     }
1901     CI->eraseFromParent();
1902   }
1903 }
1904 
1905 bool LowerTypeTestsModule::lower() {
1906   Function *TypeTestFunc =
1907       M.getFunction(Intrinsic::getName(Intrinsic::type_test));
1908 
1909   if (DropTypeTests) {
1910     if (TypeTestFunc)
1911       dropTypeTests(M, *TypeTestFunc);
1912     // Normally we'd have already removed all @llvm.public.type.test calls,
1913     // except for in the case where we originally were performing ThinLTO but
1914     // decided not to in the backend.
1915     Function *PublicTypeTestFunc =
1916         M.getFunction(Intrinsic::getName(Intrinsic::public_type_test));
1917     if (PublicTypeTestFunc)
1918       dropTypeTests(M, *PublicTypeTestFunc);
1919     if (TypeTestFunc || PublicTypeTestFunc) {
1920       // We have deleted the type intrinsics, so we no longer have enough
1921       // information to reason about the liveness of virtual function pointers
1922       // in GlobalDCE.
1923       for (GlobalVariable &GV : M.globals())
1924         GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
1925       return true;
1926     }
1927     return false;
1928   }
1929 
1930   // If only some of the modules were split, we cannot correctly perform
1931   // this transformation. We already checked for the presense of type tests
1932   // with partially split modules during the thin link, and would have emitted
1933   // an error if any were found, so here we can simply return.
1934   if ((ExportSummary && ExportSummary->partiallySplitLTOUnits()) ||
1935       (ImportSummary && ImportSummary->partiallySplitLTOUnits()))
1936     return false;
1937 
1938   Function *ICallBranchFunnelFunc =
1939       M.getFunction(Intrinsic::getName(Intrinsic::icall_branch_funnel));
1940   if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
1941       (!ICallBranchFunnelFunc || ICallBranchFunnelFunc->use_empty()) &&
1942       !ExportSummary && !ImportSummary)
1943     return false;
1944 
1945   if (ImportSummary) {
1946     if (TypeTestFunc)
1947       for (Use &U : llvm::make_early_inc_range(TypeTestFunc->uses()))
1948         importTypeTest(cast<CallInst>(U.getUser()));
1949 
1950     if (ICallBranchFunnelFunc && !ICallBranchFunnelFunc->use_empty())
1951       report_fatal_error(
1952           "unexpected call to llvm.icall.branch.funnel during import phase");
1953 
1954     SmallVector<Function *, 8> Defs;
1955     SmallVector<Function *, 8> Decls;
1956     for (auto &F : M) {
1957       // CFI functions are either external, or promoted. A local function may
1958       // have the same name, but it's not the one we are looking for.
1959       if (F.hasLocalLinkage())
1960         continue;
1961       if (ImportSummary->cfiFunctionDefs().count(std::string(F.getName())))
1962         Defs.push_back(&F);
1963       else if (ImportSummary->cfiFunctionDecls().count(
1964                    std::string(F.getName())))
1965         Decls.push_back(&F);
1966     }
1967 
1968     std::vector<GlobalAlias *> AliasesToErase;
1969     {
1970       ScopedSaveAliaseesAndUsed S(M);
1971       for (auto *F : Defs)
1972         importFunction(F, /*isJumpTableCanonical*/ true, AliasesToErase);
1973       for (auto *F : Decls)
1974         importFunction(F, /*isJumpTableCanonical*/ false, AliasesToErase);
1975     }
1976     for (GlobalAlias *GA : AliasesToErase)
1977       GA->eraseFromParent();
1978 
1979     return true;
1980   }
1981 
1982   // Equivalence class set containing type identifiers and the globals that
1983   // reference them. This is used to partition the set of type identifiers in
1984   // the module into disjoint sets.
1985   using GlobalClassesTy = EquivalenceClasses<
1986       PointerUnion<GlobalTypeMember *, Metadata *, ICallBranchFunnel *>>;
1987   GlobalClassesTy GlobalClasses;
1988 
1989   // Verify the type metadata and build a few data structures to let us
1990   // efficiently enumerate the type identifiers associated with a global:
1991   // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector
1992   // of associated type metadata) and a mapping from type identifiers to their
1993   // list of GlobalTypeMembers and last observed index in the list of globals.
1994   // The indices will be used later to deterministically order the list of type
1995   // identifiers.
1996   BumpPtrAllocator Alloc;
1997   struct TIInfo {
1998     unsigned UniqueId;
1999     std::vector<GlobalTypeMember *> RefGlobals;
2000   };
2001   DenseMap<Metadata *, TIInfo> TypeIdInfo;
2002   unsigned CurUniqueId = 0;
2003   SmallVector<MDNode *, 2> Types;
2004 
2005   // Cross-DSO CFI emits jumptable entries for exported functions as well as
2006   // address taken functions in case they are address taken in other modules.
2007   const bool CrossDsoCfi = M.getModuleFlag("Cross-DSO CFI") != nullptr;
2008 
2009   struct ExportedFunctionInfo {
2010     CfiFunctionLinkage Linkage;
2011     MDNode *FuncMD; // {name, linkage, type[, type...]}
2012   };
2013   DenseMap<StringRef, ExportedFunctionInfo> ExportedFunctions;
2014   if (ExportSummary) {
2015     // A set of all functions that are address taken by a live global object.
2016     DenseSet<GlobalValue::GUID> AddressTaken;
2017     for (auto &I : *ExportSummary)
2018       for (auto &GVS : I.second.SummaryList)
2019         if (GVS->isLive())
2020           for (const auto &Ref : GVS->refs())
2021             AddressTaken.insert(Ref.getGUID());
2022 
2023     NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions");
2024     if (CfiFunctionsMD) {
2025       for (auto *FuncMD : CfiFunctionsMD->operands()) {
2026         assert(FuncMD->getNumOperands() >= 2);
2027         StringRef FunctionName =
2028             cast<MDString>(FuncMD->getOperand(0))->getString();
2029         CfiFunctionLinkage Linkage = static_cast<CfiFunctionLinkage>(
2030             cast<ConstantAsMetadata>(FuncMD->getOperand(1))
2031                 ->getValue()
2032                 ->getUniqueInteger()
2033                 .getZExtValue());
2034         const GlobalValue::GUID GUID = GlobalValue::getGUID(
2035                 GlobalValue::dropLLVMManglingEscape(FunctionName));
2036         // Do not emit jumptable entries for functions that are not-live and
2037         // have no live references (and are not exported with cross-DSO CFI.)
2038         if (!ExportSummary->isGUIDLive(GUID))
2039           continue;
2040         if (!AddressTaken.count(GUID)) {
2041           if (!CrossDsoCfi || Linkage != CFL_Definition)
2042             continue;
2043 
2044           bool Exported = false;
2045           if (auto VI = ExportSummary->getValueInfo(GUID))
2046             for (const auto &GVS : VI.getSummaryList())
2047               if (GVS->isLive() && !GlobalValue::isLocalLinkage(GVS->linkage()))
2048                 Exported = true;
2049 
2050           if (!Exported)
2051             continue;
2052         }
2053         auto P = ExportedFunctions.insert({FunctionName, {Linkage, FuncMD}});
2054         if (!P.second && P.first->second.Linkage != CFL_Definition)
2055           P.first->second = {Linkage, FuncMD};
2056       }
2057 
2058       for (const auto &P : ExportedFunctions) {
2059         StringRef FunctionName = P.first;
2060         CfiFunctionLinkage Linkage = P.second.Linkage;
2061         MDNode *FuncMD = P.second.FuncMD;
2062         Function *F = M.getFunction(FunctionName);
2063         if (F && F->hasLocalLinkage()) {
2064           // Locally defined function that happens to have the same name as a
2065           // function defined in a ThinLTO module. Rename it to move it out of
2066           // the way of the external reference that we're about to create.
2067           // Note that setName will find a unique name for the function, so even
2068           // if there is an existing function with the suffix there won't be a
2069           // name collision.
2070           F->setName(F->getName() + ".1");
2071           F = nullptr;
2072         }
2073 
2074         if (!F)
2075           F = Function::Create(
2076               FunctionType::get(Type::getVoidTy(M.getContext()), false),
2077               GlobalVariable::ExternalLinkage,
2078               M.getDataLayout().getProgramAddressSpace(), FunctionName, &M);
2079 
2080         // If the function is available_externally, remove its definition so
2081         // that it is handled the same way as a declaration. Later we will try
2082         // to create an alias using this function's linkage, which will fail if
2083         // the linkage is available_externally. This will also result in us
2084         // following the code path below to replace the type metadata.
2085         if (F->hasAvailableExternallyLinkage()) {
2086           F->setLinkage(GlobalValue::ExternalLinkage);
2087           F->deleteBody();
2088           F->setComdat(nullptr);
2089           F->clearMetadata();
2090         }
2091 
2092         // Update the linkage for extern_weak declarations when a definition
2093         // exists.
2094         if (Linkage == CFL_Definition && F->hasExternalWeakLinkage())
2095           F->setLinkage(GlobalValue::ExternalLinkage);
2096 
2097         // If the function in the full LTO module is a declaration, replace its
2098         // type metadata with the type metadata we found in cfi.functions. That
2099         // metadata is presumed to be more accurate than the metadata attached
2100         // to the declaration.
2101         if (F->isDeclaration()) {
2102           if (Linkage == CFL_WeakDeclaration)
2103             F->setLinkage(GlobalValue::ExternalWeakLinkage);
2104 
2105           F->eraseMetadata(LLVMContext::MD_type);
2106           for (unsigned I = 2; I < FuncMD->getNumOperands(); ++I)
2107             F->addMetadata(LLVMContext::MD_type,
2108                            *cast<MDNode>(FuncMD->getOperand(I).get()));
2109         }
2110       }
2111     }
2112   }
2113 
2114   DenseMap<GlobalObject *, GlobalTypeMember *> GlobalTypeMembers;
2115   for (GlobalObject &GO : M.global_objects()) {
2116     if (isa<GlobalVariable>(GO) && GO.isDeclarationForLinker())
2117       continue;
2118 
2119     Types.clear();
2120     GO.getMetadata(LLVMContext::MD_type, Types);
2121 
2122     bool IsJumpTableCanonical = false;
2123     bool IsExported = false;
2124     if (Function *F = dyn_cast<Function>(&GO)) {
2125       IsJumpTableCanonical = isJumpTableCanonical(F);
2126       if (ExportedFunctions.count(F->getName())) {
2127         IsJumpTableCanonical |=
2128             ExportedFunctions[F->getName()].Linkage == CFL_Definition;
2129         IsExported = true;
2130       // TODO: The logic here checks only that the function is address taken,
2131       // not that the address takers are live. This can be updated to check
2132       // their liveness and emit fewer jumptable entries once monolithic LTO
2133       // builds also emit summaries.
2134       } else if (!F->hasAddressTaken()) {
2135         if (!CrossDsoCfi || !IsJumpTableCanonical || F->hasLocalLinkage())
2136           continue;
2137       }
2138     }
2139 
2140     auto *GTM = GlobalTypeMember::create(Alloc, &GO, IsJumpTableCanonical,
2141                                          IsExported, Types);
2142     GlobalTypeMembers[&GO] = GTM;
2143     for (MDNode *Type : Types) {
2144       verifyTypeMDNode(&GO, Type);
2145       auto &Info = TypeIdInfo[Type->getOperand(1)];
2146       Info.UniqueId = ++CurUniqueId;
2147       Info.RefGlobals.push_back(GTM);
2148     }
2149   }
2150 
2151   auto AddTypeIdUse = [&](Metadata *TypeId) -> TypeIdUserInfo & {
2152     // Add the call site to the list of call sites for this type identifier. We
2153     // also use TypeIdUsers to keep track of whether we have seen this type
2154     // identifier before. If we have, we don't need to re-add the referenced
2155     // globals to the equivalence class.
2156     auto Ins = TypeIdUsers.insert({TypeId, {}});
2157     if (Ins.second) {
2158       // Add the type identifier to the equivalence class.
2159       GlobalClassesTy::iterator GCI = GlobalClasses.insert(TypeId);
2160       GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
2161 
2162       // Add the referenced globals to the type identifier's equivalence class.
2163       for (GlobalTypeMember *GTM : TypeIdInfo[TypeId].RefGlobals)
2164         CurSet = GlobalClasses.unionSets(
2165             CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
2166     }
2167 
2168     return Ins.first->second;
2169   };
2170 
2171   if (TypeTestFunc) {
2172     for (const Use &U : TypeTestFunc->uses()) {
2173       auto CI = cast<CallInst>(U.getUser());
2174       // If this type test is only used by llvm.assume instructions, it
2175       // was used for whole program devirtualization, and is being kept
2176       // for use by other optimization passes. We do not need or want to
2177       // lower it here. We also don't want to rewrite any associated globals
2178       // unnecessarily. These will be removed by a subsequent LTT invocation
2179       // with the DropTypeTests flag set.
2180       bool OnlyAssumeUses = !CI->use_empty();
2181       for (const Use &CIU : CI->uses()) {
2182         if (isa<AssumeInst>(CIU.getUser()))
2183           continue;
2184         OnlyAssumeUses = false;
2185         break;
2186       }
2187       if (OnlyAssumeUses)
2188         continue;
2189 
2190       auto TypeIdMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
2191       if (!TypeIdMDVal)
2192         report_fatal_error("Second argument of llvm.type.test must be metadata");
2193       auto TypeId = TypeIdMDVal->getMetadata();
2194       AddTypeIdUse(TypeId).CallSites.push_back(CI);
2195     }
2196   }
2197 
2198   if (ICallBranchFunnelFunc) {
2199     for (const Use &U : ICallBranchFunnelFunc->uses()) {
2200       if (Arch != Triple::x86_64)
2201         report_fatal_error(
2202             "llvm.icall.branch.funnel not supported on this target");
2203 
2204       auto CI = cast<CallInst>(U.getUser());
2205 
2206       std::vector<GlobalTypeMember *> Targets;
2207       if (CI->arg_size() % 2 != 1)
2208         report_fatal_error("number of arguments should be odd");
2209 
2210       GlobalClassesTy::member_iterator CurSet;
2211       for (unsigned I = 1; I != CI->arg_size(); I += 2) {
2212         int64_t Offset;
2213         auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset(
2214             CI->getOperand(I), Offset, M.getDataLayout()));
2215         if (!Base)
2216           report_fatal_error(
2217               "Expected branch funnel operand to be global value");
2218 
2219         GlobalTypeMember *GTM = GlobalTypeMembers[Base];
2220         Targets.push_back(GTM);
2221         GlobalClassesTy::member_iterator NewSet =
2222             GlobalClasses.findLeader(GlobalClasses.insert(GTM));
2223         if (I == 1)
2224           CurSet = NewSet;
2225         else
2226           CurSet = GlobalClasses.unionSets(CurSet, NewSet);
2227       }
2228 
2229       GlobalClasses.unionSets(
2230           CurSet, GlobalClasses.findLeader(
2231                       GlobalClasses.insert(ICallBranchFunnel::create(
2232                           Alloc, CI, Targets, ++CurUniqueId))));
2233     }
2234   }
2235 
2236   if (ExportSummary) {
2237     DenseMap<GlobalValue::GUID, TinyPtrVector<Metadata *>> MetadataByGUID;
2238     for (auto &P : TypeIdInfo) {
2239       if (auto *TypeId = dyn_cast<MDString>(P.first))
2240         MetadataByGUID[GlobalValue::getGUID(TypeId->getString())].push_back(
2241             TypeId);
2242     }
2243 
2244     for (auto &P : *ExportSummary) {
2245       for (auto &S : P.second.SummaryList) {
2246         if (!ExportSummary->isGlobalValueLive(S.get()))
2247           continue;
2248         if (auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject()))
2249           for (GlobalValue::GUID G : FS->type_tests())
2250             for (Metadata *MD : MetadataByGUID[G])
2251               AddTypeIdUse(MD).IsExported = true;
2252       }
2253     }
2254   }
2255 
2256   if (GlobalClasses.empty())
2257     return false;
2258 
2259   // Build a list of disjoint sets ordered by their maximum global index for
2260   // determinism.
2261   std::vector<std::pair<GlobalClassesTy::iterator, unsigned>> Sets;
2262   for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
2263                                  E = GlobalClasses.end();
2264        I != E; ++I) {
2265     if (!I->isLeader())
2266       continue;
2267     ++NumTypeIdDisjointSets;
2268 
2269     unsigned MaxUniqueId = 0;
2270     for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
2271          MI != GlobalClasses.member_end(); ++MI) {
2272       if (auto *MD = dyn_cast_if_present<Metadata *>(*MI))
2273         MaxUniqueId = std::max(MaxUniqueId, TypeIdInfo[MD].UniqueId);
2274       else if (auto *BF = dyn_cast_if_present<ICallBranchFunnel *>(*MI))
2275         MaxUniqueId = std::max(MaxUniqueId, BF->UniqueId);
2276     }
2277     Sets.emplace_back(I, MaxUniqueId);
2278   }
2279   llvm::sort(Sets, llvm::less_second());
2280 
2281   // For each disjoint set we found...
2282   for (const auto &S : Sets) {
2283     // Build the list of type identifiers in this disjoint set.
2284     std::vector<Metadata *> TypeIds;
2285     std::vector<GlobalTypeMember *> Globals;
2286     std::vector<ICallBranchFunnel *> ICallBranchFunnels;
2287     for (GlobalClassesTy::member_iterator MI =
2288              GlobalClasses.member_begin(S.first);
2289          MI != GlobalClasses.member_end(); ++MI) {
2290       if (isa<Metadata *>(*MI))
2291         TypeIds.push_back(cast<Metadata *>(*MI));
2292       else if (isa<GlobalTypeMember *>(*MI))
2293         Globals.push_back(cast<GlobalTypeMember *>(*MI));
2294       else
2295         ICallBranchFunnels.push_back(cast<ICallBranchFunnel *>(*MI));
2296     }
2297 
2298     // Order type identifiers by unique ID for determinism. This ordering is
2299     // stable as there is a one-to-one mapping between metadata and unique IDs.
2300     llvm::sort(TypeIds, [&](Metadata *M1, Metadata *M2) {
2301       return TypeIdInfo[M1].UniqueId < TypeIdInfo[M2].UniqueId;
2302     });
2303 
2304     // Same for the branch funnels.
2305     llvm::sort(ICallBranchFunnels,
2306                [&](ICallBranchFunnel *F1, ICallBranchFunnel *F2) {
2307                  return F1->UniqueId < F2->UniqueId;
2308                });
2309 
2310     // Build bitsets for this disjoint set.
2311     buildBitSetsFromDisjointSet(TypeIds, Globals, ICallBranchFunnels);
2312   }
2313 
2314   allocateByteArrays();
2315 
2316   // Parse alias data to replace stand-in function declarations for aliases
2317   // with an alias to the intended target.
2318   if (ExportSummary) {
2319     if (NamedMDNode *AliasesMD = M.getNamedMetadata("aliases")) {
2320       for (auto *AliasMD : AliasesMD->operands()) {
2321         assert(AliasMD->getNumOperands() >= 4);
2322         StringRef AliasName =
2323             cast<MDString>(AliasMD->getOperand(0))->getString();
2324         StringRef Aliasee = cast<MDString>(AliasMD->getOperand(1))->getString();
2325 
2326         if (!ExportedFunctions.count(Aliasee) ||
2327             ExportedFunctions[Aliasee].Linkage != CFL_Definition ||
2328             !M.getNamedAlias(Aliasee))
2329           continue;
2330 
2331         GlobalValue::VisibilityTypes Visibility =
2332             static_cast<GlobalValue::VisibilityTypes>(
2333                 cast<ConstantAsMetadata>(AliasMD->getOperand(2))
2334                     ->getValue()
2335                     ->getUniqueInteger()
2336                     .getZExtValue());
2337         bool Weak =
2338             static_cast<bool>(cast<ConstantAsMetadata>(AliasMD->getOperand(3))
2339                                   ->getValue()
2340                                   ->getUniqueInteger()
2341                                   .getZExtValue());
2342 
2343         auto *Alias = GlobalAlias::create("", M.getNamedAlias(Aliasee));
2344         Alias->setVisibility(Visibility);
2345         if (Weak)
2346           Alias->setLinkage(GlobalValue::WeakAnyLinkage);
2347 
2348         if (auto *F = M.getFunction(AliasName)) {
2349           Alias->takeName(F);
2350           F->replaceAllUsesWith(Alias);
2351           F->eraseFromParent();
2352         } else {
2353           Alias->setName(AliasName);
2354         }
2355       }
2356     }
2357   }
2358 
2359   // Emit .symver directives for exported functions, if they exist.
2360   if (ExportSummary) {
2361     if (NamedMDNode *SymversMD = M.getNamedMetadata("symvers")) {
2362       for (auto *Symver : SymversMD->operands()) {
2363         assert(Symver->getNumOperands() >= 2);
2364         StringRef SymbolName =
2365             cast<MDString>(Symver->getOperand(0))->getString();
2366         StringRef Alias = cast<MDString>(Symver->getOperand(1))->getString();
2367 
2368         if (!ExportedFunctions.count(SymbolName))
2369           continue;
2370 
2371         M.appendModuleInlineAsm(
2372             (llvm::Twine(".symver ") + SymbolName + ", " + Alias).str());
2373       }
2374     }
2375   }
2376 
2377   return true;
2378 }
2379 
2380 PreservedAnalyses LowerTypeTestsPass::run(Module &M,
2381                                           ModuleAnalysisManager &AM) {
2382   bool Changed;
2383   if (UseCommandLine)
2384     Changed = LowerTypeTestsModule::runForTesting(M, AM);
2385   else
2386     Changed =
2387         LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests)
2388             .lower();
2389   if (!Changed)
2390     return PreservedAnalyses::all();
2391   return PreservedAnalyses::none();
2392 }
2393