1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions performs manipulations on Modules.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/IR/DerivedTypes.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/IRBuilder.h"
18 #include "llvm/IR/MDBuilder.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Support/raw_ostream.h"
21 #include "llvm/Support/xxhash.h"
22 using namespace llvm;
23 
24 #define DEBUG_TYPE "moduleutils"
25 
26 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
27                                 int Priority, Constant *Data) {
28   IRBuilder<> IRB(M.getContext());
29   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
30 
31   // Get the current set of static global constructors and add the new ctor
32   // to the list.
33   SmallVector<Constant *, 16> CurrentCtors;
34   StructType *EltTy = StructType::get(
35       IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
36       IRB.getInt8PtrTy());
37 
38   if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
39     if (Constant *Init = GVCtor->getInitializer()) {
40       unsigned n = Init->getNumOperands();
41       CurrentCtors.reserve(n + 1);
42       for (unsigned i = 0; i != n; ++i)
43         CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
44     }
45     GVCtor->eraseFromParent();
46   }
47 
48   // Build a 3 field global_ctor entry.  We don't take a comdat key.
49   Constant *CSVals[3];
50   CSVals[0] = IRB.getInt32(Priority);
51   CSVals[1] = F;
52   CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
53                    : Constant::getNullValue(IRB.getInt8PtrTy());
54   Constant *RuntimeCtorInit =
55       ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
56 
57   CurrentCtors.push_back(RuntimeCtorInit);
58 
59   // Create a new initializer.
60   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
61   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
62 
63   // Create the new global variable and replace all uses of
64   // the old global variable with the new one.
65   (void)new GlobalVariable(M, NewInit->getType(), false,
66                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
67 }
68 
69 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
70   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
71 }
72 
73 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
74   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
75 }
76 
77 static void collectUsedGlobals(GlobalVariable *GV,
78                                SmallSetVector<Constant *, 16> &Init) {
79   if (!GV || !GV->hasInitializer())
80     return;
81 
82   auto *CA = cast<ConstantArray>(GV->getInitializer());
83   for (Use &Op : CA->operands())
84     Init.insert(cast<Constant>(Op));
85 }
86 
87 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
88   GlobalVariable *GV = M.getGlobalVariable(Name);
89 
90   SmallSetVector<Constant *, 16> Init;
91   collectUsedGlobals(GV, Init);
92   if (GV)
93     GV->eraseFromParent();
94 
95   Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext());
96   for (auto *V : Values)
97     Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
98 
99   if (Init.empty())
100     return;
101 
102   ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
103   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
104                                 ConstantArray::get(ATy, Init.getArrayRef()),
105                                 Name);
106   GV->setSection("llvm.metadata");
107 }
108 
109 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
110   appendToUsedList(M, "llvm.used", Values);
111 }
112 
113 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
114   appendToUsedList(M, "llvm.compiler.used", Values);
115 }
116 
117 static void removeFromUsedList(Module &M, StringRef Name,
118                                function_ref<bool(Constant *)> ShouldRemove) {
119   GlobalVariable *GV = M.getNamedGlobal(Name);
120   if (!GV)
121     return;
122 
123   SmallSetVector<Constant *, 16> Init;
124   collectUsedGlobals(GV, Init);
125 
126   Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
127 
128   SmallVector<Constant *, 16> NewInit;
129   for (Constant *MaybeRemoved : Init) {
130     if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
131       NewInit.push_back(MaybeRemoved);
132   }
133 
134   if (!NewInit.empty()) {
135     ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
136     GlobalVariable *NewGV =
137         new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
138                            ConstantArray::get(ATy, NewInit), "", GV,
139                            GV->getThreadLocalMode(), GV->getAddressSpace());
140     NewGV->setSection(GV->getSection());
141     NewGV->takeName(GV);
142   }
143 
144   GV->eraseFromParent();
145 }
146 
147 void llvm::removeFromUsedLists(Module &M,
148                                function_ref<bool(Constant *)> ShouldRemove) {
149   removeFromUsedList(M, "llvm.used", ShouldRemove);
150   removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
151 }
152 
153 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
154   if (!M.getModuleFlag("kcfi"))
155     return;
156   // Matches CodeGenModule::CreateKCFITypeId in Clang.
157   LLVMContext &Ctx = M.getContext();
158   MDBuilder MDB(Ctx);
159   F.setMetadata(
160       LLVMContext::MD_kcfi_type,
161       MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
162                            Type::getInt32Ty(Ctx),
163                            static_cast<uint32_t>(xxHash64(MangledType))))));
164   // If the module was compiled with -fpatchable-function-entry, ensure
165   // we use the same patchable-function-prefix.
166   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
167           M.getModuleFlag("kcfi-offset"))) {
168     if (unsigned Offset = MD->getZExtValue())
169       F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
170   }
171 }
172 
173 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
174                                                   ArrayRef<Type *> InitArgTypes,
175                                                   bool Weak) {
176   assert(!InitName.empty() && "Expected init function name");
177   auto *VoidTy = Type::getVoidTy(M.getContext());
178   auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
179   auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
180   auto *Fn = cast<Function>(FnCallee.getCallee());
181   if (Weak && Fn->isDeclaration())
182     Fn->setLinkage(Function::ExternalWeakLinkage);
183   return FnCallee;
184 }
185 
186 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
187   Function *Ctor = Function::createWithDefaultAttr(
188       FunctionType::get(Type::getVoidTy(M.getContext()), false),
189       GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
190       CtorName, &M);
191   Ctor->addFnAttr(Attribute::NoUnwind);
192   setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
193   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
194   ReturnInst::Create(M.getContext(), CtorBB);
195   // Ensure Ctor cannot be discarded, even if in a comdat.
196   appendToUsed(M, {Ctor});
197   return Ctor;
198 }
199 
200 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
201     Module &M, StringRef CtorName, StringRef InitName,
202     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
203     StringRef VersionCheckName, bool Weak) {
204   assert(!InitName.empty() && "Expected init function name");
205   assert(InitArgs.size() == InitArgTypes.size() &&
206          "Sanitizer's init function expects different number of arguments");
207   FunctionCallee InitFunction =
208       declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
209   Function *Ctor = createSanitizerCtor(M, CtorName);
210   IRBuilder<> IRB(M.getContext());
211 
212   BasicBlock *RetBB = &Ctor->getEntryBlock();
213   if (Weak) {
214     RetBB->setName("ret");
215     auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
216     auto *CallInitBB =
217         BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
218     auto *InitFn = cast<Function>(InitFunction.getCallee());
219     auto *InitFnPtr =
220         PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
221     IRB.SetInsertPoint(EntryBB);
222     Value *InitNotNull =
223         IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
224     IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
225     IRB.SetInsertPoint(CallInitBB);
226   } else {
227     IRB.SetInsertPoint(RetBB->getTerminator());
228   }
229 
230   IRB.CreateCall(InitFunction, InitArgs);
231   if (!VersionCheckName.empty()) {
232     FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
233         VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
234         AttributeList());
235     IRB.CreateCall(VersionCheckFunction, {});
236   }
237 
238   if (Weak)
239     IRB.CreateBr(RetBB);
240 
241   return std::make_pair(Ctor, InitFunction);
242 }
243 
244 std::pair<Function *, FunctionCallee>
245 llvm::getOrCreateSanitizerCtorAndInitFunctions(
246     Module &M, StringRef CtorName, StringRef InitName,
247     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
248     function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
249     StringRef VersionCheckName, bool Weak) {
250   assert(!CtorName.empty() && "Expected ctor function name");
251 
252   if (Function *Ctor = M.getFunction(CtorName))
253     // FIXME: Sink this logic into the module, similar to the handling of
254     // globals. This will make moving to a concurrent model much easier.
255     if (Ctor->arg_empty() ||
256         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
257       return {Ctor,
258               declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
259 
260   Function *Ctor;
261   FunctionCallee InitFunction;
262   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
263       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
264   FunctionsCreatedCallback(Ctor, InitFunction);
265   return std::make_pair(Ctor, InitFunction);
266 }
267 
268 void llvm::filterDeadComdatFunctions(
269     SmallVectorImpl<Function *> &DeadComdatFunctions) {
270   SmallPtrSet<Function *, 32> MaybeDeadFunctions;
271   SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
272   for (Function *F : DeadComdatFunctions) {
273     MaybeDeadFunctions.insert(F);
274     if (Comdat *C = F->getComdat())
275       MaybeDeadComdats.insert(C);
276   }
277 
278   // Find comdats for which all users are dead now.
279   SmallPtrSet<Comdat *, 32> DeadComdats;
280   for (Comdat *C : MaybeDeadComdats) {
281     auto IsUserDead = [&](GlobalObject *GO) {
282       auto *F = dyn_cast<Function>(GO);
283       return F && MaybeDeadFunctions.contains(F);
284     };
285     if (all_of(C->getUsers(), IsUserDead))
286       DeadComdats.insert(C);
287   }
288 
289   // Only keep functions which have no comdat or a dead comdat.
290   erase_if(DeadComdatFunctions, [&](Function *F) {
291     Comdat *C = F->getComdat();
292     return C && !DeadComdats.contains(C);
293   });
294 }
295 
296 std::string llvm::getUniqueModuleId(Module *M) {
297   MD5 Md5;
298   bool ExportsSymbols = false;
299   auto AddGlobal = [&](GlobalValue &GV) {
300     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
301         !GV.hasExternalLinkage() || GV.hasComdat())
302       return;
303     ExportsSymbols = true;
304     Md5.update(GV.getName());
305     Md5.update(ArrayRef<uint8_t>{0});
306   };
307 
308   for (auto &F : *M)
309     AddGlobal(F);
310   for (auto &GV : M->globals())
311     AddGlobal(GV);
312   for (auto &GA : M->aliases())
313     AddGlobal(GA);
314   for (auto &IF : M->ifuncs())
315     AddGlobal(IF);
316 
317   if (!ExportsSymbols)
318     return "";
319 
320   MD5::MD5Result R;
321   Md5.final(R);
322 
323   SmallString<32> Str;
324   MD5::stringifyResult(R, Str);
325   return ("." + Str).str();
326 }
327 
328 void VFABI::setVectorVariantNames(CallInst *CI,
329                                   ArrayRef<std::string> VariantMappings) {
330   if (VariantMappings.empty())
331     return;
332 
333   SmallString<256> Buffer;
334   llvm::raw_svector_ostream Out(Buffer);
335   for (const std::string &VariantMapping : VariantMappings)
336     Out << VariantMapping << ",";
337   // Get rid of the trailing ','.
338   assert(!Buffer.str().empty() && "Must have at least one char.");
339   Buffer.pop_back();
340 
341   Module *M = CI->getModule();
342 #ifndef NDEBUG
343   for (const std::string &VariantMapping : VariantMappings) {
344     LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
345     std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
346     assert(VI && "Cannot add an invalid VFABI name.");
347     assert(M->getNamedValue(VI->VectorName) &&
348            "Cannot add variant to attribute: "
349            "vector function declaration is missing.");
350   }
351 #endif
352   CI->addFnAttr(
353       Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
354 }
355 
356 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
357                                StringRef SectionName, Align Alignment) {
358   // Embed the memory buffer into the module.
359   Constant *ModuleConstant = ConstantDataArray::get(
360       M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
361   GlobalVariable *GV = new GlobalVariable(
362       M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
363       ModuleConstant, "llvm.embedded.object");
364   GV->setSection(SectionName);
365   GV->setAlignment(Alignment);
366 
367   LLVMContext &Ctx = M.getContext();
368   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
369   Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
370                         MDString::get(Ctx, SectionName)};
371 
372   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
373   GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
374 
375   appendToCompilerUsed(M, GV);
376 }
377 
378 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
379     Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
380   SmallVector<GlobalIFunc *, 32> AllIFuncs;
381   ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
382   if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
383     for (GlobalIFunc &GI : M.ifuncs())
384       AllIFuncs.push_back(&GI);
385     IFuncsToLower = AllIFuncs;
386   }
387 
388   bool UnhandledUsers = false;
389   LLVMContext &Ctx = M.getContext();
390   const DataLayout &DL = M.getDataLayout();
391 
392   PointerType *TableEntryTy =
393       Ctx.supportsTypedPointers()
394           ? PointerType::get(Type::getInt8Ty(Ctx), DL.getProgramAddressSpace())
395           : PointerType::get(Ctx, DL.getProgramAddressSpace());
396 
397   ArrayType *FuncPtrTableTy =
398       ArrayType::get(TableEntryTy, IFuncsToLower.size());
399 
400   Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
401 
402   // Create a global table of function pointers we'll initialize in a global
403   // constructor.
404   auto *FuncPtrTable = new GlobalVariable(
405       M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
406       PoisonValue::get(FuncPtrTableTy), "", nullptr,
407       GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
408   FuncPtrTable->setAlignment(PtrAlign);
409 
410   // Create a function to initialize the function pointer table.
411   Function *NewCtor = Function::Create(
412       FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
413       DL.getProgramAddressSpace(), "", &M);
414 
415   BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
416   IRBuilder<> InitBuilder(BB);
417 
418   size_t TableIndex = 0;
419   for (GlobalIFunc *GI : IFuncsToLower) {
420     Function *ResolvedFunction = GI->getResolverFunction();
421 
422     // We don't know what to pass to a resolver function taking arguments
423     //
424     // FIXME: Is this even valid? clang and gcc don't complain but this
425     // probably should be invalid IR. We could just pass through undef.
426     if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
427       LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
428                         << ResolvedFunction->getName() << " with parameters\n");
429       UnhandledUsers = true;
430       continue;
431     }
432 
433     // Initialize the function pointer table.
434     CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
435     Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
436     Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
437         FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
438     InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
439 
440     // Update all users to load a pointer from the global table.
441     for (User *User : make_early_inc_range(GI->users())) {
442       Instruction *UserInst = dyn_cast<Instruction>(User);
443       if (!UserInst) {
444         // TODO: Should handle constantexpr casts in user instructions. Probably
445         // can't do much about constant initializers.
446         UnhandledUsers = true;
447         continue;
448       }
449 
450       IRBuilder<> UseBuilder(UserInst);
451       LoadInst *ResolvedTarget =
452           UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
453       Value *ResolvedCast =
454           UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
455       UserInst->replaceUsesOfWith(GI, ResolvedCast);
456     }
457 
458     // If we handled all users, erase the ifunc.
459     if (GI->use_empty())
460       GI->eraseFromParent();
461   }
462 
463   InitBuilder.CreateRetVoid();
464 
465   PointerType *ConstantDataTy = Ctx.supportsTypedPointers()
466                                     ? PointerType::get(Type::getInt8Ty(Ctx), 0)
467                                     : PointerType::get(Ctx, 0);
468 
469   // TODO: Is this the right priority? Probably should be before any other
470   // constructors?
471   const int Priority = 10;
472   appendToGlobalCtors(M, NewCtor, Priority,
473                       ConstantPointerNull::get(ConstantDataTy));
474   return UnhandledUsers;
475 }
476