1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions performs manipulations on Modules.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/IR/DerivedTypes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/MDBuilder.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/Support/xxhash.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "moduleutils"
27 
28 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
29                                 int Priority, Constant *Data) {
30   IRBuilder<> IRB(M.getContext());
31   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
32 
33   // Get the current set of static global constructors and add the new ctor
34   // to the list.
35   SmallVector<Constant *, 16> CurrentCtors;
36   StructType *EltTy;
37   if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
38     EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
39     if (Constant *Init = GVCtor->getInitializer()) {
40       unsigned n = Init->getNumOperands();
41       CurrentCtors.reserve(n + 1);
42       for (unsigned i = 0; i != n; ++i)
43         CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
44     }
45     GVCtor->eraseFromParent();
46   } else {
47     EltTy = StructType::get(
48         IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
49         IRB.getInt8PtrTy());
50   }
51 
52   // Build a 3 field global_ctor entry.  We don't take a comdat key.
53   Constant *CSVals[3];
54   CSVals[0] = IRB.getInt32(Priority);
55   CSVals[1] = F;
56   CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy())
57                    : Constant::getNullValue(IRB.getInt8PtrTy());
58   Constant *RuntimeCtorInit =
59       ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
60 
61   CurrentCtors.push_back(RuntimeCtorInit);
62 
63   // Create a new initializer.
64   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
65   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
66 
67   // Create the new global variable and replace all uses of
68   // the old global variable with the new one.
69   (void)new GlobalVariable(M, NewInit->getType(), false,
70                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
71 }
72 
73 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
74   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
75 }
76 
77 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
78   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
79 }
80 
81 static void collectUsedGlobals(GlobalVariable *GV,
82                                SmallSetVector<Constant *, 16> &Init) {
83   if (!GV || !GV->hasInitializer())
84     return;
85 
86   auto *CA = cast<ConstantArray>(GV->getInitializer());
87   for (Use &Op : CA->operands())
88     Init.insert(cast<Constant>(Op));
89 }
90 
91 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
92   GlobalVariable *GV = M.getGlobalVariable(Name);
93 
94   SmallSetVector<Constant *, 16> Init;
95   collectUsedGlobals(GV, Init);
96   if (GV)
97     GV->eraseFromParent();
98 
99   Type *ArrayEltTy = llvm::Type::getInt8PtrTy(M.getContext());
100   for (auto *V : Values)
101     Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
102 
103   if (Init.empty())
104     return;
105 
106   ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
107   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
108                                 ConstantArray::get(ATy, Init.getArrayRef()),
109                                 Name);
110   GV->setSection("llvm.metadata");
111 }
112 
113 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
114   appendToUsedList(M, "llvm.used", Values);
115 }
116 
117 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
118   appendToUsedList(M, "llvm.compiler.used", Values);
119 }
120 
121 static void removeFromUsedList(Module &M, StringRef Name,
122                                function_ref<bool(Constant *)> ShouldRemove) {
123   GlobalVariable *GV = M.getNamedGlobal(Name);
124   if (!GV)
125     return;
126 
127   SmallSetVector<Constant *, 16> Init;
128   collectUsedGlobals(GV, Init);
129 
130   Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
131 
132   SmallVector<Constant *, 16> NewInit;
133   for (Constant *MaybeRemoved : Init) {
134     if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
135       NewInit.push_back(MaybeRemoved);
136   }
137 
138   if (!NewInit.empty()) {
139     ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
140     GlobalVariable *NewGV =
141         new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
142                            ConstantArray::get(ATy, NewInit), "", GV,
143                            GV->getThreadLocalMode(), GV->getAddressSpace());
144     NewGV->setSection(GV->getSection());
145     NewGV->takeName(GV);
146   }
147 
148   GV->eraseFromParent();
149 }
150 
151 void llvm::removeFromUsedLists(Module &M,
152                                function_ref<bool(Constant *)> ShouldRemove) {
153   removeFromUsedList(M, "llvm.used", ShouldRemove);
154   removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
155 }
156 
157 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
158   if (!M.getModuleFlag("kcfi"))
159     return;
160   // Matches CodeGenModule::CreateKCFITypeId in Clang.
161   LLVMContext &Ctx = M.getContext();
162   MDBuilder MDB(Ctx);
163   F.setMetadata(
164       LLVMContext::MD_kcfi_type,
165       MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
166                            Type::getInt32Ty(Ctx),
167                            static_cast<uint32_t>(xxHash64(MangledType))))));
168   // If the module was compiled with -fpatchable-function-entry, ensure
169   // we use the same patchable-function-prefix.
170   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
171           M.getModuleFlag("kcfi-offset"))) {
172     if (unsigned Offset = MD->getZExtValue())
173       F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
174   }
175 }
176 
177 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
178                                                   ArrayRef<Type *> InitArgTypes,
179                                                   bool Weak) {
180   assert(!InitName.empty() && "Expected init function name");
181   auto *VoidTy = Type::getVoidTy(M.getContext());
182   auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
183   auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
184   auto *Fn = cast<Function>(FnCallee.getCallee());
185   if (Weak && Fn->isDeclaration())
186     Fn->setLinkage(Function::ExternalWeakLinkage);
187   return FnCallee;
188 }
189 
190 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
191   Function *Ctor = Function::createWithDefaultAttr(
192       FunctionType::get(Type::getVoidTy(M.getContext()), false),
193       GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
194       CtorName, &M);
195   Ctor->addFnAttr(Attribute::NoUnwind);
196   setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
197   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
198   ReturnInst::Create(M.getContext(), CtorBB);
199   // Ensure Ctor cannot be discarded, even if in a comdat.
200   appendToUsed(M, {Ctor});
201   return Ctor;
202 }
203 
204 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
205     Module &M, StringRef CtorName, StringRef InitName,
206     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
207     StringRef VersionCheckName, bool Weak) {
208   assert(!InitName.empty() && "Expected init function name");
209   assert(InitArgs.size() == InitArgTypes.size() &&
210          "Sanitizer's init function expects different number of arguments");
211   FunctionCallee InitFunction =
212       declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
213   Function *Ctor = createSanitizerCtor(M, CtorName);
214   IRBuilder<> IRB(M.getContext());
215 
216   BasicBlock *RetBB = &Ctor->getEntryBlock();
217   if (Weak) {
218     RetBB->setName("ret");
219     auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
220     auto *CallInitBB =
221         BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
222     auto *InitFn = cast<Function>(InitFunction.getCallee());
223     auto *InitFnPtr =
224         PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
225     IRB.SetInsertPoint(EntryBB);
226     Value *InitNotNull =
227         IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
228     IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
229     IRB.SetInsertPoint(CallInitBB);
230   } else {
231     IRB.SetInsertPoint(RetBB->getTerminator());
232   }
233 
234   IRB.CreateCall(InitFunction, InitArgs);
235   if (!VersionCheckName.empty()) {
236     FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
237         VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
238         AttributeList());
239     IRB.CreateCall(VersionCheckFunction, {});
240   }
241 
242   if (Weak)
243     IRB.CreateBr(RetBB);
244 
245   return std::make_pair(Ctor, InitFunction);
246 }
247 
248 std::pair<Function *, FunctionCallee>
249 llvm::getOrCreateSanitizerCtorAndInitFunctions(
250     Module &M, StringRef CtorName, StringRef InitName,
251     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
252     function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
253     StringRef VersionCheckName, bool Weak) {
254   assert(!CtorName.empty() && "Expected ctor function name");
255 
256   if (Function *Ctor = M.getFunction(CtorName))
257     // FIXME: Sink this logic into the module, similar to the handling of
258     // globals. This will make moving to a concurrent model much easier.
259     if (Ctor->arg_empty() ||
260         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
261       return {Ctor,
262               declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
263 
264   Function *Ctor;
265   FunctionCallee InitFunction;
266   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
267       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
268   FunctionsCreatedCallback(Ctor, InitFunction);
269   return std::make_pair(Ctor, InitFunction);
270 }
271 
272 void llvm::filterDeadComdatFunctions(
273     SmallVectorImpl<Function *> &DeadComdatFunctions) {
274   SmallPtrSet<Function *, 32> MaybeDeadFunctions;
275   SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
276   for (Function *F : DeadComdatFunctions) {
277     MaybeDeadFunctions.insert(F);
278     if (Comdat *C = F->getComdat())
279       MaybeDeadComdats.insert(C);
280   }
281 
282   // Find comdats for which all users are dead now.
283   SmallPtrSet<Comdat *, 32> DeadComdats;
284   for (Comdat *C : MaybeDeadComdats) {
285     auto IsUserDead = [&](GlobalObject *GO) {
286       auto *F = dyn_cast<Function>(GO);
287       return F && MaybeDeadFunctions.contains(F);
288     };
289     if (all_of(C->getUsers(), IsUserDead))
290       DeadComdats.insert(C);
291   }
292 
293   // Only keep functions which have no comdat or a dead comdat.
294   erase_if(DeadComdatFunctions, [&](Function *F) {
295     Comdat *C = F->getComdat();
296     return C && !DeadComdats.contains(C);
297   });
298 }
299 
300 std::string llvm::getUniqueModuleId(Module *M) {
301   MD5 Md5;
302   bool ExportsSymbols = false;
303   auto AddGlobal = [&](GlobalValue &GV) {
304     if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
305         !GV.hasExternalLinkage() || GV.hasComdat())
306       return;
307     ExportsSymbols = true;
308     Md5.update(GV.getName());
309     Md5.update(ArrayRef<uint8_t>{0});
310   };
311 
312   for (auto &F : *M)
313     AddGlobal(F);
314   for (auto &GV : M->globals())
315     AddGlobal(GV);
316   for (auto &GA : M->aliases())
317     AddGlobal(GA);
318   for (auto &IF : M->ifuncs())
319     AddGlobal(IF);
320 
321   if (!ExportsSymbols)
322     return "";
323 
324   MD5::MD5Result R;
325   Md5.final(R);
326 
327   SmallString<32> Str;
328   MD5::stringifyResult(R, Str);
329   return ("." + Str).str();
330 }
331 
332 void VFABI::setVectorVariantNames(CallInst *CI,
333                                   ArrayRef<std::string> VariantMappings) {
334   if (VariantMappings.empty())
335     return;
336 
337   SmallString<256> Buffer;
338   llvm::raw_svector_ostream Out(Buffer);
339   for (const std::string &VariantMapping : VariantMappings)
340     Out << VariantMapping << ",";
341   // Get rid of the trailing ','.
342   assert(!Buffer.str().empty() && "Must have at least one char.");
343   Buffer.pop_back();
344 
345   Module *M = CI->getModule();
346 #ifndef NDEBUG
347   for (const std::string &VariantMapping : VariantMappings) {
348     LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
349     std::optional<VFInfo> VI = VFABI::tryDemangleForVFABI(VariantMapping, *M);
350     assert(VI && "Cannot add an invalid VFABI name.");
351     assert(M->getNamedValue(VI->VectorName) &&
352            "Cannot add variant to attribute: "
353            "vector function declaration is missing.");
354   }
355 #endif
356   CI->addFnAttr(
357       Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
358 }
359 
360 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
361                                StringRef SectionName, Align Alignment) {
362   // Embed the memory buffer into the module.
363   Constant *ModuleConstant = ConstantDataArray::get(
364       M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
365   GlobalVariable *GV = new GlobalVariable(
366       M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
367       ModuleConstant, "llvm.embedded.object");
368   GV->setSection(SectionName);
369   GV->setAlignment(Alignment);
370 
371   LLVMContext &Ctx = M.getContext();
372   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
373   Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
374                         MDString::get(Ctx, SectionName)};
375 
376   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
377   GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
378 
379   appendToCompilerUsed(M, GV);
380 }
381 
382 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
383     Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
384   SmallVector<GlobalIFunc *, 32> AllIFuncs;
385   ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
386   if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
387     for (GlobalIFunc &GI : M.ifuncs())
388       AllIFuncs.push_back(&GI);
389     IFuncsToLower = AllIFuncs;
390   }
391 
392   bool UnhandledUsers = false;
393   LLVMContext &Ctx = M.getContext();
394   const DataLayout &DL = M.getDataLayout();
395 
396   PointerType *TableEntryTy =
397       PointerType::get(Ctx, DL.getProgramAddressSpace());
398 
399   ArrayType *FuncPtrTableTy =
400       ArrayType::get(TableEntryTy, IFuncsToLower.size());
401 
402   Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
403 
404   // Create a global table of function pointers we'll initialize in a global
405   // constructor.
406   auto *FuncPtrTable = new GlobalVariable(
407       M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
408       PoisonValue::get(FuncPtrTableTy), "", nullptr,
409       GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
410   FuncPtrTable->setAlignment(PtrAlign);
411 
412   // Create a function to initialize the function pointer table.
413   Function *NewCtor = Function::Create(
414       FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
415       DL.getProgramAddressSpace(), "", &M);
416 
417   BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
418   IRBuilder<> InitBuilder(BB);
419 
420   size_t TableIndex = 0;
421   for (GlobalIFunc *GI : IFuncsToLower) {
422     Function *ResolvedFunction = GI->getResolverFunction();
423 
424     // We don't know what to pass to a resolver function taking arguments
425     //
426     // FIXME: Is this even valid? clang and gcc don't complain but this
427     // probably should be invalid IR. We could just pass through undef.
428     if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
429       LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
430                         << ResolvedFunction->getName() << " with parameters\n");
431       UnhandledUsers = true;
432       continue;
433     }
434 
435     // Initialize the function pointer table.
436     CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
437     Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
438     Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
439         FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
440     InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
441 
442     // Update all users to load a pointer from the global table.
443     for (User *User : make_early_inc_range(GI->users())) {
444       Instruction *UserInst = dyn_cast<Instruction>(User);
445       if (!UserInst) {
446         // TODO: Should handle constantexpr casts in user instructions. Probably
447         // can't do much about constant initializers.
448         UnhandledUsers = true;
449         continue;
450       }
451 
452       IRBuilder<> UseBuilder(UserInst);
453       LoadInst *ResolvedTarget =
454           UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
455       Value *ResolvedCast =
456           UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
457       UserInst->replaceUsesOfWith(GI, ResolvedCast);
458     }
459 
460     // If we handled all users, erase the ifunc.
461     if (GI->use_empty())
462       GI->eraseFromParent();
463   }
464 
465   InitBuilder.CreateRetVoid();
466 
467   PointerType *ConstantDataTy = PointerType::get(Ctx, 0);
468 
469   // TODO: Is this the right priority? Probably should be before any other
470   // constructors?
471   const int Priority = 10;
472   appendToGlobalCtors(M, NewCtor, Priority,
473                       ConstantPointerNull::get(ConstantDataTy));
474   return UnhandledUsers;
475 }
476