1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions performs manipulations on Modules.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/IR/DerivedTypes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/MDBuilder.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/Support/xxhash.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "moduleutils"
27 
28 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
29                                 int Priority, Constant *Data) {
30   IRBuilder<> IRB(M.getContext());
31   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
32 
33   // Get the current set of static global constructors and add the new ctor
34   // to the list.
35   SmallVector<Constant *, 16> CurrentCtors;
36   StructType *EltTy;
37   if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
38     EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
39     if (Constant *Init = GVCtor->getInitializer()) {
40       unsigned n = Init->getNumOperands();
41       CurrentCtors.reserve(n + 1);
42       for (unsigned i = 0; i != n; ++i)
43         CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
44     }
45     GVCtor->eraseFromParent();
46   } else {
47     EltTy = StructType::get(IRB.getInt32Ty(),
48                             PointerType::get(FnTy, F->getAddressSpace()),
49                             IRB.getPtrTy());
50   }
51 
52   // Build a 3 field global_ctor entry.  We don't take a comdat key.
53   Constant *CSVals[3];
54   CSVals[0] = IRB.getInt32(Priority);
55   CSVals[1] = F;
56   CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy())
57                    : Constant::getNullValue(IRB.getPtrTy());
58   Constant *RuntimeCtorInit =
59       ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
60 
61   CurrentCtors.push_back(RuntimeCtorInit);
62 
63   // Create a new initializer.
64   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
65   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
66 
67   // Create the new global variable and replace all uses of
68   // the old global variable with the new one.
69   (void)new GlobalVariable(M, NewInit->getType(), false,
70                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
71 }
72 
73 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
74   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
75 }
76 
77 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
78   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
79 }
80 
81 static void collectUsedGlobals(GlobalVariable *GV,
82                                SmallSetVector<Constant *, 16> &Init) {
83   if (!GV || !GV->hasInitializer())
84     return;
85 
86   auto *CA = cast<ConstantArray>(GV->getInitializer());
87   for (Use &Op : CA->operands())
88     Init.insert(cast<Constant>(Op));
89 }
90 
91 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
92   GlobalVariable *GV = M.getGlobalVariable(Name);
93 
94   SmallSetVector<Constant *, 16> Init;
95   collectUsedGlobals(GV, Init);
96   if (GV)
97     GV->eraseFromParent();
98 
99   Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
100   for (auto *V : Values)
101     Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
102 
103   if (Init.empty())
104     return;
105 
106   ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
107   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
108                                 ConstantArray::get(ATy, Init.getArrayRef()),
109                                 Name);
110   GV->setSection("llvm.metadata");
111 }
112 
113 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
114   appendToUsedList(M, "llvm.used", Values);
115 }
116 
117 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
118   appendToUsedList(M, "llvm.compiler.used", Values);
119 }
120 
121 static void removeFromUsedList(Module &M, StringRef Name,
122                                function_ref<bool(Constant *)> ShouldRemove) {
123   GlobalVariable *GV = M.getNamedGlobal(Name);
124   if (!GV)
125     return;
126 
127   SmallSetVector<Constant *, 16> Init;
128   collectUsedGlobals(GV, Init);
129 
130   Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
131 
132   SmallVector<Constant *, 16> NewInit;
133   for (Constant *MaybeRemoved : Init) {
134     if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
135       NewInit.push_back(MaybeRemoved);
136   }
137 
138   if (!NewInit.empty()) {
139     ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
140     GlobalVariable *NewGV =
141         new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
142                            ConstantArray::get(ATy, NewInit), "", GV,
143                            GV->getThreadLocalMode(), GV->getAddressSpace());
144     NewGV->setSection(GV->getSection());
145     NewGV->takeName(GV);
146   }
147 
148   GV->eraseFromParent();
149 }
150 
151 void llvm::removeFromUsedLists(Module &M,
152                                function_ref<bool(Constant *)> ShouldRemove) {
153   removeFromUsedList(M, "llvm.used", ShouldRemove);
154   removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
155 }
156 
157 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
158   if (!M.getModuleFlag("kcfi"))
159     return;
160   // Matches CodeGenModule::CreateKCFITypeId in Clang.
161   LLVMContext &Ctx = M.getContext();
162   MDBuilder MDB(Ctx);
163   F.setMetadata(
164       LLVMContext::MD_kcfi_type,
165       MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
166                            Type::getInt32Ty(Ctx),
167                            static_cast<uint32_t>(xxHash64(MangledType))))));
168   // If the module was compiled with -fpatchable-function-entry, ensure
169   // we use the same patchable-function-prefix.
170   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
171           M.getModuleFlag("kcfi-offset"))) {
172     if (unsigned Offset = MD->getZExtValue())
173       F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
174   }
175 }
176 
177 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
178                                                   ArrayRef<Type *> InitArgTypes,
179                                                   bool Weak) {
180   assert(!InitName.empty() && "Expected init function name");
181   auto *VoidTy = Type::getVoidTy(M.getContext());
182   auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
183   auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
184   auto *Fn = cast<Function>(FnCallee.getCallee());
185   if (Weak && Fn->isDeclaration())
186     Fn->setLinkage(Function::ExternalWeakLinkage);
187   return FnCallee;
188 }
189 
190 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
191   Function *Ctor = Function::createWithDefaultAttr(
192       FunctionType::get(Type::getVoidTy(M.getContext()), false),
193       GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
194       CtorName, &M);
195   Ctor->addFnAttr(Attribute::NoUnwind);
196   setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
197   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
198   ReturnInst::Create(M.getContext(), CtorBB);
199   // Ensure Ctor cannot be discarded, even if in a comdat.
200   appendToUsed(M, {Ctor});
201   return Ctor;
202 }
203 
204 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
205     Module &M, StringRef CtorName, StringRef InitName,
206     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
207     StringRef VersionCheckName, bool Weak) {
208   assert(!InitName.empty() && "Expected init function name");
209   assert(InitArgs.size() == InitArgTypes.size() &&
210          "Sanitizer's init function expects different number of arguments");
211   FunctionCallee InitFunction =
212       declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
213   Function *Ctor = createSanitizerCtor(M, CtorName);
214   IRBuilder<> IRB(M.getContext());
215 
216   BasicBlock *RetBB = &Ctor->getEntryBlock();
217   if (Weak) {
218     RetBB->setName("ret");
219     auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
220     auto *CallInitBB =
221         BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
222     auto *InitFn = cast<Function>(InitFunction.getCallee());
223     auto *InitFnPtr =
224         PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
225     IRB.SetInsertPoint(EntryBB);
226     Value *InitNotNull =
227         IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
228     IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
229     IRB.SetInsertPoint(CallInitBB);
230   } else {
231     IRB.SetInsertPoint(RetBB->getTerminator());
232   }
233 
234   IRB.CreateCall(InitFunction, InitArgs);
235   if (!VersionCheckName.empty()) {
236     FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
237         VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
238         AttributeList());
239     IRB.CreateCall(VersionCheckFunction, {});
240   }
241 
242   if (Weak)
243     IRB.CreateBr(RetBB);
244 
245   return std::make_pair(Ctor, InitFunction);
246 }
247 
248 std::pair<Function *, FunctionCallee>
249 llvm::getOrCreateSanitizerCtorAndInitFunctions(
250     Module &M, StringRef CtorName, StringRef InitName,
251     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
252     function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
253     StringRef VersionCheckName, bool Weak) {
254   assert(!CtorName.empty() && "Expected ctor function name");
255 
256   if (Function *Ctor = M.getFunction(CtorName))
257     // FIXME: Sink this logic into the module, similar to the handling of
258     // globals. This will make moving to a concurrent model much easier.
259     if (Ctor->arg_empty() ||
260         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
261       return {Ctor,
262               declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
263 
264   Function *Ctor;
265   FunctionCallee InitFunction;
266   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
267       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
268   FunctionsCreatedCallback(Ctor, InitFunction);
269   return std::make_pair(Ctor, InitFunction);
270 }
271 
272 void llvm::filterDeadComdatFunctions(
273     SmallVectorImpl<Function *> &DeadComdatFunctions) {
274   SmallPtrSet<Function *, 32> MaybeDeadFunctions;
275   SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
276   for (Function *F : DeadComdatFunctions) {
277     MaybeDeadFunctions.insert(F);
278     if (Comdat *C = F->getComdat())
279       MaybeDeadComdats.insert(C);
280   }
281 
282   // Find comdats for which all users are dead now.
283   SmallPtrSet<Comdat *, 32> DeadComdats;
284   for (Comdat *C : MaybeDeadComdats) {
285     auto IsUserDead = [&](GlobalObject *GO) {
286       auto *F = dyn_cast<Function>(GO);
287       return F && MaybeDeadFunctions.contains(F);
288     };
289     if (all_of(C->getUsers(), IsUserDead))
290       DeadComdats.insert(C);
291   }
292 
293   // Only keep functions which have no comdat or a dead comdat.
294   erase_if(DeadComdatFunctions, [&](Function *F) {
295     Comdat *C = F->getComdat();
296     return C && !DeadComdats.contains(C);
297   });
298 }
299 
300 std::string llvm::getUniqueModuleId(Module *M) {
301   MD5 Md5;
302   bool ExportsSymbols = false;
303   auto AddGlobal = [&](GlobalValue &GV) {
304     if (GV.isDeclaration() || GV.getName().starts_with("llvm.") ||
305         !GV.hasExternalLinkage() || GV.hasComdat())
306       return;
307     ExportsSymbols = true;
308     Md5.update(GV.getName());
309     Md5.update(ArrayRef<uint8_t>{0});
310   };
311 
312   for (auto &F : *M)
313     AddGlobal(F);
314   for (auto &GV : M->globals())
315     AddGlobal(GV);
316   for (auto &GA : M->aliases())
317     AddGlobal(GA);
318   for (auto &IF : M->ifuncs())
319     AddGlobal(IF);
320 
321   if (!ExportsSymbols)
322     return "";
323 
324   MD5::MD5Result R;
325   Md5.final(R);
326 
327   SmallString<32> Str;
328   MD5::stringifyResult(R, Str);
329   return ("." + Str).str();
330 }
331 
332 void VFABI::setVectorVariantNames(CallInst *CI,
333                                   ArrayRef<std::string> VariantMappings) {
334   if (VariantMappings.empty())
335     return;
336 
337   SmallString<256> Buffer;
338   llvm::raw_svector_ostream Out(Buffer);
339   for (const std::string &VariantMapping : VariantMappings)
340     Out << VariantMapping << ",";
341   // Get rid of the trailing ','.
342   assert(!Buffer.str().empty() && "Must have at least one char.");
343   Buffer.pop_back();
344 
345   Module *M = CI->getModule();
346 #ifndef NDEBUG
347   for (const std::string &VariantMapping : VariantMappings) {
348     LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << VariantMapping << "'\n");
349     std::optional<VFInfo> VI =
350         VFABI::tryDemangleForVFABI(VariantMapping, CI->getFunctionType());
351     assert(VI && "Cannot add an invalid VFABI name.");
352     assert(M->getNamedValue(VI->VectorName) &&
353            "Cannot add variant to attribute: "
354            "vector function declaration is missing.");
355   }
356 #endif
357   CI->addFnAttr(
358       Attribute::get(M->getContext(), MappingsAttrName, Buffer.str()));
359 }
360 
361 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
362                                StringRef SectionName, Align Alignment) {
363   // Embed the memory buffer into the module.
364   Constant *ModuleConstant = ConstantDataArray::get(
365       M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
366   GlobalVariable *GV = new GlobalVariable(
367       M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
368       ModuleConstant, "llvm.embedded.object");
369   GV->setSection(SectionName);
370   GV->setAlignment(Alignment);
371 
372   LLVMContext &Ctx = M.getContext();
373   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
374   Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
375                         MDString::get(Ctx, SectionName)};
376 
377   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
378   GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
379 
380   appendToCompilerUsed(M, GV);
381 }
382 
383 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
384     Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
385   SmallVector<GlobalIFunc *, 32> AllIFuncs;
386   ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
387   if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
388     for (GlobalIFunc &GI : M.ifuncs())
389       AllIFuncs.push_back(&GI);
390     IFuncsToLower = AllIFuncs;
391   }
392 
393   bool UnhandledUsers = false;
394   LLVMContext &Ctx = M.getContext();
395   const DataLayout &DL = M.getDataLayout();
396 
397   PointerType *TableEntryTy =
398       PointerType::get(Ctx, DL.getProgramAddressSpace());
399 
400   ArrayType *FuncPtrTableTy =
401       ArrayType::get(TableEntryTy, IFuncsToLower.size());
402 
403   Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
404 
405   // Create a global table of function pointers we'll initialize in a global
406   // constructor.
407   auto *FuncPtrTable = new GlobalVariable(
408       M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
409       PoisonValue::get(FuncPtrTableTy), "", nullptr,
410       GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
411   FuncPtrTable->setAlignment(PtrAlign);
412 
413   // Create a function to initialize the function pointer table.
414   Function *NewCtor = Function::Create(
415       FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
416       DL.getProgramAddressSpace(), "", &M);
417 
418   BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
419   IRBuilder<> InitBuilder(BB);
420 
421   size_t TableIndex = 0;
422   for (GlobalIFunc *GI : IFuncsToLower) {
423     Function *ResolvedFunction = GI->getResolverFunction();
424 
425     // We don't know what to pass to a resolver function taking arguments
426     //
427     // FIXME: Is this even valid? clang and gcc don't complain but this
428     // probably should be invalid IR. We could just pass through undef.
429     if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
430       LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
431                         << ResolvedFunction->getName() << " with parameters\n");
432       UnhandledUsers = true;
433       continue;
434     }
435 
436     // Initialize the function pointer table.
437     CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
438     Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
439     Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
440         FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
441     InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
442 
443     // Update all users to load a pointer from the global table.
444     for (User *User : make_early_inc_range(GI->users())) {
445       Instruction *UserInst = dyn_cast<Instruction>(User);
446       if (!UserInst) {
447         // TODO: Should handle constantexpr casts in user instructions. Probably
448         // can't do much about constant initializers.
449         UnhandledUsers = true;
450         continue;
451       }
452 
453       IRBuilder<> UseBuilder(UserInst);
454       LoadInst *ResolvedTarget =
455           UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
456       Value *ResolvedCast =
457           UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
458       UserInst->replaceUsesOfWith(GI, ResolvedCast);
459     }
460 
461     // If we handled all users, erase the ifunc.
462     if (GI->use_empty())
463       GI->eraseFromParent();
464   }
465 
466   InitBuilder.CreateRetVoid();
467 
468   PointerType *ConstantDataTy = PointerType::get(Ctx, 0);
469 
470   // TODO: Is this the right priority? Probably should be before any other
471   // constructors?
472   const int Priority = 10;
473   appendToGlobalCtors(M, NewCtor, Priority,
474                       ConstantPointerNull::get(ConstantDataTy));
475   return UnhandledUsers;
476 }
477