1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This family of functions perform manipulations on Modules.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/IR/DerivedTypes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/MDBuilder.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/raw_ostream.h"
22 #include "llvm/Support/xxhash.h"
23 
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "moduleutils"
27 
appendToGlobalArray(StringRef ArrayName,Module & M,Function * F,int Priority,Constant * Data)28 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
29                                 int Priority, Constant *Data) {
30   IRBuilder<> IRB(M.getContext());
31   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
32 
33   // Get the current set of static global constructors and add the new ctor
34   // to the list.
35   SmallVector<Constant *, 16> CurrentCtors;
36   StructType *EltTy;
37   if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
38     EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
39     if (Constant *Init = GVCtor->getInitializer()) {
40       unsigned n = Init->getNumOperands();
41       CurrentCtors.reserve(n + 1);
42       for (unsigned i = 0; i != n; ++i)
43         CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
44     }
45     GVCtor->eraseFromParent();
46   } else {
47     EltTy = StructType::get(IRB.getInt32Ty(),
48                             PointerType::get(FnTy, F->getAddressSpace()),
49                             IRB.getPtrTy());
50   }
51 
52   // Build a 3 field global_ctor entry.  We don't take a comdat key.
53   Constant *CSVals[3];
54   CSVals[0] = IRB.getInt32(Priority);
55   CSVals[1] = F;
56   CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy())
57                    : Constant::getNullValue(IRB.getPtrTy());
58   Constant *RuntimeCtorInit =
59       ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
60 
61   CurrentCtors.push_back(RuntimeCtorInit);
62 
63   // Create a new initializer.
64   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
65   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
66 
67   // Create the new global variable and replace all uses of
68   // the old global variable with the new one.
69   (void)new GlobalVariable(M, NewInit->getType(), false,
70                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
71 }
72 
appendToGlobalCtors(Module & M,Function * F,int Priority,Constant * Data)73 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
74   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
75 }
76 
appendToGlobalDtors(Module & M,Function * F,int Priority,Constant * Data)77 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
78   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
79 }
80 
collectUsedGlobals(GlobalVariable * GV,SmallSetVector<Constant *,16> & Init)81 static void collectUsedGlobals(GlobalVariable *GV,
82                                SmallSetVector<Constant *, 16> &Init) {
83   if (!GV || !GV->hasInitializer())
84     return;
85 
86   auto *CA = cast<ConstantArray>(GV->getInitializer());
87   for (Use &Op : CA->operands())
88     Init.insert(cast<Constant>(Op));
89 }
90 
appendToUsedList(Module & M,StringRef Name,ArrayRef<GlobalValue * > Values)91 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
92   GlobalVariable *GV = M.getGlobalVariable(Name);
93 
94   SmallSetVector<Constant *, 16> Init;
95   collectUsedGlobals(GV, Init);
96   if (GV)
97     GV->eraseFromParent();
98 
99   Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
100   for (auto *V : Values)
101     Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
102 
103   if (Init.empty())
104     return;
105 
106   ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
107   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
108                                 ConstantArray::get(ATy, Init.getArrayRef()),
109                                 Name);
110   GV->setSection("llvm.metadata");
111 }
112 
appendToUsed(Module & M,ArrayRef<GlobalValue * > Values)113 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
114   appendToUsedList(M, "llvm.used", Values);
115 }
116 
appendToCompilerUsed(Module & M,ArrayRef<GlobalValue * > Values)117 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
118   appendToUsedList(M, "llvm.compiler.used", Values);
119 }
120 
removeFromUsedList(Module & M,StringRef Name,function_ref<bool (Constant *)> ShouldRemove)121 static void removeFromUsedList(Module &M, StringRef Name,
122                                function_ref<bool(Constant *)> ShouldRemove) {
123   GlobalVariable *GV = M.getNamedGlobal(Name);
124   if (!GV)
125     return;
126 
127   SmallSetVector<Constant *, 16> Init;
128   collectUsedGlobals(GV, Init);
129 
130   Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
131 
132   SmallVector<Constant *, 16> NewInit;
133   for (Constant *MaybeRemoved : Init) {
134     if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
135       NewInit.push_back(MaybeRemoved);
136   }
137 
138   if (!NewInit.empty()) {
139     ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
140     GlobalVariable *NewGV =
141         new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
142                            ConstantArray::get(ATy, NewInit), "", GV,
143                            GV->getThreadLocalMode(), GV->getAddressSpace());
144     NewGV->setSection(GV->getSection());
145     NewGV->takeName(GV);
146   }
147 
148   GV->eraseFromParent();
149 }
150 
removeFromUsedLists(Module & M,function_ref<bool (Constant *)> ShouldRemove)151 void llvm::removeFromUsedLists(Module &M,
152                                function_ref<bool(Constant *)> ShouldRemove) {
153   removeFromUsedList(M, "llvm.used", ShouldRemove);
154   removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
155 }
156 
setKCFIType(Module & M,Function & F,StringRef MangledType)157 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
158   if (!M.getModuleFlag("kcfi"))
159     return;
160   // Matches CodeGenModule::CreateKCFITypeId in Clang.
161   LLVMContext &Ctx = M.getContext();
162   MDBuilder MDB(Ctx);
163   F.setMetadata(
164       LLVMContext::MD_kcfi_type,
165       MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
166                            Type::getInt32Ty(Ctx),
167                            static_cast<uint32_t>(xxHash64(MangledType))))));
168   // If the module was compiled with -fpatchable-function-entry, ensure
169   // we use the same patchable-function-prefix.
170   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
171           M.getModuleFlag("kcfi-offset"))) {
172     if (unsigned Offset = MD->getZExtValue())
173       F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
174   }
175 }
176 
declareSanitizerInitFunction(Module & M,StringRef InitName,ArrayRef<Type * > InitArgTypes,bool Weak)177 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
178                                                   ArrayRef<Type *> InitArgTypes,
179                                                   bool Weak) {
180   assert(!InitName.empty() && "Expected init function name");
181   auto *VoidTy = Type::getVoidTy(M.getContext());
182   auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
183   auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
184   auto *Fn = cast<Function>(FnCallee.getCallee());
185   if (Weak && Fn->isDeclaration())
186     Fn->setLinkage(Function::ExternalWeakLinkage);
187   return FnCallee;
188 }
189 
createSanitizerCtor(Module & M,StringRef CtorName)190 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
191   Function *Ctor = Function::createWithDefaultAttr(
192       FunctionType::get(Type::getVoidTy(M.getContext()), false),
193       GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
194       CtorName, &M);
195   Ctor->addFnAttr(Attribute::NoUnwind);
196   setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
197   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
198   ReturnInst::Create(M.getContext(), CtorBB);
199   // Ensure Ctor cannot be discarded, even if in a comdat.
200   appendToUsed(M, {Ctor});
201   return Ctor;
202 }
203 
createSanitizerCtorAndInitFunctions(Module & M,StringRef CtorName,StringRef InitName,ArrayRef<Type * > InitArgTypes,ArrayRef<Value * > InitArgs,StringRef VersionCheckName,bool Weak)204 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
205     Module &M, StringRef CtorName, StringRef InitName,
206     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
207     StringRef VersionCheckName, bool Weak) {
208   assert(!InitName.empty() && "Expected init function name");
209   assert(InitArgs.size() == InitArgTypes.size() &&
210          "Sanitizer's init function expects different number of arguments");
211   FunctionCallee InitFunction =
212       declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
213   Function *Ctor = createSanitizerCtor(M, CtorName);
214   IRBuilder<> IRB(M.getContext());
215 
216   BasicBlock *RetBB = &Ctor->getEntryBlock();
217   if (Weak) {
218     RetBB->setName("ret");
219     auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
220     auto *CallInitBB =
221         BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
222     auto *InitFn = cast<Function>(InitFunction.getCallee());
223     auto *InitFnPtr =
224         PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
225     IRB.SetInsertPoint(EntryBB);
226     Value *InitNotNull =
227         IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
228     IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
229     IRB.SetInsertPoint(CallInitBB);
230   } else {
231     IRB.SetInsertPoint(RetBB->getTerminator());
232   }
233 
234   IRB.CreateCall(InitFunction, InitArgs);
235   if (!VersionCheckName.empty()) {
236     FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
237         VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
238         AttributeList());
239     IRB.CreateCall(VersionCheckFunction, {});
240   }
241 
242   if (Weak)
243     IRB.CreateBr(RetBB);
244 
245   return std::make_pair(Ctor, InitFunction);
246 }
247 
248 std::pair<Function *, FunctionCallee>
getOrCreateSanitizerCtorAndInitFunctions(Module & M,StringRef CtorName,StringRef InitName,ArrayRef<Type * > InitArgTypes,ArrayRef<Value * > InitArgs,function_ref<void (Function *,FunctionCallee)> FunctionsCreatedCallback,StringRef VersionCheckName,bool Weak)249 llvm::getOrCreateSanitizerCtorAndInitFunctions(
250     Module &M, StringRef CtorName, StringRef InitName,
251     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
252     function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
253     StringRef VersionCheckName, bool Weak) {
254   assert(!CtorName.empty() && "Expected ctor function name");
255 
256   if (Function *Ctor = M.getFunction(CtorName))
257     // FIXME: Sink this logic into the module, similar to the handling of
258     // globals. This will make moving to a concurrent model much easier.
259     if (Ctor->arg_empty() ||
260         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
261       return {Ctor,
262               declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
263 
264   Function *Ctor;
265   FunctionCallee InitFunction;
266   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
267       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
268   FunctionsCreatedCallback(Ctor, InitFunction);
269   return std::make_pair(Ctor, InitFunction);
270 }
271 
filterDeadComdatFunctions(SmallVectorImpl<Function * > & DeadComdatFunctions)272 void llvm::filterDeadComdatFunctions(
273     SmallVectorImpl<Function *> &DeadComdatFunctions) {
274   SmallPtrSet<Function *, 32> MaybeDeadFunctions;
275   SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
276   for (Function *F : DeadComdatFunctions) {
277     MaybeDeadFunctions.insert(F);
278     if (Comdat *C = F->getComdat())
279       MaybeDeadComdats.insert(C);
280   }
281 
282   // Find comdats for which all users are dead now.
283   SmallPtrSet<Comdat *, 32> DeadComdats;
284   for (Comdat *C : MaybeDeadComdats) {
285     auto IsUserDead = [&](GlobalObject *GO) {
286       auto *F = dyn_cast<Function>(GO);
287       return F && MaybeDeadFunctions.contains(F);
288     };
289     if (all_of(C->getUsers(), IsUserDead))
290       DeadComdats.insert(C);
291   }
292 
293   // Only keep functions which have no comdat or a dead comdat.
294   erase_if(DeadComdatFunctions, [&](Function *F) {
295     Comdat *C = F->getComdat();
296     return C && !DeadComdats.contains(C);
297   });
298 }
299 
getUniqueModuleId(Module * M)300 std::string llvm::getUniqueModuleId(Module *M) {
301   MD5 Md5;
302   bool ExportsSymbols = false;
303   auto AddGlobal = [&](GlobalValue &GV) {
304     if (GV.isDeclaration() || GV.getName().starts_with("llvm.") ||
305         !GV.hasExternalLinkage() || GV.hasComdat())
306       return;
307     ExportsSymbols = true;
308     Md5.update(GV.getName());
309     Md5.update(ArrayRef<uint8_t>{0});
310   };
311 
312   for (auto &F : *M)
313     AddGlobal(F);
314   for (auto &GV : M->globals())
315     AddGlobal(GV);
316   for (auto &GA : M->aliases())
317     AddGlobal(GA);
318   for (auto &IF : M->ifuncs())
319     AddGlobal(IF);
320 
321   if (!ExportsSymbols)
322     return "";
323 
324   MD5::MD5Result R;
325   Md5.final(R);
326 
327   SmallString<32> Str;
328   MD5::stringifyResult(R, Str);
329   return ("." + Str).str();
330 }
331 
embedBufferInModule(Module & M,MemoryBufferRef Buf,StringRef SectionName,Align Alignment)332 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
333                                StringRef SectionName, Align Alignment) {
334   // Embed the memory buffer into the module.
335   Constant *ModuleConstant = ConstantDataArray::get(
336       M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
337   GlobalVariable *GV = new GlobalVariable(
338       M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
339       ModuleConstant, "llvm.embedded.object");
340   GV->setSection(SectionName);
341   GV->setAlignment(Alignment);
342 
343   LLVMContext &Ctx = M.getContext();
344   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
345   Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
346                         MDString::get(Ctx, SectionName)};
347 
348   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
349   GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
350 
351   appendToCompilerUsed(M, GV);
352 }
353 
lowerGlobalIFuncUsersAsGlobalCtor(Module & M,ArrayRef<GlobalIFunc * > FilteredIFuncsToLower)354 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
355     Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
356   SmallVector<GlobalIFunc *, 32> AllIFuncs;
357   ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
358   if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
359     for (GlobalIFunc &GI : M.ifuncs())
360       AllIFuncs.push_back(&GI);
361     IFuncsToLower = AllIFuncs;
362   }
363 
364   bool UnhandledUsers = false;
365   LLVMContext &Ctx = M.getContext();
366   const DataLayout &DL = M.getDataLayout();
367 
368   PointerType *TableEntryTy =
369       PointerType::get(Ctx, DL.getProgramAddressSpace());
370 
371   ArrayType *FuncPtrTableTy =
372       ArrayType::get(TableEntryTy, IFuncsToLower.size());
373 
374   Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
375 
376   // Create a global table of function pointers we'll initialize in a global
377   // constructor.
378   auto *FuncPtrTable = new GlobalVariable(
379       M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
380       PoisonValue::get(FuncPtrTableTy), "", nullptr,
381       GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
382   FuncPtrTable->setAlignment(PtrAlign);
383 
384   // Create a function to initialize the function pointer table.
385   Function *NewCtor = Function::Create(
386       FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
387       DL.getProgramAddressSpace(), "", &M);
388 
389   BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
390   IRBuilder<> InitBuilder(BB);
391 
392   size_t TableIndex = 0;
393   for (GlobalIFunc *GI : IFuncsToLower) {
394     Function *ResolvedFunction = GI->getResolverFunction();
395 
396     // We don't know what to pass to a resolver function taking arguments
397     //
398     // FIXME: Is this even valid? clang and gcc don't complain but this
399     // probably should be invalid IR. We could just pass through undef.
400     if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
401       LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
402                         << ResolvedFunction->getName() << " with parameters\n");
403       UnhandledUsers = true;
404       continue;
405     }
406 
407     // Initialize the function pointer table.
408     CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
409     Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
410     Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
411         FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
412     InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
413 
414     // Update all users to load a pointer from the global table.
415     for (User *User : make_early_inc_range(GI->users())) {
416       Instruction *UserInst = dyn_cast<Instruction>(User);
417       if (!UserInst) {
418         // TODO: Should handle constantexpr casts in user instructions. Probably
419         // can't do much about constant initializers.
420         UnhandledUsers = true;
421         continue;
422       }
423 
424       IRBuilder<> UseBuilder(UserInst);
425       LoadInst *ResolvedTarget =
426           UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
427       Value *ResolvedCast =
428           UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
429       UserInst->replaceUsesOfWith(GI, ResolvedCast);
430     }
431 
432     // If we handled all users, erase the ifunc.
433     if (GI->use_empty())
434       GI->eraseFromParent();
435   }
436 
437   InitBuilder.CreateRetVoid();
438 
439   PointerType *ConstantDataTy = PointerType::get(Ctx, 0);
440 
441   // TODO: Is this the right priority? Probably should be before any other
442   // constructors?
443   const int Priority = 10;
444   appendToGlobalCtors(M, NewCtor, Priority,
445                       ConstantPointerNull::get(ConstantDataTy));
446   return UnhandledUsers;
447 }
448