1 //===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This family of functions performs manipulations on Modules.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Utils/ModuleUtils.h"
14 #include "llvm/Analysis/VectorUtils.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/IR/DerivedTypes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/IRBuilder.h"
19 #include "llvm/IR/MDBuilder.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Support/MD5.h"
22 #include "llvm/Support/raw_ostream.h"
23 #include "llvm/Support/xxhash.h"
24 
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "moduleutils"
28 
appendToGlobalArray(StringRef ArrayName,Module & M,Function * F,int Priority,Constant * Data)29 static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
30                                 int Priority, Constant *Data) {
31   IRBuilder<> IRB(M.getContext());
32   FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
33 
34   // Get the current set of static global constructors and add the new ctor
35   // to the list.
36   SmallVector<Constant *, 16> CurrentCtors;
37   StructType *EltTy;
38   if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
39     EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
40     if (Constant *Init = GVCtor->getInitializer()) {
41       unsigned n = Init->getNumOperands();
42       CurrentCtors.reserve(n + 1);
43       for (unsigned i = 0; i != n; ++i)
44         CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
45     }
46     GVCtor->eraseFromParent();
47   } else {
48     EltTy = StructType::get(IRB.getInt32Ty(),
49                             PointerType::get(FnTy, F->getAddressSpace()),
50                             IRB.getPtrTy());
51   }
52 
53   // Build a 3 field global_ctor entry.  We don't take a comdat key.
54   Constant *CSVals[3];
55   CSVals[0] = IRB.getInt32(Priority);
56   CSVals[1] = F;
57   CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy())
58                    : Constant::getNullValue(IRB.getPtrTy());
59   Constant *RuntimeCtorInit =
60       ConstantStruct::get(EltTy, ArrayRef(CSVals, EltTy->getNumElements()));
61 
62   CurrentCtors.push_back(RuntimeCtorInit);
63 
64   // Create a new initializer.
65   ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
66   Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
67 
68   // Create the new global variable and replace all uses of
69   // the old global variable with the new one.
70   (void)new GlobalVariable(M, NewInit->getType(), false,
71                            GlobalValue::AppendingLinkage, NewInit, ArrayName);
72 }
73 
appendToGlobalCtors(Module & M,Function * F,int Priority,Constant * Data)74 void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority, Constant *Data) {
75   appendToGlobalArray("llvm.global_ctors", M, F, Priority, Data);
76 }
77 
appendToGlobalDtors(Module & M,Function * F,int Priority,Constant * Data)78 void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *Data) {
79   appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
80 }
81 
collectUsedGlobals(GlobalVariable * GV,SmallSetVector<Constant *,16> & Init)82 static void collectUsedGlobals(GlobalVariable *GV,
83                                SmallSetVector<Constant *, 16> &Init) {
84   if (!GV || !GV->hasInitializer())
85     return;
86 
87   auto *CA = cast<ConstantArray>(GV->getInitializer());
88   for (Use &Op : CA->operands())
89     Init.insert(cast<Constant>(Op));
90 }
91 
appendToUsedList(Module & M,StringRef Name,ArrayRef<GlobalValue * > Values)92 static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
93   GlobalVariable *GV = M.getGlobalVariable(Name);
94 
95   SmallSetVector<Constant *, 16> Init;
96   collectUsedGlobals(GV, Init);
97   if (GV)
98     GV->eraseFromParent();
99 
100   Type *ArrayEltTy = llvm::PointerType::getUnqual(M.getContext());
101   for (auto *V : Values)
102     Init.insert(ConstantExpr::getPointerBitCastOrAddrSpaceCast(V, ArrayEltTy));
103 
104   if (Init.empty())
105     return;
106 
107   ArrayType *ATy = ArrayType::get(ArrayEltTy, Init.size());
108   GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
109                                 ConstantArray::get(ATy, Init.getArrayRef()),
110                                 Name);
111   GV->setSection("llvm.metadata");
112 }
113 
appendToUsed(Module & M,ArrayRef<GlobalValue * > Values)114 void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
115   appendToUsedList(M, "llvm.used", Values);
116 }
117 
appendToCompilerUsed(Module & M,ArrayRef<GlobalValue * > Values)118 void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
119   appendToUsedList(M, "llvm.compiler.used", Values);
120 }
121 
removeFromUsedList(Module & M,StringRef Name,function_ref<bool (Constant *)> ShouldRemove)122 static void removeFromUsedList(Module &M, StringRef Name,
123                                function_ref<bool(Constant *)> ShouldRemove) {
124   GlobalVariable *GV = M.getNamedGlobal(Name);
125   if (!GV)
126     return;
127 
128   SmallSetVector<Constant *, 16> Init;
129   collectUsedGlobals(GV, Init);
130 
131   Type *ArrayEltTy = cast<ArrayType>(GV->getValueType())->getElementType();
132 
133   SmallVector<Constant *, 16> NewInit;
134   for (Constant *MaybeRemoved : Init) {
135     if (!ShouldRemove(MaybeRemoved->stripPointerCasts()))
136       NewInit.push_back(MaybeRemoved);
137   }
138 
139   if (!NewInit.empty()) {
140     ArrayType *ATy = ArrayType::get(ArrayEltTy, NewInit.size());
141     GlobalVariable *NewGV =
142         new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
143                            ConstantArray::get(ATy, NewInit), "", GV,
144                            GV->getThreadLocalMode(), GV->getAddressSpace());
145     NewGV->setSection(GV->getSection());
146     NewGV->takeName(GV);
147   }
148 
149   GV->eraseFromParent();
150 }
151 
removeFromUsedLists(Module & M,function_ref<bool (Constant *)> ShouldRemove)152 void llvm::removeFromUsedLists(Module &M,
153                                function_ref<bool(Constant *)> ShouldRemove) {
154   removeFromUsedList(M, "llvm.used", ShouldRemove);
155   removeFromUsedList(M, "llvm.compiler.used", ShouldRemove);
156 }
157 
setKCFIType(Module & M,Function & F,StringRef MangledType)158 void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
159   if (!M.getModuleFlag("kcfi"))
160     return;
161   // Matches CodeGenModule::CreateKCFITypeId in Clang.
162   LLVMContext &Ctx = M.getContext();
163   MDBuilder MDB(Ctx);
164   std::string Type = MangledType.str();
165   if (M.getModuleFlag("cfi-normalize-integers"))
166     Type += ".normalized";
167   F.setMetadata(LLVMContext::MD_kcfi_type,
168                 MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
169                                      Type::getInt32Ty(Ctx),
170                                      static_cast<uint32_t>(xxHash64(Type))))));
171   // If the module was compiled with -fpatchable-function-entry, ensure
172   // we use the same patchable-function-prefix.
173   if (auto *MD = mdconst::extract_or_null<ConstantInt>(
174           M.getModuleFlag("kcfi-offset"))) {
175     if (unsigned Offset = MD->getZExtValue())
176       F.addFnAttr("patchable-function-prefix", std::to_string(Offset));
177   }
178 }
179 
declareSanitizerInitFunction(Module & M,StringRef InitName,ArrayRef<Type * > InitArgTypes,bool Weak)180 FunctionCallee llvm::declareSanitizerInitFunction(Module &M, StringRef InitName,
181                                                   ArrayRef<Type *> InitArgTypes,
182                                                   bool Weak) {
183   assert(!InitName.empty() && "Expected init function name");
184   auto *VoidTy = Type::getVoidTy(M.getContext());
185   auto *FnTy = FunctionType::get(VoidTy, InitArgTypes, false);
186   auto FnCallee = M.getOrInsertFunction(InitName, FnTy);
187   auto *Fn = cast<Function>(FnCallee.getCallee());
188   if (Weak && Fn->isDeclaration())
189     Fn->setLinkage(Function::ExternalWeakLinkage);
190   return FnCallee;
191 }
192 
createSanitizerCtor(Module & M,StringRef CtorName)193 Function *llvm::createSanitizerCtor(Module &M, StringRef CtorName) {
194   Function *Ctor = Function::createWithDefaultAttr(
195       FunctionType::get(Type::getVoidTy(M.getContext()), false),
196       GlobalValue::InternalLinkage, M.getDataLayout().getProgramAddressSpace(),
197       CtorName, &M);
198   Ctor->addFnAttr(Attribute::NoUnwind);
199   setKCFIType(M, *Ctor, "_ZTSFvvE"); // void (*)(void)
200   BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor);
201   ReturnInst::Create(M.getContext(), CtorBB);
202   // Ensure Ctor cannot be discarded, even if in a comdat.
203   appendToUsed(M, {Ctor});
204   return Ctor;
205 }
206 
createSanitizerCtorAndInitFunctions(Module & M,StringRef CtorName,StringRef InitName,ArrayRef<Type * > InitArgTypes,ArrayRef<Value * > InitArgs,StringRef VersionCheckName,bool Weak)207 std::pair<Function *, FunctionCallee> llvm::createSanitizerCtorAndInitFunctions(
208     Module &M, StringRef CtorName, StringRef InitName,
209     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
210     StringRef VersionCheckName, bool Weak) {
211   assert(!InitName.empty() && "Expected init function name");
212   assert(InitArgs.size() == InitArgTypes.size() &&
213          "Sanitizer's init function expects different number of arguments");
214   FunctionCallee InitFunction =
215       declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak);
216   Function *Ctor = createSanitizerCtor(M, CtorName);
217   IRBuilder<> IRB(M.getContext());
218 
219   BasicBlock *RetBB = &Ctor->getEntryBlock();
220   if (Weak) {
221     RetBB->setName("ret");
222     auto *EntryBB = BasicBlock::Create(M.getContext(), "entry", Ctor, RetBB);
223     auto *CallInitBB =
224         BasicBlock::Create(M.getContext(), "callfunc", Ctor, RetBB);
225     auto *InitFn = cast<Function>(InitFunction.getCallee());
226     auto *InitFnPtr =
227         PointerType::get(InitFn->getType(), InitFn->getAddressSpace());
228     IRB.SetInsertPoint(EntryBB);
229     Value *InitNotNull =
230         IRB.CreateICmpNE(InitFn, ConstantPointerNull::get(InitFnPtr));
231     IRB.CreateCondBr(InitNotNull, CallInitBB, RetBB);
232     IRB.SetInsertPoint(CallInitBB);
233   } else {
234     IRB.SetInsertPoint(RetBB->getTerminator());
235   }
236 
237   IRB.CreateCall(InitFunction, InitArgs);
238   if (!VersionCheckName.empty()) {
239     FunctionCallee VersionCheckFunction = M.getOrInsertFunction(
240         VersionCheckName, FunctionType::get(IRB.getVoidTy(), {}, false),
241         AttributeList());
242     IRB.CreateCall(VersionCheckFunction, {});
243   }
244 
245   if (Weak)
246     IRB.CreateBr(RetBB);
247 
248   return std::make_pair(Ctor, InitFunction);
249 }
250 
251 std::pair<Function *, FunctionCallee>
getOrCreateSanitizerCtorAndInitFunctions(Module & M,StringRef CtorName,StringRef InitName,ArrayRef<Type * > InitArgTypes,ArrayRef<Value * > InitArgs,function_ref<void (Function *,FunctionCallee)> FunctionsCreatedCallback,StringRef VersionCheckName,bool Weak)252 llvm::getOrCreateSanitizerCtorAndInitFunctions(
253     Module &M, StringRef CtorName, StringRef InitName,
254     ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
255     function_ref<void(Function *, FunctionCallee)> FunctionsCreatedCallback,
256     StringRef VersionCheckName, bool Weak) {
257   assert(!CtorName.empty() && "Expected ctor function name");
258 
259   if (Function *Ctor = M.getFunction(CtorName))
260     // FIXME: Sink this logic into the module, similar to the handling of
261     // globals. This will make moving to a concurrent model much easier.
262     if (Ctor->arg_empty() ||
263         Ctor->getReturnType() == Type::getVoidTy(M.getContext()))
264       return {Ctor,
265               declareSanitizerInitFunction(M, InitName, InitArgTypes, Weak)};
266 
267   Function *Ctor;
268   FunctionCallee InitFunction;
269   std::tie(Ctor, InitFunction) = llvm::createSanitizerCtorAndInitFunctions(
270       M, CtorName, InitName, InitArgTypes, InitArgs, VersionCheckName, Weak);
271   FunctionsCreatedCallback(Ctor, InitFunction);
272   return std::make_pair(Ctor, InitFunction);
273 }
274 
filterDeadComdatFunctions(SmallVectorImpl<Function * > & DeadComdatFunctions)275 void llvm::filterDeadComdatFunctions(
276     SmallVectorImpl<Function *> &DeadComdatFunctions) {
277   SmallPtrSet<Function *, 32> MaybeDeadFunctions;
278   SmallPtrSet<Comdat *, 32> MaybeDeadComdats;
279   for (Function *F : DeadComdatFunctions) {
280     MaybeDeadFunctions.insert(F);
281     if (Comdat *C = F->getComdat())
282       MaybeDeadComdats.insert(C);
283   }
284 
285   // Find comdats for which all users are dead now.
286   SmallPtrSet<Comdat *, 32> DeadComdats;
287   for (Comdat *C : MaybeDeadComdats) {
288     auto IsUserDead = [&](GlobalObject *GO) {
289       auto *F = dyn_cast<Function>(GO);
290       return F && MaybeDeadFunctions.contains(F);
291     };
292     if (all_of(C->getUsers(), IsUserDead))
293       DeadComdats.insert(C);
294   }
295 
296   // Only keep functions which have no comdat or a dead comdat.
297   erase_if(DeadComdatFunctions, [&](Function *F) {
298     Comdat *C = F->getComdat();
299     return C && !DeadComdats.contains(C);
300   });
301 }
302 
getUniqueModuleId(Module * M)303 std::string llvm::getUniqueModuleId(Module *M) {
304   MD5 Md5;
305   bool ExportsSymbols = false;
306   auto AddGlobal = [&](GlobalValue &GV) {
307     if (GV.isDeclaration() || GV.getName().starts_with("llvm.") ||
308         !GV.hasExternalLinkage() || GV.hasComdat())
309       return;
310     ExportsSymbols = true;
311     Md5.update(GV.getName());
312     Md5.update(ArrayRef<uint8_t>{0});
313   };
314 
315   for (auto &F : *M)
316     AddGlobal(F);
317   for (auto &GV : M->globals())
318     AddGlobal(GV);
319   for (auto &GA : M->aliases())
320     AddGlobal(GA);
321   for (auto &IF : M->ifuncs())
322     AddGlobal(IF);
323 
324   if (!ExportsSymbols)
325     return "";
326 
327   MD5::MD5Result R;
328   Md5.final(R);
329 
330   SmallString<32> Str;
331   MD5::stringifyResult(R, Str);
332   return ("." + Str).str();
333 }
334 
embedBufferInModule(Module & M,MemoryBufferRef Buf,StringRef SectionName,Align Alignment)335 void llvm::embedBufferInModule(Module &M, MemoryBufferRef Buf,
336                                StringRef SectionName, Align Alignment) {
337   // Embed the memory buffer into the module.
338   Constant *ModuleConstant = ConstantDataArray::get(
339       M.getContext(), ArrayRef(Buf.getBufferStart(), Buf.getBufferSize()));
340   GlobalVariable *GV = new GlobalVariable(
341       M, ModuleConstant->getType(), true, GlobalValue::PrivateLinkage,
342       ModuleConstant, "llvm.embedded.object");
343   GV->setSection(SectionName);
344   GV->setAlignment(Alignment);
345 
346   LLVMContext &Ctx = M.getContext();
347   NamedMDNode *MD = M.getOrInsertNamedMetadata("llvm.embedded.objects");
348   Metadata *MDVals[] = {ConstantAsMetadata::get(GV),
349                         MDString::get(Ctx, SectionName)};
350 
351   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
352   GV->setMetadata(LLVMContext::MD_exclude, llvm::MDNode::get(Ctx, {}));
353 
354   appendToCompilerUsed(M, GV);
355 }
356 
lowerGlobalIFuncUsersAsGlobalCtor(Module & M,ArrayRef<GlobalIFunc * > FilteredIFuncsToLower)357 bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
358     Module &M, ArrayRef<GlobalIFunc *> FilteredIFuncsToLower) {
359   SmallVector<GlobalIFunc *, 32> AllIFuncs;
360   ArrayRef<GlobalIFunc *> IFuncsToLower = FilteredIFuncsToLower;
361   if (FilteredIFuncsToLower.empty()) { // Default to lowering all ifuncs
362     for (GlobalIFunc &GI : M.ifuncs())
363       AllIFuncs.push_back(&GI);
364     IFuncsToLower = AllIFuncs;
365   }
366 
367   bool UnhandledUsers = false;
368   LLVMContext &Ctx = M.getContext();
369   const DataLayout &DL = M.getDataLayout();
370 
371   PointerType *TableEntryTy =
372       PointerType::get(Ctx, DL.getProgramAddressSpace());
373 
374   ArrayType *FuncPtrTableTy =
375       ArrayType::get(TableEntryTy, IFuncsToLower.size());
376 
377   Align PtrAlign = DL.getABITypeAlign(TableEntryTy);
378 
379   // Create a global table of function pointers we'll initialize in a global
380   // constructor.
381   auto *FuncPtrTable = new GlobalVariable(
382       M, FuncPtrTableTy, false, GlobalValue::InternalLinkage,
383       PoisonValue::get(FuncPtrTableTy), "", nullptr,
384       GlobalVariable::NotThreadLocal, DL.getDefaultGlobalsAddressSpace());
385   FuncPtrTable->setAlignment(PtrAlign);
386 
387   // Create a function to initialize the function pointer table.
388   Function *NewCtor = Function::Create(
389       FunctionType::get(Type::getVoidTy(Ctx), false), Function::InternalLinkage,
390       DL.getProgramAddressSpace(), "", &M);
391 
392   BasicBlock *BB = BasicBlock::Create(Ctx, "", NewCtor);
393   IRBuilder<> InitBuilder(BB);
394 
395   size_t TableIndex = 0;
396   for (GlobalIFunc *GI : IFuncsToLower) {
397     Function *ResolvedFunction = GI->getResolverFunction();
398 
399     // We don't know what to pass to a resolver function taking arguments
400     //
401     // FIXME: Is this even valid? clang and gcc don't complain but this
402     // probably should be invalid IR. We could just pass through undef.
403     if (!std::empty(ResolvedFunction->getFunctionType()->params())) {
404       LLVM_DEBUG(dbgs() << "Not lowering ifunc resolver function "
405                         << ResolvedFunction->getName() << " with parameters\n");
406       UnhandledUsers = true;
407       continue;
408     }
409 
410     // Initialize the function pointer table.
411     CallInst *ResolvedFunc = InitBuilder.CreateCall(ResolvedFunction);
412     Value *Casted = InitBuilder.CreatePointerCast(ResolvedFunc, TableEntryTy);
413     Constant *GEP = cast<Constant>(InitBuilder.CreateConstInBoundsGEP2_32(
414         FuncPtrTableTy, FuncPtrTable, 0, TableIndex++));
415     InitBuilder.CreateAlignedStore(Casted, GEP, PtrAlign);
416 
417     // Update all users to load a pointer from the global table.
418     for (User *User : make_early_inc_range(GI->users())) {
419       Instruction *UserInst = dyn_cast<Instruction>(User);
420       if (!UserInst) {
421         // TODO: Should handle constantexpr casts in user instructions. Probably
422         // can't do much about constant initializers.
423         UnhandledUsers = true;
424         continue;
425       }
426 
427       IRBuilder<> UseBuilder(UserInst);
428       LoadInst *ResolvedTarget =
429           UseBuilder.CreateAlignedLoad(TableEntryTy, GEP, PtrAlign);
430       Value *ResolvedCast =
431           UseBuilder.CreatePointerCast(ResolvedTarget, GI->getType());
432       UserInst->replaceUsesOfWith(GI, ResolvedCast);
433     }
434 
435     // If we handled all users, erase the ifunc.
436     if (GI->use_empty())
437       GI->eraseFromParent();
438   }
439 
440   InitBuilder.CreateRetVoid();
441 
442   PointerType *ConstantDataTy = PointerType::get(Ctx, 0);
443 
444   // TODO: Is this the right priority? Probably should be before any other
445   // constructors?
446   const int Priority = 10;
447   appendToGlobalCtors(M, NewCtor, Priority,
448                       ConstantPointerNull::get(ConstantDataTy));
449   return UnhandledUsers;
450 }
451