1 //===- HWAddressSanitizer.cpp - detector of uninitialized reads -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of HWAddressSanitizer, an address basic correctness
11 /// checker based on tagged addressing.
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringExtras.h"
18 #include "llvm/ADT/StringRef.h"
19 #include "llvm/ADT/Triple.h"
20 #include "llvm/Analysis/CFG.h"
21 #include "llvm/Analysis/PostDominators.h"
22 #include "llvm/Analysis/StackSafetyAnalysis.h"
23 #include "llvm/Analysis/ValueTracking.h"
24 #include "llvm/BinaryFormat/ELF.h"
25 #include "llvm/IR/Attributes.h"
26 #include "llvm/IR/BasicBlock.h"
27 #include "llvm/IR/Constant.h"
28 #include "llvm/IR/Constants.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DebugInfoMetadata.h"
31 #include "llvm/IR/DerivedTypes.h"
32 #include "llvm/IR/Dominators.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/InlineAsm.h"
36 #include "llvm/IR/InstVisitor.h"
37 #include "llvm/IR/Instruction.h"
38 #include "llvm/IR/Instructions.h"
39 #include "llvm/IR/IntrinsicInst.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/LLVMContext.h"
42 #include "llvm/IR/MDBuilder.h"
43 #include "llvm/IR/Module.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/InitializePasses.h"
47 #include "llvm/Pass.h"
48 #include "llvm/PassRegistry.h"
49 #include "llvm/Support/Casting.h"
50 #include "llvm/Support/CommandLine.h"
51 #include "llvm/Support/Debug.h"
52 #include "llvm/Support/raw_ostream.h"
53 #include "llvm/Transforms/Instrumentation.h"
54 #include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
55 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
56 #include "llvm/Transforms/Utils/ModuleUtils.h"
57 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
58 #include <sstream>
59 
60 using namespace llvm;
61 
62 #define DEBUG_TYPE "hwasan"
63 
64 const char kHwasanModuleCtorName[] = "hwasan.module_ctor";
65 const char kHwasanNoteName[] = "hwasan.note";
66 const char kHwasanInitName[] = "__hwasan_init";
67 const char kHwasanPersonalityThunkName[] = "__hwasan_personality_thunk";
68 
69 const char kHwasanShadowMemoryDynamicAddress[] =
70     "__hwasan_shadow_memory_dynamic_address";
71 
72 // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
73 static const size_t kNumberOfAccessSizes = 5;
74 
75 static const size_t kDefaultShadowScale = 4;
76 static const uint64_t kDynamicShadowSentinel =
77     std::numeric_limits<uint64_t>::max();
78 
79 static const unsigned kShadowBaseAlignment = 32;
80 
81 static cl::opt<std::string>
82     ClMemoryAccessCallbackPrefix("hwasan-memory-access-callback-prefix",
83                                  cl::desc("Prefix for memory access callbacks"),
84                                  cl::Hidden, cl::init("__hwasan_"));
85 
86 static cl::opt<bool> ClInstrumentWithCalls(
87     "hwasan-instrument-with-calls",
88     cl::desc("instrument reads and writes with callbacks"), cl::Hidden,
89     cl::init(false));
90 
91 static cl::opt<bool> ClInstrumentReads("hwasan-instrument-reads",
92                                        cl::desc("instrument read instructions"),
93                                        cl::Hidden, cl::init(true));
94 
95 static cl::opt<bool>
96     ClInstrumentWrites("hwasan-instrument-writes",
97                        cl::desc("instrument write instructions"), cl::Hidden,
98                        cl::init(true));
99 
100 static cl::opt<bool> ClInstrumentAtomics(
101     "hwasan-instrument-atomics",
102     cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,
103     cl::init(true));
104 
105 static cl::opt<bool> ClInstrumentByval("hwasan-instrument-byval",
106                                        cl::desc("instrument byval arguments"),
107                                        cl::Hidden, cl::init(true));
108 
109 static cl::opt<bool>
110     ClRecover("hwasan-recover",
111               cl::desc("Enable recovery mode (continue-after-error)."),
112               cl::Hidden, cl::init(false));
113 
114 static cl::opt<bool> ClInstrumentStack("hwasan-instrument-stack",
115                                        cl::desc("instrument stack (allocas)"),
116                                        cl::Hidden, cl::init(true));
117 
118 static cl::opt<bool>
119     ClUseStackSafety("hwasan-use-stack-safety", cl::Hidden, cl::init(true),
120                      cl::Hidden, cl::desc("Use Stack Safety analysis results"),
121                      cl::Optional);
122 
123 static cl::opt<size_t> ClMaxLifetimes(
124     "hwasan-max-lifetimes-for-alloca", cl::Hidden, cl::init(3),
125     cl::ReallyHidden,
126     cl::desc("How many lifetime ends to handle for a single alloca."),
127     cl::Optional);
128 
129 static cl::opt<bool>
130     ClUseAfterScope("hwasan-use-after-scope",
131                     cl::desc("detect use after scope within function"),
132                     cl::Hidden, cl::init(false));
133 
134 static cl::opt<bool> ClUARRetagToZero(
135     "hwasan-uar-retag-to-zero",
136     cl::desc("Clear alloca tags before returning from the function to allow "
137              "non-instrumented and instrumented function calls mix. When set "
138              "to false, allocas are retagged before returning from the "
139              "function to detect use after return."),
140     cl::Hidden, cl::init(true));
141 
142 static cl::opt<bool> ClGenerateTagsWithCalls(
143     "hwasan-generate-tags-with-calls",
144     cl::desc("generate new tags with runtime library calls"), cl::Hidden,
145     cl::init(false));
146 
147 static cl::opt<bool> ClGlobals("hwasan-globals", cl::desc("Instrument globals"),
148                                cl::Hidden, cl::init(false), cl::ZeroOrMore);
149 
150 static cl::opt<int> ClMatchAllTag(
151     "hwasan-match-all-tag",
152     cl::desc("don't report bad accesses via pointers with this tag"),
153     cl::Hidden, cl::init(-1));
154 
155 static cl::opt<bool>
156     ClEnableKhwasan("hwasan-kernel",
157                     cl::desc("Enable KernelHWAddressSanitizer instrumentation"),
158                     cl::Hidden, cl::init(false));
159 
160 // These flags allow to change the shadow mapping and control how shadow memory
161 // is accessed. The shadow mapping looks like:
162 //    Shadow = (Mem >> scale) + offset
163 
164 static cl::opt<uint64_t>
165     ClMappingOffset("hwasan-mapping-offset",
166                     cl::desc("HWASan shadow mapping offset [EXPERIMENTAL]"),
167                     cl::Hidden, cl::init(0));
168 
169 static cl::opt<bool>
170     ClWithIfunc("hwasan-with-ifunc",
171                 cl::desc("Access dynamic shadow through an ifunc global on "
172                          "platforms that support this"),
173                 cl::Hidden, cl::init(false));
174 
175 static cl::opt<bool> ClWithTls(
176     "hwasan-with-tls",
177     cl::desc("Access dynamic shadow through an thread-local pointer on "
178              "platforms that support this"),
179     cl::Hidden, cl::init(true));
180 
181 static cl::opt<bool>
182     ClRecordStackHistory("hwasan-record-stack-history",
183                          cl::desc("Record stack frames with tagged allocations "
184                                   "in a thread-local ring buffer"),
185                          cl::Hidden, cl::init(true));
186 static cl::opt<bool>
187     ClInstrumentMemIntrinsics("hwasan-instrument-mem-intrinsics",
188                               cl::desc("instrument memory intrinsics"),
189                               cl::Hidden, cl::init(true));
190 
191 static cl::opt<bool>
192     ClInstrumentLandingPads("hwasan-instrument-landing-pads",
193                             cl::desc("instrument landing pads"), cl::Hidden,
194                             cl::init(false), cl::ZeroOrMore);
195 
196 static cl::opt<bool> ClUseShortGranules(
197     "hwasan-use-short-granules",
198     cl::desc("use short granules in allocas and outlined checks"), cl::Hidden,
199     cl::init(false), cl::ZeroOrMore);
200 
201 static cl::opt<bool> ClInstrumentPersonalityFunctions(
202     "hwasan-instrument-personality-functions",
203     cl::desc("instrument personality functions"), cl::Hidden, cl::init(false),
204     cl::ZeroOrMore);
205 
206 static cl::opt<bool> ClInlineAllChecks("hwasan-inline-all-checks",
207                                        cl::desc("inline all checks"),
208                                        cl::Hidden, cl::init(false));
209 
210 // Enabled from clang by "-fsanitize-hwaddress-experimental-aliasing".
211 static cl::opt<bool> ClUsePageAliases("hwasan-experimental-use-page-aliases",
212                                       cl::desc("Use page aliasing in HWASan"),
213                                       cl::Hidden, cl::init(false));
214 
215 namespace {
216 
217 bool shouldUsePageAliases(const Triple &TargetTriple) {
218   return ClUsePageAliases && TargetTriple.getArch() == Triple::x86_64;
219 }
220 
221 bool shouldInstrumentStack(const Triple &TargetTriple) {
222   return !shouldUsePageAliases(TargetTriple) && ClInstrumentStack;
223 }
224 
225 bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
226   return ClInstrumentWithCalls || TargetTriple.getArch() == Triple::x86_64;
227 }
228 
229 bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
230   return ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
231                                               : !DisableOptimization;
232 }
233 
234 bool shouldUseStackSafetyAnalysis(const Triple &TargetTriple,
235                                   bool DisableOptimization) {
236   return shouldInstrumentStack(TargetTriple) &&
237          mightUseStackSafetyAnalysis(DisableOptimization);
238 }
239 
240 bool shouldDetectUseAfterScope(const Triple &TargetTriple) {
241   return ClUseAfterScope && shouldInstrumentStack(TargetTriple);
242 }
243 
244 /// An instrumentation pass implementing detection of addressability bugs
245 /// using tagged pointers.
246 class HWAddressSanitizer {
247 private:
248   struct AllocaInfo {
249     AllocaInst *AI;
250     SmallVector<IntrinsicInst *, 2> LifetimeStart;
251     SmallVector<IntrinsicInst *, 2> LifetimeEnd;
252   };
253 
254 public:
255   HWAddressSanitizer(Module &M, bool CompileKernel, bool Recover,
256                      const StackSafetyGlobalInfo *SSI)
257       : M(M), SSI(SSI) {
258     this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover;
259     this->CompileKernel = ClEnableKhwasan.getNumOccurrences() > 0
260                               ? ClEnableKhwasan
261                               : CompileKernel;
262 
263     initializeModule();
264   }
265 
266   void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }
267 
268   DenseMap<AllocaInst *, AllocaInst *> padInterestingAllocas(
269       const MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument);
270   bool sanitizeFunction(Function &F,
271                         llvm::function_ref<const DominatorTree &()> GetDT,
272                         llvm::function_ref<const PostDominatorTree &()> GetPDT);
273   void initializeModule();
274   void createHwasanCtorComdat();
275 
276   void initializeCallbacks(Module &M);
277 
278   Value *getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val);
279 
280   Value *getDynamicShadowIfunc(IRBuilder<> &IRB);
281   Value *getShadowNonTls(IRBuilder<> &IRB);
282 
283   void untagPointerOperand(Instruction *I, Value *Addr);
284   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
285 
286   int64_t getAccessInfo(bool IsWrite, unsigned AccessSizeIndex);
287   void instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
288                                   unsigned AccessSizeIndex,
289                                   Instruction *InsertBefore);
290   void instrumentMemAccessInline(Value *Ptr, bool IsWrite,
291                                  unsigned AccessSizeIndex,
292                                  Instruction *InsertBefore);
293   bool ignoreMemIntrinsic(MemIntrinsic *MI);
294   void instrumentMemIntrinsic(MemIntrinsic *MI);
295   bool instrumentMemAccess(InterestingMemoryOperand &O);
296   bool ignoreAccess(Instruction *Inst, Value *Ptr);
297   void getInterestingMemoryOperands(
298       Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting);
299 
300   bool isInterestingAlloca(const AllocaInst &AI);
301   void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
302   Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
303   Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
304   static bool isStandardLifetime(const AllocaInfo &AllocaInfo,
305                                  const DominatorTree &DT);
306   bool instrumentStack(
307       bool ShouldDetectUseAfterScope,
308       MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
309       SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
310       DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
311       SmallVectorImpl<Instruction *> &RetVec, Value *StackTag,
312       llvm::function_ref<const DominatorTree &()> GetDT,
313       llvm::function_ref<const PostDominatorTree &()> GetPDT);
314   Value *readRegister(IRBuilder<> &IRB, StringRef Name);
315   bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
316   Value *getNextTagWithCall(IRBuilder<> &IRB);
317   Value *getStackBaseTag(IRBuilder<> &IRB);
318   Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
319                       unsigned AllocaNo);
320   Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
321 
322   Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
323   Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
324   unsigned retagMask(unsigned AllocaNo);
325 
326   void emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord);
327 
328   void instrumentGlobal(GlobalVariable *GV, uint8_t Tag);
329   void instrumentGlobals();
330 
331   void instrumentPersonalityFunctions();
332 
333 private:
334   LLVMContext *C;
335   Module &M;
336   const StackSafetyGlobalInfo *SSI;
337   Triple TargetTriple;
338   FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
339   FunctionCallee HWAsanHandleVfork;
340 
341   /// This struct defines the shadow mapping using the rule:
342   ///   shadow = (mem >> Scale) + Offset.
343   /// If InGlobal is true, then
344   ///   extern char __hwasan_shadow[];
345   ///   shadow = (mem >> Scale) + &__hwasan_shadow
346   /// If InTls is true, then
347   ///   extern char *__hwasan_tls;
348   ///   shadow = (mem>>Scale) + align_up(__hwasan_shadow, kShadowBaseAlignment)
349   ///
350   /// If WithFrameRecord is true, then __hwasan_tls will be used to access the
351   /// ring buffer for storing stack allocations on targets that support it.
352   struct ShadowMapping {
353     int Scale;
354     uint64_t Offset;
355     bool InGlobal;
356     bool InTls;
357     bool WithFrameRecord;
358 
359     void init(Triple &TargetTriple, bool InstrumentWithCalls);
360     uint64_t getObjectAlignment() const { return 1ULL << Scale; }
361   };
362 
363   ShadowMapping Mapping;
364 
365   Type *VoidTy = Type::getVoidTy(M.getContext());
366   Type *IntptrTy;
367   Type *Int8PtrTy;
368   Type *Int8Ty;
369   Type *Int32Ty;
370   Type *Int64Ty = Type::getInt64Ty(M.getContext());
371 
372   bool CompileKernel;
373   bool Recover;
374   bool OutlinedChecks;
375   bool UseShortGranules;
376   bool InstrumentLandingPads;
377   bool InstrumentWithCalls;
378   bool InstrumentStack;
379   bool DetectUseAfterScope;
380   bool UsePageAliases;
381 
382   bool HasMatchAllTag = false;
383   uint8_t MatchAllTag = 0;
384 
385   unsigned PointerTagShift;
386   uint64_t TagMaskByte;
387 
388   Function *HwasanCtorFunction;
389 
390   FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
391   FunctionCallee HwasanMemoryAccessCallbackSized[2];
392 
393   FunctionCallee HwasanTagMemoryFunc;
394   FunctionCallee HwasanGenerateTagFunc;
395 
396   Constant *ShadowGlobal;
397 
398   Value *ShadowBase = nullptr;
399   Value *StackBaseTag = nullptr;
400   GlobalValue *ThreadPtrGlobal = nullptr;
401 };
402 
403 class HWAddressSanitizerLegacyPass : public FunctionPass {
404 public:
405   // Pass identification, replacement for typeid.
406   static char ID;
407 
408   explicit HWAddressSanitizerLegacyPass(bool CompileKernel = false,
409                                         bool Recover = false,
410                                         bool DisableOptimization = false)
411       : FunctionPass(ID), CompileKernel(CompileKernel), Recover(Recover),
412         DisableOptimization(DisableOptimization) {
413     initializeHWAddressSanitizerLegacyPassPass(
414         *PassRegistry::getPassRegistry());
415   }
416 
417   StringRef getPassName() const override { return "HWAddressSanitizer"; }
418 
419   bool doInitialization(Module &M) override {
420     HWASan = std::make_unique<HWAddressSanitizer>(M, CompileKernel, Recover,
421                                                   /*SSI=*/nullptr);
422     return true;
423   }
424 
425   bool runOnFunction(Function &F) override {
426     auto TargetTriple = Triple(F.getParent()->getTargetTriple());
427     if (shouldUseStackSafetyAnalysis(TargetTriple, DisableOptimization)) {
428       // We cannot call getAnalysis in doInitialization, that would cause a
429       // crash as the required analyses are not initialized yet.
430       HWASan->setSSI(
431           &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult());
432     }
433     return HWASan->sanitizeFunction(
434         F,
435         [&]() -> const DominatorTree & {
436           return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
437         },
438         [&]() -> const PostDominatorTree & {
439           return getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
440         });
441   }
442 
443   bool doFinalization(Module &M) override {
444     HWASan.reset();
445     return false;
446   }
447 
448   void getAnalysisUsage(AnalysisUsage &AU) const override {
449     // This is an over-estimation of, in case we are building for an
450     // architecture that doesn't allow stack tagging we will still load the
451     // analysis.
452     // This is so we don't need to plumb TargetTriple all the way to here.
453     if (mightUseStackSafetyAnalysis(DisableOptimization))
454       AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
455     AU.addRequired<DominatorTreeWrapperPass>();
456     AU.addRequired<PostDominatorTreeWrapperPass>();
457   }
458 
459 private:
460   std::unique_ptr<HWAddressSanitizer> HWASan;
461   bool CompileKernel;
462   bool Recover;
463   bool DisableOptimization;
464 };
465 
466 } // end anonymous namespace
467 
468 char HWAddressSanitizerLegacyPass::ID = 0;
469 
470 INITIALIZE_PASS_BEGIN(
471     HWAddressSanitizerLegacyPass, "hwasan",
472     "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
473     false)
474 INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
475 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
476 INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
477 INITIALIZE_PASS_END(
478     HWAddressSanitizerLegacyPass, "hwasan",
479     "HWAddressSanitizer: detect memory bugs using tagged addressing.", false,
480     false)
481 
482 FunctionPass *
483 llvm::createHWAddressSanitizerLegacyPassPass(bool CompileKernel, bool Recover,
484                                              bool DisableOptimization) {
485   assert(!CompileKernel || Recover);
486   return new HWAddressSanitizerLegacyPass(CompileKernel, Recover,
487                                           DisableOptimization);
488 }
489 
490 PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
491                                               ModuleAnalysisManager &MAM) {
492   const StackSafetyGlobalInfo *SSI = nullptr;
493   auto TargetTriple = llvm::Triple(M.getTargetTriple());
494   if (shouldUseStackSafetyAnalysis(TargetTriple, Options.DisableOptimization))
495     SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
496 
497   HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
498   bool Modified = false;
499   auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
500   for (Function &F : M) {
501     Modified |= HWASan.sanitizeFunction(
502         F,
503         [&]() -> const DominatorTree & {
504           return FAM.getResult<DominatorTreeAnalysis>(F);
505         },
506         [&]() -> const PostDominatorTree & {
507           return FAM.getResult<PostDominatorTreeAnalysis>(F);
508         });
509   }
510   if (Modified)
511     return PreservedAnalyses::none();
512   return PreservedAnalyses::all();
513 }
514 void HWAddressSanitizerPass::printPipeline(
515     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
516   static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
517       OS, MapClassName2PassName);
518   OS << "<";
519   if (Options.CompileKernel)
520     OS << "kernel;";
521   if (Options.Recover)
522     OS << "recover";
523   OS << ">";
524 }
525 
526 void HWAddressSanitizer::createHwasanCtorComdat() {
527   std::tie(HwasanCtorFunction, std::ignore) =
528       getOrCreateSanitizerCtorAndInitFunctions(
529           M, kHwasanModuleCtorName, kHwasanInitName,
530           /*InitArgTypes=*/{},
531           /*InitArgs=*/{},
532           // This callback is invoked when the functions are created the first
533           // time. Hook them into the global ctors list in that case:
534           [&](Function *Ctor, FunctionCallee) {
535             Comdat *CtorComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
536             Ctor->setComdat(CtorComdat);
537             appendToGlobalCtors(M, Ctor, 0, Ctor);
538           });
539 
540   // Create a note that contains pointers to the list of global
541   // descriptors. Adding a note to the output file will cause the linker to
542   // create a PT_NOTE program header pointing to the note that we can use to
543   // find the descriptor list starting from the program headers. A function
544   // provided by the runtime initializes the shadow memory for the globals by
545   // accessing the descriptor list via the note. The dynamic loader needs to
546   // call this function whenever a library is loaded.
547   //
548   // The reason why we use a note for this instead of a more conventional
549   // approach of having a global constructor pass a descriptor list pointer to
550   // the runtime is because of an order of initialization problem. With
551   // constructors we can encounter the following problematic scenario:
552   //
553   // 1) library A depends on library B and also interposes one of B's symbols
554   // 2) B's constructors are called before A's (as required for correctness)
555   // 3) during construction, B accesses one of its "own" globals (actually
556   //    interposed by A) and triggers a HWASAN failure due to the initialization
557   //    for A not having happened yet
558   //
559   // Even without interposition it is possible to run into similar situations in
560   // cases where two libraries mutually depend on each other.
561   //
562   // We only need one note per binary, so put everything for the note in a
563   // comdat. This needs to be a comdat with an .init_array section to prevent
564   // newer versions of lld from discarding the note.
565   //
566   // Create the note even if we aren't instrumenting globals. This ensures that
567   // binaries linked from object files with both instrumented and
568   // non-instrumented globals will end up with a note, even if a comdat from an
569   // object file with non-instrumented globals is selected. The note is harmless
570   // if the runtime doesn't support it, since it will just be ignored.
571   Comdat *NoteComdat = M.getOrInsertComdat(kHwasanModuleCtorName);
572 
573   Type *Int8Arr0Ty = ArrayType::get(Int8Ty, 0);
574   auto Start =
575       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
576                          nullptr, "__start_hwasan_globals");
577   Start->setVisibility(GlobalValue::HiddenVisibility);
578   Start->setDSOLocal(true);
579   auto Stop =
580       new GlobalVariable(M, Int8Arr0Ty, true, GlobalVariable::ExternalLinkage,
581                          nullptr, "__stop_hwasan_globals");
582   Stop->setVisibility(GlobalValue::HiddenVisibility);
583   Stop->setDSOLocal(true);
584 
585   // Null-terminated so actually 8 bytes, which are required in order to align
586   // the note properly.
587   auto *Name = ConstantDataArray::get(*C, "LLVM\0\0\0");
588 
589   auto *NoteTy = StructType::get(Int32Ty, Int32Ty, Int32Ty, Name->getType(),
590                                  Int32Ty, Int32Ty);
591   auto *Note =
592       new GlobalVariable(M, NoteTy, /*isConstant=*/true,
593                          GlobalValue::PrivateLinkage, nullptr, kHwasanNoteName);
594   Note->setSection(".note.hwasan.globals");
595   Note->setComdat(NoteComdat);
596   Note->setAlignment(Align(4));
597   Note->setDSOLocal(true);
598 
599   // The pointers in the note need to be relative so that the note ends up being
600   // placed in rodata, which is the standard location for notes.
601   auto CreateRelPtr = [&](Constant *Ptr) {
602     return ConstantExpr::getTrunc(
603         ConstantExpr::getSub(ConstantExpr::getPtrToInt(Ptr, Int64Ty),
604                              ConstantExpr::getPtrToInt(Note, Int64Ty)),
605         Int32Ty);
606   };
607   Note->setInitializer(ConstantStruct::getAnon(
608       {ConstantInt::get(Int32Ty, 8),                           // n_namesz
609        ConstantInt::get(Int32Ty, 8),                           // n_descsz
610        ConstantInt::get(Int32Ty, ELF::NT_LLVM_HWASAN_GLOBALS), // n_type
611        Name, CreateRelPtr(Start), CreateRelPtr(Stop)}));
612   appendToCompilerUsed(M, Note);
613 
614   // Create a zero-length global in hwasan_globals so that the linker will
615   // always create start and stop symbols.
616   auto Dummy = new GlobalVariable(
617       M, Int8Arr0Ty, /*isConstantGlobal*/ true, GlobalVariable::PrivateLinkage,
618       Constant::getNullValue(Int8Arr0Ty), "hwasan.dummy.global");
619   Dummy->setSection("hwasan_globals");
620   Dummy->setComdat(NoteComdat);
621   Dummy->setMetadata(LLVMContext::MD_associated,
622                      MDNode::get(*C, ValueAsMetadata::get(Note)));
623   appendToCompilerUsed(M, Dummy);
624 }
625 
626 /// Module-level initialization.
627 ///
628 /// inserts a call to __hwasan_init to the module's constructor list.
629 void HWAddressSanitizer::initializeModule() {
630   LLVM_DEBUG(dbgs() << "Init " << M.getName() << "\n");
631   auto &DL = M.getDataLayout();
632 
633   TargetTriple = Triple(M.getTargetTriple());
634 
635   // x86_64 currently has two modes:
636   // - Intel LAM (default)
637   // - pointer aliasing (heap only)
638   bool IsX86_64 = TargetTriple.getArch() == Triple::x86_64;
639   UsePageAliases = shouldUsePageAliases(TargetTriple);
640   InstrumentWithCalls = shouldInstrumentWithCalls(TargetTriple);
641   InstrumentStack = shouldInstrumentStack(TargetTriple);
642   DetectUseAfterScope = shouldDetectUseAfterScope(TargetTriple);
643   PointerTagShift = IsX86_64 ? 57 : 56;
644   TagMaskByte = IsX86_64 ? 0x3F : 0xFF;
645 
646   Mapping.init(TargetTriple, InstrumentWithCalls);
647 
648   C = &(M.getContext());
649   IRBuilder<> IRB(*C);
650   IntptrTy = IRB.getIntPtrTy(DL);
651   Int8PtrTy = IRB.getInt8PtrTy();
652   Int8Ty = IRB.getInt8Ty();
653   Int32Ty = IRB.getInt32Ty();
654 
655   HwasanCtorFunction = nullptr;
656 
657   // Older versions of Android do not have the required runtime support for
658   // short granules, global or personality function instrumentation. On other
659   // platforms we currently require using the latest version of the runtime.
660   bool NewRuntime =
661       !TargetTriple.isAndroid() || !TargetTriple.isAndroidVersionLT(30);
662 
663   UseShortGranules =
664       ClUseShortGranules.getNumOccurrences() ? ClUseShortGranules : NewRuntime;
665   OutlinedChecks =
666       TargetTriple.isAArch64() && TargetTriple.isOSBinFormatELF() &&
667       (ClInlineAllChecks.getNumOccurrences() ? !ClInlineAllChecks : !Recover);
668 
669   if (ClMatchAllTag.getNumOccurrences()) {
670     if (ClMatchAllTag != -1) {
671       HasMatchAllTag = true;
672       MatchAllTag = ClMatchAllTag & 0xFF;
673     }
674   } else if (CompileKernel) {
675     HasMatchAllTag = true;
676     MatchAllTag = 0xFF;
677   }
678 
679   // If we don't have personality function support, fall back to landing pads.
680   InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
681                               ? ClInstrumentLandingPads
682                               : !NewRuntime;
683 
684   if (!CompileKernel) {
685     createHwasanCtorComdat();
686     bool InstrumentGlobals =
687         ClGlobals.getNumOccurrences() ? ClGlobals : NewRuntime;
688 
689     if (InstrumentGlobals && !UsePageAliases)
690       instrumentGlobals();
691 
692     bool InstrumentPersonalityFunctions =
693         ClInstrumentPersonalityFunctions.getNumOccurrences()
694             ? ClInstrumentPersonalityFunctions
695             : NewRuntime;
696     if (InstrumentPersonalityFunctions)
697       instrumentPersonalityFunctions();
698   }
699 
700   if (!TargetTriple.isAndroid()) {
701     Constant *C = M.getOrInsertGlobal("__hwasan_tls", IntptrTy, [&] {
702       auto *GV = new GlobalVariable(M, IntptrTy, /*isConstant=*/false,
703                                     GlobalValue::ExternalLinkage, nullptr,
704                                     "__hwasan_tls", nullptr,
705                                     GlobalVariable::InitialExecTLSModel);
706       appendToCompilerUsed(M, GV);
707       return GV;
708     });
709     ThreadPtrGlobal = cast<GlobalVariable>(C);
710   }
711 }
712 
713 void HWAddressSanitizer::initializeCallbacks(Module &M) {
714   IRBuilder<> IRB(*C);
715   for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
716     const std::string TypeStr = AccessIsWrite ? "store" : "load";
717     const std::string EndingStr = Recover ? "_noabort" : "";
718 
719     HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
720         ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
721         FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
722 
723     for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
724          AccessSizeIndex++) {
725       HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
726           M.getOrInsertFunction(
727               ClMemoryAccessCallbackPrefix + TypeStr +
728                   itostr(1ULL << AccessSizeIndex) + EndingStr,
729               FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
730     }
731   }
732 
733   HwasanTagMemoryFunc = M.getOrInsertFunction(
734       "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
735   HwasanGenerateTagFunc =
736       M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
737 
738   ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
739                                      ArrayType::get(IRB.getInt8Ty(), 0));
740 
741   const std::string MemIntrinCallbackPrefix =
742       CompileKernel ? std::string("") : ClMemoryAccessCallbackPrefix;
743   HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
744                                         IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
745                                         IRB.getInt8PtrTy(), IntptrTy);
746   HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
747                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
748                                        IRB.getInt8PtrTy(), IntptrTy);
749   HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
750                                        IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
751                                        IRB.getInt32Ty(), IntptrTy);
752 
753   HWAsanHandleVfork =
754       M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
755 }
756 
757 Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
758   // An empty inline asm with input reg == output reg.
759   // An opaque no-op cast, basically.
760   // This prevents code bloat as a result of rematerializing trivial definitions
761   // such as constants or global addresses at every load and store.
762   InlineAsm *Asm =
763       InlineAsm::get(FunctionType::get(Int8PtrTy, {Val->getType()}, false),
764                      StringRef(""), StringRef("=r,0"),
765                      /*hasSideEffects=*/false);
766   return IRB.CreateCall(Asm, {Val}, ".hwasan.shadow");
767 }
768 
769 Value *HWAddressSanitizer::getDynamicShadowIfunc(IRBuilder<> &IRB) {
770   return getOpaqueNoopCast(IRB, ShadowGlobal);
771 }
772 
773 Value *HWAddressSanitizer::getShadowNonTls(IRBuilder<> &IRB) {
774   if (Mapping.Offset != kDynamicShadowSentinel)
775     return getOpaqueNoopCast(
776         IRB, ConstantExpr::getIntToPtr(
777                  ConstantInt::get(IntptrTy, Mapping.Offset), Int8PtrTy));
778 
779   if (Mapping.InGlobal) {
780     return getDynamicShadowIfunc(IRB);
781   } else {
782     Value *GlobalDynamicAddress =
783         IRB.GetInsertBlock()->getParent()->getParent()->getOrInsertGlobal(
784             kHwasanShadowMemoryDynamicAddress, Int8PtrTy);
785     return IRB.CreateLoad(Int8PtrTy, GlobalDynamicAddress);
786   }
787 }
788 
789 bool HWAddressSanitizer::ignoreAccess(Instruction *Inst, Value *Ptr) {
790   // Do not instrument acesses from different address spaces; we cannot deal
791   // with them.
792   Type *PtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
793   if (PtrTy->getPointerAddressSpace() != 0)
794     return true;
795 
796   // Ignore swifterror addresses.
797   // swifterror memory addresses are mem2reg promoted by instruction
798   // selection. As such they cannot have regular uses like an instrumentation
799   // function and it makes no sense to track them as memory.
800   if (Ptr->isSwiftError())
801     return true;
802 
803   if (findAllocaForValue(Ptr)) {
804     if (!InstrumentStack)
805       return true;
806     if (SSI && SSI->stackAccessIsSafe(*Inst))
807       return true;
808   }
809   return false;
810 }
811 
812 void HWAddressSanitizer::getInterestingMemoryOperands(
813     Instruction *I, SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
814   // Skip memory accesses inserted by another instrumentation.
815   if (I->hasMetadata("nosanitize"))
816     return;
817 
818   // Do not instrument the load fetching the dynamic shadow address.
819   if (ShadowBase == I)
820     return;
821 
822   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
823     if (!ClInstrumentReads || ignoreAccess(I, LI->getPointerOperand()))
824       return;
825     Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
826                              LI->getType(), LI->getAlign());
827   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
828     if (!ClInstrumentWrites || ignoreAccess(I, SI->getPointerOperand()))
829       return;
830     Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
831                              SI->getValueOperand()->getType(), SI->getAlign());
832   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
833     if (!ClInstrumentAtomics || ignoreAccess(I, RMW->getPointerOperand()))
834       return;
835     Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
836                              RMW->getValOperand()->getType(), None);
837   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
838     if (!ClInstrumentAtomics || ignoreAccess(I, XCHG->getPointerOperand()))
839       return;
840     Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
841                              XCHG->getCompareOperand()->getType(), None);
842   } else if (auto CI = dyn_cast<CallInst>(I)) {
843     for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
844       if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
845           ignoreAccess(I, CI->getArgOperand(ArgNo)))
846         continue;
847       Type *Ty = CI->getParamByValType(ArgNo);
848       Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
849     }
850   }
851 }
852 
853 static unsigned getPointerOperandIndex(Instruction *I) {
854   if (LoadInst *LI = dyn_cast<LoadInst>(I))
855     return LI->getPointerOperandIndex();
856   if (StoreInst *SI = dyn_cast<StoreInst>(I))
857     return SI->getPointerOperandIndex();
858   if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I))
859     return RMW->getPointerOperandIndex();
860   if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I))
861     return XCHG->getPointerOperandIndex();
862   report_fatal_error("Unexpected instruction");
863   return -1;
864 }
865 
866 static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
867   size_t Res = countTrailingZeros(TypeSize / 8);
868   assert(Res < kNumberOfAccessSizes);
869   return Res;
870 }
871 
872 void HWAddressSanitizer::untagPointerOperand(Instruction *I, Value *Addr) {
873   if (TargetTriple.isAArch64() || TargetTriple.getArch() == Triple::x86_64)
874     return;
875 
876   IRBuilder<> IRB(I);
877   Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
878   Value *UntaggedPtr =
879       IRB.CreateIntToPtr(untagPointer(IRB, AddrLong), Addr->getType());
880   I->setOperand(getPointerOperandIndex(I), UntaggedPtr);
881 }
882 
883 Value *HWAddressSanitizer::memToShadow(Value *Mem, IRBuilder<> &IRB) {
884   // Mem >> Scale
885   Value *Shadow = IRB.CreateLShr(Mem, Mapping.Scale);
886   if (Mapping.Offset == 0)
887     return IRB.CreateIntToPtr(Shadow, Int8PtrTy);
888   // (Mem >> Scale) + Offset
889   return IRB.CreateGEP(Int8Ty, ShadowBase, Shadow);
890 }
891 
892 int64_t HWAddressSanitizer::getAccessInfo(bool IsWrite,
893                                           unsigned AccessSizeIndex) {
894   return (CompileKernel << HWASanAccessInfo::CompileKernelShift) +
895          (HasMatchAllTag << HWASanAccessInfo::HasMatchAllShift) +
896          (MatchAllTag << HWASanAccessInfo::MatchAllShift) +
897          (Recover << HWASanAccessInfo::RecoverShift) +
898          (IsWrite << HWASanAccessInfo::IsWriteShift) +
899          (AccessSizeIndex << HWASanAccessInfo::AccessSizeShift);
900 }
901 
902 void HWAddressSanitizer::instrumentMemAccessOutline(Value *Ptr, bool IsWrite,
903                                                     unsigned AccessSizeIndex,
904                                                     Instruction *InsertBefore) {
905   assert(!UsePageAliases);
906   const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
907   IRBuilder<> IRB(InsertBefore);
908   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
909   Ptr = IRB.CreateBitCast(Ptr, Int8PtrTy);
910   IRB.CreateCall(Intrinsic::getDeclaration(
911                      M, UseShortGranules
912                             ? Intrinsic::hwasan_check_memaccess_shortgranules
913                             : Intrinsic::hwasan_check_memaccess),
914                  {ShadowBase, Ptr, ConstantInt::get(Int32Ty, AccessInfo)});
915 }
916 
917 void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
918                                                    unsigned AccessSizeIndex,
919                                                    Instruction *InsertBefore) {
920   assert(!UsePageAliases);
921   const int64_t AccessInfo = getAccessInfo(IsWrite, AccessSizeIndex);
922   IRBuilder<> IRB(InsertBefore);
923 
924   Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
925   Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift),
926                                   IRB.getInt8Ty());
927   Value *AddrLong = untagPointer(IRB, PtrLong);
928   Value *Shadow = memToShadow(AddrLong, IRB);
929   Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
930   Value *TagMismatch = IRB.CreateICmpNE(PtrTag, MemTag);
931 
932   if (HasMatchAllTag) {
933     Value *TagNotIgnored = IRB.CreateICmpNE(
934         PtrTag, ConstantInt::get(PtrTag->getType(), MatchAllTag));
935     TagMismatch = IRB.CreateAnd(TagMismatch, TagNotIgnored);
936   }
937 
938   Instruction *CheckTerm =
939       SplitBlockAndInsertIfThen(TagMismatch, InsertBefore, false,
940                                 MDBuilder(*C).createBranchWeights(1, 100000));
941 
942   IRB.SetInsertPoint(CheckTerm);
943   Value *OutOfShortGranuleTagRange =
944       IRB.CreateICmpUGT(MemTag, ConstantInt::get(Int8Ty, 15));
945   Instruction *CheckFailTerm =
946       SplitBlockAndInsertIfThen(OutOfShortGranuleTagRange, CheckTerm, !Recover,
947                                 MDBuilder(*C).createBranchWeights(1, 100000));
948 
949   IRB.SetInsertPoint(CheckTerm);
950   Value *PtrLowBits = IRB.CreateTrunc(IRB.CreateAnd(PtrLong, 15), Int8Ty);
951   PtrLowBits = IRB.CreateAdd(
952       PtrLowBits, ConstantInt::get(Int8Ty, (1 << AccessSizeIndex) - 1));
953   Value *PtrLowBitsOOB = IRB.CreateICmpUGE(PtrLowBits, MemTag);
954   SplitBlockAndInsertIfThen(PtrLowBitsOOB, CheckTerm, false,
955                             MDBuilder(*C).createBranchWeights(1, 100000),
956                             (DomTreeUpdater *)nullptr, nullptr,
957                             CheckFailTerm->getParent());
958 
959   IRB.SetInsertPoint(CheckTerm);
960   Value *InlineTagAddr = IRB.CreateOr(AddrLong, 15);
961   InlineTagAddr = IRB.CreateIntToPtr(InlineTagAddr, Int8PtrTy);
962   Value *InlineTag = IRB.CreateLoad(Int8Ty, InlineTagAddr);
963   Value *InlineTagMismatch = IRB.CreateICmpNE(PtrTag, InlineTag);
964   SplitBlockAndInsertIfThen(InlineTagMismatch, CheckTerm, false,
965                             MDBuilder(*C).createBranchWeights(1, 100000),
966                             (DomTreeUpdater *)nullptr, nullptr,
967                             CheckFailTerm->getParent());
968 
969   IRB.SetInsertPoint(CheckFailTerm);
970   InlineAsm *Asm;
971   switch (TargetTriple.getArch()) {
972   case Triple::x86_64:
973     // The signal handler will find the data address in rdi.
974     Asm = InlineAsm::get(
975         FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
976         "int3\nnopl " +
977             itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
978             "(%rax)",
979         "{rdi}",
980         /*hasSideEffects=*/true);
981     break;
982   case Triple::aarch64:
983   case Triple::aarch64_be:
984     // The signal handler will find the data address in x0.
985     Asm = InlineAsm::get(
986         FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
987         "brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
988         "{x0}",
989         /*hasSideEffects=*/true);
990     break;
991   default:
992     report_fatal_error("unsupported architecture");
993   }
994   IRB.CreateCall(Asm, PtrLong);
995   if (Recover)
996     cast<BranchInst>(CheckFailTerm)->setSuccessor(0, CheckTerm->getParent());
997 }
998 
999 bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
1000   if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1001     return (!ClInstrumentWrites || ignoreAccess(MTI, MTI->getDest())) &&
1002            (!ClInstrumentReads || ignoreAccess(MTI, MTI->getSource()));
1003   }
1004   if (isa<MemSetInst>(MI))
1005     return !ClInstrumentWrites || ignoreAccess(MI, MI->getDest());
1006   return false;
1007 }
1008 
1009 void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
1010   IRBuilder<> IRB(MI);
1011   if (isa<MemTransferInst>(MI)) {
1012     IRB.CreateCall(
1013         isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
1014         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
1015          IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
1016          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
1017   } else if (isa<MemSetInst>(MI)) {
1018     IRB.CreateCall(
1019         HWAsanMemset,
1020         {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
1021          IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
1022          IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
1023   }
1024   MI->eraseFromParent();
1025 }
1026 
1027 bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
1028   Value *Addr = O.getPtr();
1029 
1030   LLVM_DEBUG(dbgs() << "Instrumenting: " << O.getInsn() << "\n");
1031 
1032   if (O.MaybeMask)
1033     return false; // FIXME
1034 
1035   IRBuilder<> IRB(O.getInsn());
1036   if (isPowerOf2_64(O.TypeSize) &&
1037       (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
1038       (!O.Alignment || *O.Alignment >= (1ULL << Mapping.Scale) ||
1039        *O.Alignment >= O.TypeSize / 8)) {
1040     size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize);
1041     if (InstrumentWithCalls) {
1042       IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
1043                      IRB.CreatePointerCast(Addr, IntptrTy));
1044     } else if (OutlinedChecks) {
1045       instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
1046     } else {
1047       instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
1048     }
1049   } else {
1050     IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite],
1051                    {IRB.CreatePointerCast(Addr, IntptrTy),
1052                     ConstantInt::get(IntptrTy, O.TypeSize / 8)});
1053   }
1054   untagPointerOperand(O.getInsn(), Addr);
1055 
1056   return true;
1057 }
1058 
1059 static uint64_t getAllocaSizeInBytes(const AllocaInst &AI) {
1060   uint64_t ArraySize = 1;
1061   if (AI.isArrayAllocation()) {
1062     const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
1063     assert(CI && "non-constant array size");
1064     ArraySize = CI->getZExtValue();
1065   }
1066   Type *Ty = AI.getAllocatedType();
1067   uint64_t SizeInBytes = AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
1068   return SizeInBytes * ArraySize;
1069 }
1070 
1071 void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
1072                                    size_t Size) {
1073   size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1074   if (!UseShortGranules)
1075     Size = AlignedSize;
1076 
1077   Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
1078   if (InstrumentWithCalls) {
1079     IRB.CreateCall(HwasanTagMemoryFunc,
1080                    {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
1081                     ConstantInt::get(IntptrTy, AlignedSize)});
1082   } else {
1083     size_t ShadowSize = Size >> Mapping.Scale;
1084     Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
1085     // If this memset is not inlined, it will be intercepted in the hwasan
1086     // runtime library. That's OK, because the interceptor skips the checks if
1087     // the address is in the shadow region.
1088     // FIXME: the interceptor is not as fast as real memset. Consider lowering
1089     // llvm.memset right here into either a sequence of stores, or a call to
1090     // hwasan_tag_memory.
1091     if (ShadowSize)
1092       IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1));
1093     if (Size != AlignedSize) {
1094       IRB.CreateStore(
1095           ConstantInt::get(Int8Ty, Size % Mapping.getObjectAlignment()),
1096           IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
1097       IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
1098                                    Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
1099                                    AlignedSize - 1));
1100     }
1101   }
1102 }
1103 
1104 unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
1105   if (TargetTriple.getArch() == Triple::x86_64)
1106     return AllocaNo & TagMaskByte;
1107 
1108   // A list of 8-bit numbers that have at most one run of non-zero bits.
1109   // x = x ^ (mask << 56) can be encoded as a single armv8 instruction for these
1110   // masks.
1111   // The list does not include the value 255, which is used for UAR.
1112   //
1113   // Because we are more likely to use earlier elements of this list than later
1114   // ones, it is sorted in increasing order of probability of collision with a
1115   // mask allocated (temporally) nearby. The program that generated this list
1116   // can be found at:
1117   // https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
1118   static unsigned FastMasks[] = {0,  128, 64,  192, 32,  96,  224, 112, 240,
1119                                  48, 16,  120, 248, 56,  24,  8,   124, 252,
1120                                  60, 28,  12,  4,   126, 254, 62,  30,  14,
1121                                  6,  2,   127, 63,  31,  15,  7,   3,   1};
1122   return FastMasks[AllocaNo % (sizeof(FastMasks) / sizeof(FastMasks[0]))];
1123 }
1124 
1125 Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
1126   if (TargetTriple.getArch() == Triple::x86_64) {
1127     Constant *TagMask = ConstantInt::get(IntptrTy, TagMaskByte);
1128     Value *NewTag = IRB.CreateAnd(OldTag, TagMask);
1129     return NewTag;
1130   }
1131   // aarch64 uses 8-bit tags, so no mask is needed.
1132   return OldTag;
1133 }
1134 
1135 Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
1136   return IRB.CreateZExt(IRB.CreateCall(HwasanGenerateTagFunc), IntptrTy);
1137 }
1138 
1139 Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
1140   if (ClGenerateTagsWithCalls)
1141     return getNextTagWithCall(IRB);
1142   if (StackBaseTag)
1143     return StackBaseTag;
1144   // FIXME: use addressofreturnaddress (but implement it in aarch64 backend
1145   // first).
1146   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1147   auto GetStackPointerFn = Intrinsic::getDeclaration(
1148       M, Intrinsic::frameaddress,
1149       IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
1150   Value *StackPointer = IRB.CreateCall(
1151       GetStackPointerFn, {Constant::getNullValue(IRB.getInt32Ty())});
1152 
1153   // Extract some entropy from the stack pointer for the tags.
1154   // Take bits 20..28 (ASLR entropy) and xor with bits 0..8 (these differ
1155   // between functions).
1156   Value *StackPointerLong = IRB.CreatePointerCast(StackPointer, IntptrTy);
1157   Value *StackTag =
1158       applyTagMask(IRB, IRB.CreateXor(StackPointerLong,
1159                                       IRB.CreateLShr(StackPointerLong, 20)));
1160   StackTag->setName("hwasan.stack.base.tag");
1161   return StackTag;
1162 }
1163 
1164 Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
1165                                         AllocaInst *AI, unsigned AllocaNo) {
1166   if (ClGenerateTagsWithCalls)
1167     return getNextTagWithCall(IRB);
1168   return IRB.CreateXor(StackTag,
1169                        ConstantInt::get(IntptrTy, retagMask(AllocaNo)));
1170 }
1171 
1172 Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
1173   if (ClUARRetagToZero)
1174     return ConstantInt::get(IntptrTy, 0);
1175   if (ClGenerateTagsWithCalls)
1176     return getNextTagWithCall(IRB);
1177   return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, TagMaskByte));
1178 }
1179 
1180 // Add a tag to an address.
1181 Value *HWAddressSanitizer::tagPointer(IRBuilder<> &IRB, Type *Ty,
1182                                       Value *PtrLong, Value *Tag) {
1183   assert(!UsePageAliases);
1184   Value *TaggedPtrLong;
1185   if (CompileKernel) {
1186     // Kernel addresses have 0xFF in the most significant byte.
1187     Value *ShiftedTag =
1188         IRB.CreateOr(IRB.CreateShl(Tag, PointerTagShift),
1189                      ConstantInt::get(IntptrTy, (1ULL << PointerTagShift) - 1));
1190     TaggedPtrLong = IRB.CreateAnd(PtrLong, ShiftedTag);
1191   } else {
1192     // Userspace can simply do OR (tag << PointerTagShift);
1193     Value *ShiftedTag = IRB.CreateShl(Tag, PointerTagShift);
1194     TaggedPtrLong = IRB.CreateOr(PtrLong, ShiftedTag);
1195   }
1196   return IRB.CreateIntToPtr(TaggedPtrLong, Ty);
1197 }
1198 
1199 // Remove tag from an address.
1200 Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
1201   assert(!UsePageAliases);
1202   Value *UntaggedPtrLong;
1203   if (CompileKernel) {
1204     // Kernel addresses have 0xFF in the most significant byte.
1205     UntaggedPtrLong =
1206         IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
1207                                                0xFFULL << PointerTagShift));
1208   } else {
1209     // Userspace addresses have 0x00.
1210     UntaggedPtrLong =
1211         IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(),
1212                                                 ~(0xFFULL << PointerTagShift)));
1213   }
1214   return UntaggedPtrLong;
1215 }
1216 
1217 Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
1218   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1219   if (TargetTriple.isAArch64() && TargetTriple.isAndroid()) {
1220     // Android provides a fixed TLS slot for sanitizers. See TLS_SLOT_SANITIZER
1221     // in Bionic's libc/private/bionic_tls.h.
1222     Function *ThreadPointerFunc =
1223         Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
1224     Value *SlotPtr = IRB.CreatePointerCast(
1225         IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
1226                                IRB.CreateCall(ThreadPointerFunc), 0x30),
1227         Ty->getPointerTo(0));
1228     return SlotPtr;
1229   }
1230   if (ThreadPtrGlobal)
1231     return ThreadPtrGlobal;
1232 
1233   return nullptr;
1234 }
1235 
1236 void HWAddressSanitizer::emitPrologue(IRBuilder<> &IRB, bool WithFrameRecord) {
1237   if (!Mapping.InTls)
1238     ShadowBase = getShadowNonTls(IRB);
1239   else if (!WithFrameRecord && TargetTriple.isAndroid())
1240     ShadowBase = getDynamicShadowIfunc(IRB);
1241 
1242   if (!WithFrameRecord && ShadowBase)
1243     return;
1244 
1245   Value *SlotPtr = getHwasanThreadSlotPtr(IRB, IntptrTy);
1246   assert(SlotPtr);
1247 
1248   Value *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
1249   // Extract the address field from ThreadLong. Unnecessary on AArch64 with TBI.
1250   Value *ThreadLongMaybeUntagged =
1251       TargetTriple.isAArch64() ? ThreadLong : untagPointer(IRB, ThreadLong);
1252 
1253   if (WithFrameRecord) {
1254     Function *F = IRB.GetInsertBlock()->getParent();
1255     StackBaseTag = IRB.CreateAShr(ThreadLong, 3);
1256 
1257     // Prepare ring buffer data.
1258     Value *PC;
1259     if (TargetTriple.getArch() == Triple::aarch64)
1260       PC = readRegister(IRB, "pc");
1261     else
1262       PC = IRB.CreatePtrToInt(F, IntptrTy);
1263     Module *M = F->getParent();
1264     auto GetStackPointerFn = Intrinsic::getDeclaration(
1265         M, Intrinsic::frameaddress,
1266         IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
1267     Value *SP = IRB.CreatePtrToInt(
1268         IRB.CreateCall(GetStackPointerFn,
1269                        {Constant::getNullValue(IRB.getInt32Ty())}),
1270         IntptrTy);
1271     // Mix SP and PC.
1272     // Assumptions:
1273     // PC is 0x0000PPPPPPPPPPPP  (48 bits are meaningful, others are zero)
1274     // SP is 0xsssssssssssSSSS0  (4 lower bits are zero)
1275     // We only really need ~20 lower non-zero bits (SSSS), so we mix like this:
1276     //       0xSSSSPPPPPPPPPPPP
1277     SP = IRB.CreateShl(SP, 44);
1278 
1279     // Store data to ring buffer.
1280     Value *RecordPtr =
1281         IRB.CreateIntToPtr(ThreadLongMaybeUntagged, IntptrTy->getPointerTo(0));
1282     IRB.CreateStore(IRB.CreateOr(PC, SP), RecordPtr);
1283 
1284     // Update the ring buffer. Top byte of ThreadLong defines the size of the
1285     // buffer in pages, it must be a power of two, and the start of the buffer
1286     // must be aligned by twice that much. Therefore wrap around of the ring
1287     // buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
1288     // The use of AShr instead of LShr is due to
1289     //   https://bugs.llvm.org/show_bug.cgi?id=39030
1290     // Runtime library makes sure not to use the highest bit.
1291     Value *WrapMask = IRB.CreateXor(
1292         IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
1293         ConstantInt::get(IntptrTy, (uint64_t)-1));
1294     Value *ThreadLongNew = IRB.CreateAnd(
1295         IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 8)), WrapMask);
1296     IRB.CreateStore(ThreadLongNew, SlotPtr);
1297   }
1298 
1299   if (!ShadowBase) {
1300     // Get shadow base address by aligning RecordPtr up.
1301     // Note: this is not correct if the pointer is already aligned.
1302     // Runtime library will make sure this never happens.
1303     ShadowBase = IRB.CreateAdd(
1304         IRB.CreateOr(
1305             ThreadLongMaybeUntagged,
1306             ConstantInt::get(IntptrTy, (1ULL << kShadowBaseAlignment) - 1)),
1307         ConstantInt::get(IntptrTy, 1), "hwasan.shadow");
1308     ShadowBase = IRB.CreateIntToPtr(ShadowBase, Int8PtrTy);
1309   }
1310 }
1311 
1312 Value *HWAddressSanitizer::readRegister(IRBuilder<> &IRB, StringRef Name) {
1313   Module *M = IRB.GetInsertBlock()->getParent()->getParent();
1314   Function *ReadRegister =
1315       Intrinsic::getDeclaration(M, Intrinsic::read_register, IntptrTy);
1316   MDNode *MD = MDNode::get(*C, {MDString::get(*C, Name)});
1317   Value *Args[] = {MetadataAsValue::get(*C, MD)};
1318   return IRB.CreateCall(ReadRegister, Args);
1319 }
1320 
1321 bool HWAddressSanitizer::instrumentLandingPads(
1322     SmallVectorImpl<Instruction *> &LandingPadVec) {
1323   for (auto *LP : LandingPadVec) {
1324     IRBuilder<> IRB(LP->getNextNode());
1325     IRB.CreateCall(
1326         HWAsanHandleVfork,
1327         {readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
1328                                                                       : "sp")});
1329   }
1330   return true;
1331 }
1332 
1333 static bool
1334 maybeReachableFromEachOther(const SmallVectorImpl<IntrinsicInst *> &Insts,
1335                             const DominatorTree &DT) {
1336   // If we have too many lifetime ends, give up, as the algorithm below is N^2.
1337   if (Insts.size() > ClMaxLifetimes)
1338     return true;
1339   for (size_t I = 0; I < Insts.size(); ++I) {
1340     for (size_t J = 0; J < Insts.size(); ++J) {
1341       if (I == J)
1342         continue;
1343       if (isPotentiallyReachable(Insts[I], Insts[J], nullptr, &DT))
1344         return true;
1345     }
1346   }
1347   return false;
1348 }
1349 
1350 // static
1351 bool HWAddressSanitizer::isStandardLifetime(const AllocaInfo &AllocaInfo,
1352                                             const DominatorTree &DT) {
1353   // An alloca that has exactly one start and end in every possible execution.
1354   // If it has multiple ends, they have to be unreachable from each other, so
1355   // at most one of them is actually used for each execution of the function.
1356   return AllocaInfo.LifetimeStart.size() == 1 &&
1357          (AllocaInfo.LifetimeEnd.size() == 1 ||
1358           (AllocaInfo.LifetimeEnd.size() > 0 &&
1359            !maybeReachableFromEachOther(AllocaInfo.LifetimeEnd, DT)));
1360 }
1361 
1362 bool HWAddressSanitizer::instrumentStack(
1363     bool ShouldDetectUseAfterScope,
1364     MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument,
1365     SmallVector<Instruction *, 4> &UnrecognizedLifetimes,
1366     DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> &AllocaDbgMap,
1367     SmallVectorImpl<Instruction *> &RetVec, Value *StackTag,
1368     llvm::function_ref<const DominatorTree &()> GetDT,
1369     llvm::function_ref<const PostDominatorTree &()> GetPDT) {
1370   // Ideally, we want to calculate tagged stack base pointer, and rewrite all
1371   // alloca addresses using that. Unfortunately, offsets are not known yet
1372   // (unless we use ASan-style mega-alloca). Instead we keep the base tag in a
1373   // temp, shift-OR it into each alloca address and xor with the retag mask.
1374   // This generates one extra instruction per alloca use.
1375   unsigned int I = 0;
1376 
1377   for (auto &KV : AllocasToInstrument) {
1378     auto N = I++;
1379     auto *AI = KV.first;
1380     AllocaInfo &Info = KV.second;
1381     IRBuilder<> IRB(AI->getNextNode());
1382 
1383     // Replace uses of the alloca with tagged address.
1384     Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
1385     Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
1386     Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
1387     std::string Name =
1388         AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
1389     Replacement->setName(Name + ".hwasan");
1390 
1391     AI->replaceUsesWithIf(Replacement,
1392                           [AILong](Use &U) { return U.getUser() != AILong; });
1393 
1394     for (auto *DDI : AllocaDbgMap.lookup(AI)) {
1395       // Prepend "tag_offset, N" to the dwarf expression.
1396       // Tag offset logically applies to the alloca pointer, and it makes sense
1397       // to put it at the beginning of the expression.
1398       SmallVector<uint64_t, 8> NewOps = {dwarf::DW_OP_LLVM_tag_offset,
1399                                          retagMask(N)};
1400       for (size_t LocNo = 0; LocNo < DDI->getNumVariableLocationOps(); ++LocNo)
1401         if (DDI->getVariableLocationOp(LocNo) == AI)
1402           DDI->setExpression(DIExpression::appendOpsToArg(DDI->getExpression(),
1403                                                           NewOps, LocNo));
1404     }
1405 
1406     size_t Size = getAllocaSizeInBytes(*AI);
1407     size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1408     auto TagEnd = [&](Instruction *Node) {
1409       IRB.SetInsertPoint(Node);
1410       Value *UARTag = getUARTag(IRB, StackTag);
1411       tagAlloca(IRB, AI, UARTag, AlignedSize);
1412     };
1413     bool StandardLifetime =
1414         UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT());
1415     if (ShouldDetectUseAfterScope && StandardLifetime) {
1416       IntrinsicInst *Start = Info.LifetimeStart[0];
1417       IRB.SetInsertPoint(Start->getNextNode());
1418       tagAlloca(IRB, AI, Tag, Size);
1419       if (!forAllReachableExits(GetDT(), GetPDT(), Start, Info.LifetimeEnd,
1420                                 RetVec, TagEnd)) {
1421         for (auto *End : Info.LifetimeEnd)
1422           End->eraseFromParent();
1423       }
1424     } else {
1425       tagAlloca(IRB, AI, Tag, Size);
1426       for (auto *RI : RetVec)
1427         TagEnd(RI);
1428       if (!StandardLifetime) {
1429         for (auto &II : Info.LifetimeStart)
1430           II->eraseFromParent();
1431         for (auto &II : Info.LifetimeEnd)
1432           II->eraseFromParent();
1433       }
1434     }
1435   }
1436   for (auto &I : UnrecognizedLifetimes)
1437     I->eraseFromParent();
1438   return true;
1439 }
1440 
1441 bool HWAddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
1442   return (AI.getAllocatedType()->isSized() &&
1443           // FIXME: instrument dynamic allocas, too
1444           AI.isStaticAlloca() &&
1445           // alloca() may be called with 0 size, ignore it.
1446           getAllocaSizeInBytes(AI) > 0 &&
1447           // We are only interested in allocas not promotable to registers.
1448           // Promotable allocas are common under -O0.
1449           !isAllocaPromotable(&AI) &&
1450           // inalloca allocas are not treated as static, and we don't want
1451           // dynamic alloca instrumentation for them as well.
1452           !AI.isUsedWithInAlloca() &&
1453           // swifterror allocas are register promoted by ISel
1454           !AI.isSwiftError()) &&
1455          // safe allocas are not interesting
1456          !(SSI && SSI->isSafe(AI));
1457 }
1458 
1459 DenseMap<AllocaInst *, AllocaInst *> HWAddressSanitizer::padInterestingAllocas(
1460     const MapVector<AllocaInst *, AllocaInfo> &AllocasToInstrument) {
1461   DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap;
1462   for (auto &KV : AllocasToInstrument) {
1463     AllocaInst *AI = KV.first;
1464     uint64_t Size = getAllocaSizeInBytes(*AI);
1465     uint64_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
1466     AI->setAlignment(
1467         Align(std::max(AI->getAlignment(), Mapping.getObjectAlignment())));
1468     if (Size != AlignedSize) {
1469       Type *AllocatedType = AI->getAllocatedType();
1470       if (AI->isArrayAllocation()) {
1471         uint64_t ArraySize =
1472             cast<ConstantInt>(AI->getArraySize())->getZExtValue();
1473         AllocatedType = ArrayType::get(AllocatedType, ArraySize);
1474       }
1475       Type *TypeWithPadding = StructType::get(
1476           AllocatedType, ArrayType::get(Int8Ty, AlignedSize - Size));
1477       auto *NewAI = new AllocaInst(
1478           TypeWithPadding, AI->getType()->getAddressSpace(), nullptr, "", AI);
1479       NewAI->takeName(AI);
1480       NewAI->setAlignment(AI->getAlign());
1481       NewAI->setUsedWithInAlloca(AI->isUsedWithInAlloca());
1482       NewAI->setSwiftError(AI->isSwiftError());
1483       NewAI->copyMetadata(*AI);
1484       auto *Bitcast = new BitCastInst(NewAI, AI->getType(), "", AI);
1485       AI->replaceAllUsesWith(Bitcast);
1486       AllocaToPaddedAllocaMap[AI] = NewAI;
1487     }
1488   }
1489   return AllocaToPaddedAllocaMap;
1490 }
1491 
1492 bool HWAddressSanitizer::sanitizeFunction(
1493     Function &F, llvm::function_ref<const DominatorTree &()> GetDT,
1494     llvm::function_ref<const PostDominatorTree &()> GetPDT) {
1495   if (&F == HwasanCtorFunction)
1496     return false;
1497 
1498   if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
1499     return false;
1500 
1501   LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
1502 
1503   SmallVector<InterestingMemoryOperand, 16> OperandsToInstrument;
1504   SmallVector<MemIntrinsic *, 16> IntrinToInstrument;
1505   MapVector<AllocaInst *, AllocaInfo> AllocasToInstrument;
1506   SmallVector<Instruction *, 8> RetVec;
1507   SmallVector<Instruction *, 8> LandingPadVec;
1508   SmallVector<Instruction *, 4> UnrecognizedLifetimes;
1509   DenseMap<AllocaInst *, std::vector<DbgVariableIntrinsic *>> AllocaDbgMap;
1510   bool CallsReturnTwice = false;
1511   for (auto &BB : F) {
1512     for (auto &Inst : BB) {
1513       if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
1514         if (CI->canReturnTwice()) {
1515           CallsReturnTwice = true;
1516         }
1517       }
1518       if (InstrumentStack) {
1519         if (AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
1520           if (isInterestingAlloca(*AI))
1521             AllocasToInstrument.insert({AI, {}});
1522           continue;
1523         }
1524         auto *II = dyn_cast<IntrinsicInst>(&Inst);
1525         if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
1526                    II->getIntrinsicID() == Intrinsic::lifetime_end)) {
1527           AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
1528           if (!AI) {
1529             UnrecognizedLifetimes.push_back(&Inst);
1530             continue;
1531           }
1532           if (!isInterestingAlloca(*AI))
1533             continue;
1534           if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1535             AllocasToInstrument[AI].LifetimeStart.push_back(II);
1536           else
1537             AllocasToInstrument[AI].LifetimeEnd.push_back(II);
1538           continue;
1539         }
1540       }
1541 
1542       if (isa<ReturnInst>(Inst)) {
1543         if (CallInst *CI = Inst.getParent()->getTerminatingMustTailCall())
1544           RetVec.push_back(CI);
1545         else
1546           RetVec.push_back(&Inst);
1547       } else if (isa<ResumeInst, CleanupReturnInst>(Inst)) {
1548         RetVec.push_back(&Inst);
1549       }
1550 
1551       if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
1552         for (Value *V : DVI->location_ops()) {
1553           if (auto *Alloca = dyn_cast_or_null<AllocaInst>(V))
1554             if (!AllocaDbgMap.count(Alloca) ||
1555                 AllocaDbgMap[Alloca].back() != DVI)
1556               AllocaDbgMap[Alloca].push_back(DVI);
1557         }
1558       }
1559 
1560       if (InstrumentLandingPads && isa<LandingPadInst>(Inst))
1561         LandingPadVec.push_back(&Inst);
1562 
1563       getInterestingMemoryOperands(&Inst, OperandsToInstrument);
1564 
1565       if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&Inst))
1566         if (!ignoreMemIntrinsic(MI))
1567           IntrinToInstrument.push_back(MI);
1568     }
1569   }
1570 
1571   initializeCallbacks(*F.getParent());
1572 
1573   bool Changed = false;
1574 
1575   if (!LandingPadVec.empty())
1576     Changed |= instrumentLandingPads(LandingPadVec);
1577 
1578   if (AllocasToInstrument.empty() && F.hasPersonalityFn() &&
1579       F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
1580     // __hwasan_personality_thunk is a no-op for functions without an
1581     // instrumented stack, so we can drop it.
1582     F.setPersonalityFn(nullptr);
1583     Changed = true;
1584   }
1585 
1586   if (AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
1587       IntrinToInstrument.empty())
1588     return Changed;
1589 
1590   assert(!ShadowBase);
1591 
1592   Instruction *InsertPt = &*F.getEntryBlock().begin();
1593   IRBuilder<> EntryIRB(InsertPt);
1594   emitPrologue(EntryIRB,
1595                /*WithFrameRecord*/ ClRecordStackHistory &&
1596                    Mapping.WithFrameRecord && !AllocasToInstrument.empty());
1597 
1598   if (!AllocasToInstrument.empty()) {
1599     Value *StackTag =
1600         ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
1601     // Calls to functions that may return twice (e.g. setjmp) confuse the
1602     // postdominator analysis, and will leave us to keep memory tagged after
1603     // function return. Work around this by always untagging at every return
1604     // statement if return_twice functions are called.
1605     instrumentStack(DetectUseAfterScope && !CallsReturnTwice,
1606                     AllocasToInstrument, UnrecognizedLifetimes, AllocaDbgMap,
1607                     RetVec, StackTag, GetDT, GetPDT);
1608   }
1609   // Pad and align each of the allocas that we instrumented to stop small
1610   // uninteresting allocas from hiding in instrumented alloca's padding and so
1611   // that we have enough space to store real tags for short granules.
1612   DenseMap<AllocaInst *, AllocaInst *> AllocaToPaddedAllocaMap =
1613       padInterestingAllocas(AllocasToInstrument);
1614 
1615   if (!AllocaToPaddedAllocaMap.empty()) {
1616     for (auto &BB : F) {
1617       for (auto &Inst : BB) {
1618         if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&Inst)) {
1619           SmallDenseSet<Value *> LocationOps(DVI->location_ops().begin(),
1620                                              DVI->location_ops().end());
1621           for (Value *V : LocationOps) {
1622             if (auto *AI = dyn_cast_or_null<AllocaInst>(V)) {
1623               if (auto *NewAI = AllocaToPaddedAllocaMap.lookup(AI))
1624                 DVI->replaceVariableLocationOp(V, NewAI);
1625             }
1626           }
1627         }
1628       }
1629     }
1630     for (auto &P : AllocaToPaddedAllocaMap)
1631       P.first->eraseFromParent();
1632   }
1633 
1634   // If we split the entry block, move any allocas that were originally in the
1635   // entry block back into the entry block so that they aren't treated as
1636   // dynamic allocas.
1637   if (EntryIRB.GetInsertBlock() != &F.getEntryBlock()) {
1638     InsertPt = &*F.getEntryBlock().begin();
1639     for (Instruction &I :
1640          llvm::make_early_inc_range(*EntryIRB.GetInsertBlock())) {
1641       if (auto *AI = dyn_cast<AllocaInst>(&I))
1642         if (isa<ConstantInt>(AI->getArraySize()))
1643           I.moveBefore(InsertPt);
1644     }
1645   }
1646 
1647   for (auto &Operand : OperandsToInstrument)
1648     instrumentMemAccess(Operand);
1649 
1650   if (ClInstrumentMemIntrinsics && !IntrinToInstrument.empty()) {
1651     for (auto Inst : IntrinToInstrument)
1652       instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
1653   }
1654 
1655   ShadowBase = nullptr;
1656   StackBaseTag = nullptr;
1657 
1658   return true;
1659 }
1660 
1661 void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
1662   assert(!UsePageAliases);
1663   Constant *Initializer = GV->getInitializer();
1664   uint64_t SizeInBytes =
1665       M.getDataLayout().getTypeAllocSize(Initializer->getType());
1666   uint64_t NewSize = alignTo(SizeInBytes, Mapping.getObjectAlignment());
1667   if (SizeInBytes != NewSize) {
1668     // Pad the initializer out to the next multiple of 16 bytes and add the
1669     // required short granule tag.
1670     std::vector<uint8_t> Init(NewSize - SizeInBytes, 0);
1671     Init.back() = Tag;
1672     Constant *Padding = ConstantDataArray::get(*C, Init);
1673     Initializer = ConstantStruct::getAnon({Initializer, Padding});
1674   }
1675 
1676   auto *NewGV = new GlobalVariable(M, Initializer->getType(), GV->isConstant(),
1677                                    GlobalValue::ExternalLinkage, Initializer,
1678                                    GV->getName() + ".hwasan");
1679   NewGV->copyAttributesFrom(GV);
1680   NewGV->setLinkage(GlobalValue::PrivateLinkage);
1681   NewGV->copyMetadata(GV, 0);
1682   NewGV->setAlignment(
1683       MaybeAlign(std::max(GV->getAlignment(), Mapping.getObjectAlignment())));
1684 
1685   // It is invalid to ICF two globals that have different tags. In the case
1686   // where the size of the global is a multiple of the tag granularity the
1687   // contents of the globals may be the same but the tags (i.e. symbol values)
1688   // may be different, and the symbols are not considered during ICF. In the
1689   // case where the size is not a multiple of the granularity, the short granule
1690   // tags would discriminate two globals with different tags, but there would
1691   // otherwise be nothing stopping such a global from being incorrectly ICF'd
1692   // with an uninstrumented (i.e. tag 0) global that happened to have the short
1693   // granule tag in the last byte.
1694   NewGV->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1695 
1696   // Descriptor format (assuming little-endian):
1697   // bytes 0-3: relative address of global
1698   // bytes 4-6: size of global (16MB ought to be enough for anyone, but in case
1699   // it isn't, we create multiple descriptors)
1700   // byte 7: tag
1701   auto *DescriptorTy = StructType::get(Int32Ty, Int32Ty);
1702   const uint64_t MaxDescriptorSize = 0xfffff0;
1703   for (uint64_t DescriptorPos = 0; DescriptorPos < SizeInBytes;
1704        DescriptorPos += MaxDescriptorSize) {
1705     auto *Descriptor =
1706         new GlobalVariable(M, DescriptorTy, true, GlobalValue::PrivateLinkage,
1707                            nullptr, GV->getName() + ".hwasan.descriptor");
1708     auto *GVRelPtr = ConstantExpr::getTrunc(
1709         ConstantExpr::getAdd(
1710             ConstantExpr::getSub(
1711                 ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1712                 ConstantExpr::getPtrToInt(Descriptor, Int64Ty)),
1713             ConstantInt::get(Int64Ty, DescriptorPos)),
1714         Int32Ty);
1715     uint32_t Size = std::min(SizeInBytes - DescriptorPos, MaxDescriptorSize);
1716     auto *SizeAndTag = ConstantInt::get(Int32Ty, Size | (uint32_t(Tag) << 24));
1717     Descriptor->setComdat(NewGV->getComdat());
1718     Descriptor->setInitializer(ConstantStruct::getAnon({GVRelPtr, SizeAndTag}));
1719     Descriptor->setSection("hwasan_globals");
1720     Descriptor->setMetadata(LLVMContext::MD_associated,
1721                             MDNode::get(*C, ValueAsMetadata::get(NewGV)));
1722     appendToCompilerUsed(M, Descriptor);
1723   }
1724 
1725   Constant *Aliasee = ConstantExpr::getIntToPtr(
1726       ConstantExpr::getAdd(
1727           ConstantExpr::getPtrToInt(NewGV, Int64Ty),
1728           ConstantInt::get(Int64Ty, uint64_t(Tag) << PointerTagShift)),
1729       GV->getType());
1730   auto *Alias = GlobalAlias::create(GV->getValueType(), GV->getAddressSpace(),
1731                                     GV->getLinkage(), "", Aliasee, &M);
1732   Alias->setVisibility(GV->getVisibility());
1733   Alias->takeName(GV);
1734   GV->replaceAllUsesWith(Alias);
1735   GV->eraseFromParent();
1736 }
1737 
1738 static DenseSet<GlobalVariable *> getExcludedGlobals(Module &M) {
1739   NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals");
1740   if (!Globals)
1741     return DenseSet<GlobalVariable *>();
1742   DenseSet<GlobalVariable *> Excluded(Globals->getNumOperands());
1743   for (auto MDN : Globals->operands()) {
1744     // Metadata node contains the global and the fields of "Entry".
1745     assert(MDN->getNumOperands() == 5);
1746     auto *V = mdconst::extract_or_null<Constant>(MDN->getOperand(0));
1747     // The optimizer may optimize away a global entirely.
1748     if (!V)
1749       continue;
1750     auto *StrippedV = V->stripPointerCasts();
1751     auto *GV = dyn_cast<GlobalVariable>(StrippedV);
1752     if (!GV)
1753       continue;
1754     ConstantInt *IsExcluded = mdconst::extract<ConstantInt>(MDN->getOperand(4));
1755     if (IsExcluded->isOne())
1756       Excluded.insert(GV);
1757   }
1758   return Excluded;
1759 }
1760 
1761 void HWAddressSanitizer::instrumentGlobals() {
1762   std::vector<GlobalVariable *> Globals;
1763   auto ExcludedGlobals = getExcludedGlobals(M);
1764   for (GlobalVariable &GV : M.globals()) {
1765     if (ExcludedGlobals.count(&GV))
1766       continue;
1767 
1768     if (GV.isDeclarationForLinker() || GV.getName().startswith("llvm.") ||
1769         GV.isThreadLocal())
1770       continue;
1771 
1772     // Common symbols can't have aliases point to them, so they can't be tagged.
1773     if (GV.hasCommonLinkage())
1774       continue;
1775 
1776     // Globals with custom sections may be used in __start_/__stop_ enumeration,
1777     // which would be broken both by adding tags and potentially by the extra
1778     // padding/alignment that we insert.
1779     if (GV.hasSection())
1780       continue;
1781 
1782     Globals.push_back(&GV);
1783   }
1784 
1785   MD5 Hasher;
1786   Hasher.update(M.getSourceFileName());
1787   MD5::MD5Result Hash;
1788   Hasher.final(Hash);
1789   uint8_t Tag = Hash[0];
1790 
1791   for (GlobalVariable *GV : Globals) {
1792     Tag &= TagMaskByte;
1793     // Skip tag 0 in order to avoid collisions with untagged memory.
1794     if (Tag == 0)
1795       Tag = 1;
1796     instrumentGlobal(GV, Tag++);
1797   }
1798 }
1799 
1800 void HWAddressSanitizer::instrumentPersonalityFunctions() {
1801   // We need to untag stack frames as we unwind past them. That is the job of
1802   // the personality function wrapper, which either wraps an existing
1803   // personality function or acts as a personality function on its own. Each
1804   // function that has a personality function or that can be unwound past has
1805   // its personality function changed to a thunk that calls the personality
1806   // function wrapper in the runtime.
1807   MapVector<Constant *, std::vector<Function *>> PersonalityFns;
1808   for (Function &F : M) {
1809     if (F.isDeclaration() || !F.hasFnAttribute(Attribute::SanitizeHWAddress))
1810       continue;
1811 
1812     if (F.hasPersonalityFn()) {
1813       PersonalityFns[F.getPersonalityFn()->stripPointerCasts()].push_back(&F);
1814     } else if (!F.hasFnAttribute(Attribute::NoUnwind)) {
1815       PersonalityFns[nullptr].push_back(&F);
1816     }
1817   }
1818 
1819   if (PersonalityFns.empty())
1820     return;
1821 
1822   FunctionCallee HwasanPersonalityWrapper = M.getOrInsertFunction(
1823       "__hwasan_personality_wrapper", Int32Ty, Int32Ty, Int32Ty, Int64Ty,
1824       Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy, Int8PtrTy);
1825   FunctionCallee UnwindGetGR = M.getOrInsertFunction("_Unwind_GetGR", VoidTy);
1826   FunctionCallee UnwindGetCFA = M.getOrInsertFunction("_Unwind_GetCFA", VoidTy);
1827 
1828   for (auto &P : PersonalityFns) {
1829     std::string ThunkName = kHwasanPersonalityThunkName;
1830     if (P.first)
1831       ThunkName += ("." + P.first->getName()).str();
1832     FunctionType *ThunkFnTy = FunctionType::get(
1833         Int32Ty, {Int32Ty, Int32Ty, Int64Ty, Int8PtrTy, Int8PtrTy}, false);
1834     bool IsLocal = P.first && (!isa<GlobalValue>(P.first) ||
1835                                cast<GlobalValue>(P.first)->hasLocalLinkage());
1836     auto *ThunkFn = Function::Create(ThunkFnTy,
1837                                      IsLocal ? GlobalValue::InternalLinkage
1838                                              : GlobalValue::LinkOnceODRLinkage,
1839                                      ThunkName, &M);
1840     if (!IsLocal) {
1841       ThunkFn->setVisibility(GlobalValue::HiddenVisibility);
1842       ThunkFn->setComdat(M.getOrInsertComdat(ThunkName));
1843     }
1844 
1845     auto *BB = BasicBlock::Create(*C, "entry", ThunkFn);
1846     IRBuilder<> IRB(BB);
1847     CallInst *WrapperCall = IRB.CreateCall(
1848         HwasanPersonalityWrapper,
1849         {ThunkFn->getArg(0), ThunkFn->getArg(1), ThunkFn->getArg(2),
1850          ThunkFn->getArg(3), ThunkFn->getArg(4),
1851          P.first ? IRB.CreateBitCast(P.first, Int8PtrTy)
1852                  : Constant::getNullValue(Int8PtrTy),
1853          IRB.CreateBitCast(UnwindGetGR.getCallee(), Int8PtrTy),
1854          IRB.CreateBitCast(UnwindGetCFA.getCallee(), Int8PtrTy)});
1855     WrapperCall->setTailCall();
1856     IRB.CreateRet(WrapperCall);
1857 
1858     for (Function *F : P.second)
1859       F->setPersonalityFn(ThunkFn);
1860   }
1861 }
1862 
1863 void HWAddressSanitizer::ShadowMapping::init(Triple &TargetTriple,
1864                                              bool InstrumentWithCalls) {
1865   Scale = kDefaultShadowScale;
1866   if (TargetTriple.isOSFuchsia()) {
1867     // Fuchsia is always PIE, which means that the beginning of the address
1868     // space is always available.
1869     InGlobal = false;
1870     InTls = false;
1871     Offset = 0;
1872     WithFrameRecord = true;
1873   } else if (ClMappingOffset.getNumOccurrences() > 0) {
1874     InGlobal = false;
1875     InTls = false;
1876     Offset = ClMappingOffset;
1877     WithFrameRecord = false;
1878   } else if (ClEnableKhwasan || InstrumentWithCalls) {
1879     InGlobal = false;
1880     InTls = false;
1881     Offset = 0;
1882     WithFrameRecord = false;
1883   } else if (ClWithIfunc) {
1884     InGlobal = true;
1885     InTls = false;
1886     Offset = kDynamicShadowSentinel;
1887     WithFrameRecord = false;
1888   } else if (ClWithTls) {
1889     InGlobal = false;
1890     InTls = true;
1891     Offset = kDynamicShadowSentinel;
1892     WithFrameRecord = true;
1893   } else {
1894     InGlobal = false;
1895     InTls = false;
1896     Offset = kDynamicShadowSentinel;
1897     WithFrameRecord = false;
1898   }
1899 }
1900