//===- AArch64StackTagging.cpp - Stack tagging in IR ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<bool>
    ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
                     cl::init(true), cl::ZeroOrMore,
                     cl::desc("Use Stack Safety analysis results"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static const Align kTagGranuleSize = Align(16);

namespace {

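// Collects the stores and memsets that initialize a tagged alloca and
// re-emits them as aarch64.stgp / aarch64.settag / aarch64.settag.zero
// intrinsic calls, so that initialization is folded into memory tagging.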
class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
    auto I = std::lower_bound(
        Ranges.begin(), Ranges.end(), Start,
        [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

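  // Splat the memset byte across a 64-bit word and OR it into Out[] for every
  // 8-byte slot touched by [Start, End), masking off the bytes that fall
  // outside the range in the first and last word.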
  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      uint64_t Cst = 0x0101010101010101UL;
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

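  // Emit the combined initializer: 16-byte granules that carry data become
  // stgp calls, gaps become settag.zero, and a fully uninitialized allocation
  // becomes a plain settag. The original stores/memsets are erased afterwards.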
  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Look through the 8-byte initializer list 16 bytes at a time;
    // If one of the two 8-byte halves is non-zero non-undef, emit STGP.
    // Otherwise, emit zeroes up to the next available item.
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << "    " << *A << "\n    " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

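  // Convert an arbitrary stored value into a plain integer of its store size,
  // first turning vectors of pointers into vectors of integers, so that it can
  // be sliced into the 64-bit halves stgp expects.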
  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        auto *NewTy = FixedVectorType::get(
            IntegerType::get(Ctx, EltSize),
            cast<FixedVectorType>(VecTy)->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  struct AllocaInfo {
    AllocaInst *AI;
    SmallVector<IntrinsicInst *, 2> LifetimeStart;
    SmallVector<IntrinsicInst *, 2> LifetimeEnd;
    SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
    int Tag; // -1 for non-tagged allocations
  };

  const bool MergeInit;
  const bool UseStackSafety;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool IsOptNone = false)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
        UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                                            : !IsOptNone) {
    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
  }

  bool isInterestingAlloca(const AllocaInst &AI);
  void alignAndPadAlloca(AllocaInfo &Info);

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *
  insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
                          const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F = nullptr;
  Function *SetTagFunc = nullptr;
  const DataLayout *DL = nullptr;
  AAResults *AA = nullptr;
  const StackSafetyGlobalInfo *SSI = nullptr;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (UseStackSafety)
      AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
  return new AArch64StackTagging(IsOptNone);
}

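// Scan forward from StartInst for stores and memsets that write to the alloca
// at a constant offset and fold them into IB. The scan stops at the block
// terminator, after ClScanLimit non-debug instructions, or at the first
// instruction that touches the alloca but cannot be merged. Returns the last
// initializer merged so tagging can be inserted after it.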
Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    if (!isa<DbgInfoIntrinsic>(*BI))
      ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      Optional<int64_t> Offset =
          isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this memset is to a constant offset from the start ptr.
      Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}

bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
  // FIXME: support dynamic allocas
  bool IsInteresting =
      AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
      // alloca() may be called with 0 size, ignore it.
      AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
      // inalloca allocas are not treated as static, and we don't want
      // dynamic alloca instrumentation for them as well.
      !AI.isUsedWithInAlloca() &&
      // swifterror allocas are register promoted by ISel
      !AI.isSwiftError() &&
      // safe allocas are not interesting
      !(SSI && SSI->isSafe(AI));
  return IsInteresting;
}

void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian =
      Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
  if (MergeInit && !F->hasOptNone() && LittleEndian) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

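// Create the base tagged pointer with a single irg.sp call, inserted at the
// front of the nearest common dominator of all tagged allocas' blocks.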
Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const MapVector<AllocaInst *, AllocaInfo> &Allocas,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Function *IRG_SP =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
  Instruction *Base =
      IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  return Base;
}

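// Raise the alloca's alignment to the 16-byte tag granule and, if its size is
// not a multiple of the granule, replace it with an alloca of the original
// type padded by a trailing i8 array.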
void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
  const Align NewAlignment =
      max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize);
  Info.AI->setAlignment(NewAlignment);

  uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
  uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
  if (Size == AlignedSize)
    return;

  // Add padding to the alloca.
  Type *AllocatedType =
      Info.AI->isArrayAllocation()
          ? ArrayType::get(
                Info.AI->getAllocatedType(),
                cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
          : Info.AI->getAllocatedType();
  Type *PaddingType =
      ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
  Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
  auto *NewAI = new AllocaInst(
      TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
  NewAI->takeName(Info.AI);
  NewAI->setAlignment(Info.AI->getAlign());
  NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
  NewAI->setSwiftError(Info.AI->isSwiftError());
  NewAI->copyMetadata(*Info.AI);

  auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
  Info.AI->replaceAllUsesWith(NewPtr);
  Info.AI->eraseFromParent();
  Info.AI = NewAI;
}

// Helper function to check for post-dominance.
static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A,
                          const IntrinsicInst *B) {
  const BasicBlock *ABB = A->getParent();
  const BasicBlock *BBB = B->getParent();

  if (ABB != BBB)
    return PDT->dominates(ABB, BBB);

  for (const Instruction &I : *ABB) {
    if (&I == B)
      return true;
    if (&I == A)
      return false;
  }
  llvm_unreachable("Corrupt instruction list");
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  if (UseStackSafety)
    SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
  F = &Fn;
  DL = &Fn.getParent()->getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
  SmallVector<Instruction *, 8> RetVec;
  DenseMap<Value *, AllocaInst *> AllocaForValue;
  SmallVector<Instruction *, 4> UnrecognizedLifetimes;

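  // First pass: gather allocas together with their debug intrinsics and
  // lifetime markers, and remember every function exit
  // (return/resume/cleanupret).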
  for (auto &BB : *F) {
    for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
      Instruction *I = &*IT;
      if (auto *AI = dyn_cast<AllocaInst>(I)) {
        Allocas[AI].AI = AI;
        continue;
      }

      if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
        if (auto *AI =
                dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation())) {
          Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
        }
        continue;
      }

      auto *II = dyn_cast<IntrinsicInst>(I);
      if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
                 II->getIntrinsicID() == Intrinsic::lifetime_end)) {
        AllocaInst *AI =
            llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue);
        if (!AI) {
          UnrecognizedLifetimes.push_back(I);
          continue;
        }
        if (II->getIntrinsicID() == Intrinsic::lifetime_start)
          Allocas[AI].LifetimeStart.push_back(II);
        else
          Allocas[AI].LifetimeEnd.push_back(II);
      }

      if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
        RetVec.push_back(I);
    }
  }

  if (Allocas.empty())
    return false;

  int NextTag = 0;
  int NumInterestingAllocas = 0;
  for (auto &I : Allocas) {
    AllocaInfo &Info = I.second;
    assert(Info.AI);

    if (!isInterestingAlloca(*Info.AI)) {
      Info.Tag = -1;
      continue;
    }

    alignAndPadAlloca(Info);
    NumInterestingAllocas++;
    Info.Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
  }

  if (NumInterestingAllocas == 0)
    return true;

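  // Reuse cached (post-)dominator trees when available; otherwise build them
  // on the fly: the dominator tree when the function is not optnone or there
  // is more than one interesting alloca, the post-dominator tree only when the
  // function is not optnone.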
  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr && (NumInterestingAllocas > 1 ||
                        !F->hasFnAttribute(Attribute::OptimizeNone))) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  SetTagFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);

  Instruction *Base = insertBaseTaggedPointer(Allocas, DT);

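  // Second pass: rewrite each tagged alloca to go through aarch64.tagp, then
  // emit tag/untag calls - inside its single lifetime interval when one is
  // recognized, otherwise right after the alloca and before every exit.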
  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;

    // Replace alloca with tagp(alloca).
    IRBuilder<> IRB(Info.AI->getNextNode());
    Function *TagP = Intrinsic::getDeclaration(
        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
    Instruction *TagPCall =
        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
                              ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    Info.AI->replaceAllUsesWith(TagPCall);
    TagPCall->setOperand(0, Info.AI);

    if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
        Info.LifetimeEnd.size() == 1) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IntrinsicInst *End = Info.LifetimeEnd[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
      // We need to ensure that if we tag some object, we certainly untag it
      // before the function exits.
      if (PDT != nullptr && postDominates(PDT, End, Start)) {
        untagAlloca(AI, End, Size);
      } else {
        SmallVector<Instruction *, 8> ReachableRetVec;
        unsigned NumCoveredExits = 0;
        for (auto &RI : RetVec) {
          if (!isPotentiallyReachable(Start, RI, nullptr, DT))
            continue;
          ReachableRetVec.push_back(RI);
          if (DT != nullptr && DT->dominates(End, RI))
            ++NumCoveredExits;
        }
        // If there's a mix of covered and non-covered exits, just put the untag
        // on exits, so we avoid the redundancy of untagging twice.
        if (NumCoveredExits == ReachableRetVec.size()) {
          untagAlloca(AI, End, Size);
        } else {
          for (auto &RI : ReachableRetVec)
            untagAlloca(AI, RI, Size);
          // We may have inserted untag outside of the lifetime interval.
          // Remove the lifetime end call for this alloca.
          End->eraseFromParent();
        }
      }
    } else {
      uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto &RI : RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    // Fixup debug intrinsics to point to the new alloca.
    for (auto DVI : Info.DbgVariableIntrinsics)
      DVI->setArgOperand(
          0,
          MetadataAsValue::get(F->getContext(), LocalAsMetadata::get(Info.AI)));
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto &I : UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}