//===- AArch64StackTagging.cpp - Stack tagging in IR ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
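//
// This pass implements stack tagging for the AArch64 Memory Tagging Extension
// (MTE): interesting allocas are aligned and padded to 16-byte tag granules,
// rebased off an aarch64.irg.sp base pointer via the aarch64.tagp intrinsic,
// and their memory is tagged with aarch64.settag / settag.zero / stgp at
// lifetime start and untagged again at lifetime end or at function exits.
//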
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static const Align kTagGranuleSize = Align(16);

namespace {

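// Accumulates the initial stores/memsets that immediately follow an alloca
// and re-emits them as tagging instructions: aarch64.stgp for 16-byte
// granules with known contents, aarch64.settag.zero for zeroed granules, and
// plain aarch64.settag where the contents are undef.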
class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
    auto I = std::lower_bound(
        Ranges.begin(), Ranges.end(), Start,
        [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

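  // Record a memset over [Start, End) in Out[]: the memset byte is replicated
  // across each overlapping 8-byte word and masked down where the range only
  // partially covers the word.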
  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      uint64_t Cst = 0x0101010101010101UL;
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

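  // Record a store of StoredValue over [Start, End) in Out[]: the value is
  // flattened to an integer and split into 64-bit slices at 8-byte-aligned
  // offsets, which are OR-ed into any existing entries.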
  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

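  // Emit the combined initializer at IRB's insertion point and erase the
  // original stores/memsets that have been folded into it.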
  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Look through the 8-byte initializer list 16 bytes at a time.
    // If either of the two 8-byte halves is non-zero and non-undef, emit STGP.
    // Otherwise, emit zeroes up to the next available item.
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << "    " << *A << "\n    " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

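  // Cast the stored value to an integer of the same store size so it can be
  // sliced into 64-bit pieces; vectors of pointers are first converted to
  // vectors of integers.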
  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        Type *NewTy = VectorType::get(IntegerType::get(Ctx, EltSize),
                                      VecTy->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  struct AllocaInfo {
    AllocaInst *AI;
    SmallVector<IntrinsicInst *, 2> LifetimeStart;
    SmallVector<IntrinsicInst *, 2> LifetimeEnd;
    SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
    int Tag; // -1 for non-tagged allocations
  };

  bool MergeInit;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool MergeInit = true)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() > 0 ? ClMergeInit
                                                      : MergeInit) {
    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
  }

  bool isInterestingAlloca(const AllocaInst &AI);
  void alignAndPadAlloca(AllocaInfo &Info);

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *
  insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
                          const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F;
  Function *SetTagFunc;
  const DataLayout *DL;
  AAResults *AA;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool MergeInit) {
  return new AArch64StackTagging(MergeInit);
}

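// Scan forward from StartInst for stores and memsets that write a constant
// value at a constant offset within the Size bytes at StartPtr, folding them
// into IB. The scan stops after ClScanLimit instructions, at the block
// terminator, or at the first instruction that may otherwise access the
// allocation. Returns the last instruction folded into the initializer.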
Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    if (!isa<DbgInfoIntrinsic>(*BI))
      ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      Optional<int64_t> Offset =
          isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}

bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
  // FIXME: support dynamic allocas
  bool IsInteresting =
      AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
      // alloca() may be called with 0 size; ignore it.
      AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
      // inalloca allocas are not treated as static, and we don't want
      // dynamic alloca instrumentation for them either.
      !AI.isUsedWithInAlloca() &&
      // swifterror allocas are register promoted by ISel
      !AI.isSwiftError();
  return IsInteresting;
}

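// Tag Size bytes of the alloca's memory starting at Ptr. When initializer
// merging is enabled (and the function is not optnone), nearby stores and
// memsets into the allocation are folded into the tagging sequence so their
// data is written by STGP / settag.zero instead of separate stores.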
void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian =
      Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
  if (MergeInit && !F->hasOptNone() && LittleEndian) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

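// Emit the aarch64.irg.sp call that produces the base tagged pointer for the
// frame. It is placed at the front of the nearest common dominator of all
// tagged allocas' blocks rather than unconditionally in the entry block.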
Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const MapVector<AllocaInst *, AllocaInfo> &Allocas,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Function *IRG_SP =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
  Instruction *Base =
      IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  return Base;
}

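// Raise the alloca's alignment to the 16-byte tag granule and, if its size is
// not a multiple of the granule, replace it with an alloca of a struct that
// appends an i8 padding array, so that whole granules can be tagged.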
void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
  const Align NewAlignment =
      max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize);
  Info.AI->setAlignment(NewAlignment);

  uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
  uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
  if (Size == AlignedSize)
    return;

  // Add padding to the alloca.
  Type *AllocatedType =
      Info.AI->isArrayAllocation()
          ? ArrayType::get(
                Info.AI->getAllocatedType(),
                cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
          : Info.AI->getAllocatedType();
  Type *PaddingType =
      ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
  Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
  auto *NewAI = new AllocaInst(
      TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
  NewAI->takeName(Info.AI);
  NewAI->setAlignment(MaybeAlign(Info.AI->getAlignment()));
  NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
  NewAI->setSwiftError(Info.AI->isSwiftError());
  NewAI->copyMetadata(*Info.AI);

  auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
  Info.AI->replaceAllUsesWith(NewPtr);
  Info.AI->eraseFromParent();
  Info.AI = NewAI;
}

// Helper function to check for post-dominance.
static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A,
                          const IntrinsicInst *B) {
  const BasicBlock *ABB = A->getParent();
  const BasicBlock *BBB = B->getParent();

  if (ABB != BBB)
    return PDT->dominates(ABB, BBB);

  for (const Instruction &I : *ABB) {
    if (&I == B)
      return true;
    if (&I == A)
      return false;
  }
  llvm_unreachable("Corrupt instruction list");
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  F = &Fn;
  DL = &Fn.getParent()->getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
  SmallVector<Instruction *, 8> RetVec;
  DenseMap<Value *, AllocaInst *> AllocaForValue;
  SmallVector<Instruction *, 4> UnrecognizedLifetimes;

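  // Collect allocas, their lifetime markers and debug intrinsics, and every
  // function exit (return, resume, cleanupret).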
  for (auto &BB : *F) {
    for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
      Instruction *I = &*IT;
      if (auto *AI = dyn_cast<AllocaInst>(I)) {
        Allocas[AI].AI = AI;
        continue;
      }

      if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
        if (auto *AI =
                dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation())) {
          Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
        }
        continue;
      }

      auto *II = dyn_cast<IntrinsicInst>(I);
      if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
                 II->getIntrinsicID() == Intrinsic::lifetime_end)) {
        AllocaInst *AI =
            llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue);
        if (!AI) {
          UnrecognizedLifetimes.push_back(I);
          continue;
        }
        if (II->getIntrinsicID() == Intrinsic::lifetime_start)
          Allocas[AI].LifetimeStart.push_back(II);
        else
          Allocas[AI].LifetimeEnd.push_back(II);
      }

      if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
        RetVec.push_back(I);
    }
  }

  if (Allocas.empty())
    return false;

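  // Pad and align the interesting allocas and assign them tags 0..15 in a
  // round-robin fashion; everything else keeps Tag == -1 and is left alone.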
  int NextTag = 0;
  int NumInterestingAllocas = 0;
  for (auto &I : Allocas) {
    AllocaInfo &Info = I.second;
    assert(Info.AI);

    if (!isInterestingAlloca(*Info.AI)) {
      Info.Tag = -1;
      continue;
    }

    alignAndPadAlloca(Info);
    NumInterestingAllocas++;
    Info.Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
  }

  if (NumInterestingAllocas == 0)
    return true;

  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr && (NumInterestingAllocas > 1 ||
                        !F->hasFnAttribute(Attribute::OptimizeNone))) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  SetTagFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);

  Instruction *Base = insertBaseTaggedPointer(Allocas, DT);

  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;

    // Replace alloca with tagp(alloca).
    IRBuilder<> IRB(Info.AI->getNextNode());
    Function *TagP = Intrinsic::getDeclaration(
        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
    Instruction *TagPCall =
        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
                              ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    Info.AI->replaceAllUsesWith(TagPCall);
    TagPCall->setOperand(0, Info.AI);

    if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
        Info.LifetimeEnd.size() == 1) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IntrinsicInst *End = Info.LifetimeEnd[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
      // We need to ensure that if we tag some object, we certainly untag it
      // before the function exits.
      if (PDT != nullptr && postDominates(PDT, End, Start)) {
        untagAlloca(AI, End, Size);
      } else {
        SmallVector<Instruction *, 8> ReachableRetVec;
        unsigned NumCoveredExits = 0;
        for (auto &RI : RetVec) {
          if (!isPotentiallyReachable(Start, RI, nullptr, DT))
            continue;
          ReachableRetVec.push_back(RI);
          if (DT != nullptr && DT->dominates(End, RI))
            ++NumCoveredExits;
        }
        // If there's a mix of covered and non-covered exits, just put the untag
        // on exits, so we avoid the redundancy of untagging twice.
        if (NumCoveredExits == ReachableRetVec.size()) {
          untagAlloca(AI, End, Size);
        } else {
          for (auto &RI : ReachableRetVec)
            untagAlloca(AI, RI, Size);
          // We may have inserted untag outside of the lifetime interval.
          // Remove the lifetime end call for this alloca.
          End->eraseFromParent();
        }
      }
    } else {
      uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto &RI : RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    // Fixup debug intrinsics to point to the new alloca.
    for (auto DVI : Info.DbgVariableIntrinsics)
      DVI->setArgOperand(
          0,
          MetadataAsValue::get(F->getContext(), LocalAsMetadata::get(Info.AI)));
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto &I : UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}