//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<bool>
    ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
                     cl::init(true), cl::ZeroOrMore,
                     cl::desc("Use Stack Safety analysis results"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static cl::opt<unsigned>
    ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
                         cl::Hidden);

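// MTE tags memory in 16-byte granules; allocas are aligned and padded to this
// size so every tagged allocation covers whole granules.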
static const Align kTagGranuleSize = Align(16);

namespace {

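// Collects the constant stores and memsets that initialize an alloca and
// re-emits them as tag-setting instructions: STGP for 16-byte chunks with
// known data, settag.zero for zeroed ranges, and plain settag for ranges left
// undef.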
class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
    auto I =
        llvm::lower_bound(Ranges, Start, [](const Range &LHS, uint64_t RHS) {
          return LHS.End <= RHS;
        });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
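    // For each 8-byte chunk overlapping [Start, End), build a constant with
    // the memset byte replicated into the covered byte lanes (little-endian)
    // and OR it into any initializer already recorded for that offset.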
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      uint64_t Cst = 0x0101010101010101UL;
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Look through 8-byte initializer list 16 bytes at a time;
    // If one of the two 8-byte halves is non-zero non-undef, emit STGP.
    // Otherwise, emit zeroes up to next available item.
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

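  // Tag [Offset, Offset + Size) of the allocation and zero its contents in
  // one step using the settag.zero intrinsic.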
  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

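  // Tag [Offset, Offset + Size) without writing any data; the contents of the
  // range remain undef (settag intrinsic).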
  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

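  // Store the two 8-byte values and set the tag for the enclosing 16-byte
  // granule with a single stgp intrinsic call.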
  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << "    " << *A << "\n    " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

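  // Convert an arbitrary stored value (pointer, vector, float, ...) into an
  // integer of the same store size so it can be sliced into 64-bit pieces.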
  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        auto *NewTy = FixedVectorType::get(
            IntegerType::get(Ctx, EltSize),
            cast<FixedVectorType>(VecTy)->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  struct AllocaInfo {
    AllocaInst *AI;
    TrackingVH<Instruction> OldAI; // Track through RAUW to replace debug uses.
    SmallVector<IntrinsicInst *, 2> LifetimeStart;
    SmallVector<IntrinsicInst *, 2> LifetimeEnd;
    SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
    int Tag; // -1 for non-tagged allocations
  };

  const bool MergeInit;
  const bool UseStackSafety;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool IsOptNone = false)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
        UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                                            : !IsOptNone) {
    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
  }

  bool isInterestingAlloca(const AllocaInst &AI);
  void alignAndPadAlloca(AllocaInfo &Info);

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *
  insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
                          const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F = nullptr;
  Function *SetTagFunc = nullptr;
  const DataLayout *DL = nullptr;
  AAResults *AA = nullptr;
  const StackSafetyGlobalInfo *SSI = nullptr;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (UseStackSafety)
      AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
  return new AArch64StackTagging(IsOptNone);
}

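// Scan forward from StartInst for stores and constant memsets that write at
// constant offsets into [StartPtr, StartPtr + Size) and feed them to IB so
// they can be folded into the tagging sequence. Scanning stops at the first
// unanalyzable access; returns the last instruction that was merged.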
Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    if (!isa<DbgInfoIntrinsic>(*BI))
      ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out.  We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      Optional<int64_t> Offset =
          isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}

bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
  // FIXME: support dynamic allocas
  bool IsInteresting =
      AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
      // alloca() may be called with 0 size, ignore it.
      AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
      // inalloca allocas are not treated as static, and we don't want
      // dynamic alloca instrumentation for them as well.
      !AI.isUsedWithInAlloca() &&
      // swifterror allocas are register promoted by ISel
      !AI.isSwiftError() &&
      // safe allocas are not interesting
      !(SSI && SSI->isSafe(AI));
  return IsInteresting;
}

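// Set memory tags for [Ptr, Ptr + Size), optionally merging nearby constant
// initializers of the alloca into the tag-setting instructions.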
void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian =
      Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
  if (MergeInit && !F->hasOptNone() && LittleEndian &&
      Size < ClMergeInitSizeLimit) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

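// Reset the alloca's memory tags via settag on the original (untagged) alloca
// pointer once the variable's lifetime is over.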
void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

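// Create the IRG-based base tagged pointer for the function. It is emitted in
// the nearest common dominator of all tagged allocas rather than always in
// the entry block, so shrink wrapping is not pessimized more than necessary.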
Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const MapVector<AllocaInst *, AllocaInfo> &Allocas,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Function *IRG_SP =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
  Instruction *Base =
      IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  return Base;
}

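// Round the alloca's alignment up to the tag granule and, if its size is not
// a multiple of 16 bytes, replace it with an alloca of a struct containing
// the original type followed by an i8 padding array of granule-aligned size.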
void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
  const Align NewAlignment =
      max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize);
  Info.AI->setAlignment(NewAlignment);

  uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
  uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
  if (Size == AlignedSize)
    return;

  // Add padding to the alloca.
  Type *AllocatedType =
      Info.AI->isArrayAllocation()
          ? ArrayType::get(
                Info.AI->getAllocatedType(),
                cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
          : Info.AI->getAllocatedType();
  Type *PaddingType =
      ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
  Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
  auto *NewAI = new AllocaInst(
      TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "", Info.AI);
  NewAI->takeName(Info.AI);
  NewAI->setAlignment(Info.AI->getAlign());
  NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
  NewAI->setSwiftError(Info.AI->isSwiftError());
  NewAI->copyMetadata(*Info.AI);

  auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
  Info.AI->replaceAllUsesWith(NewPtr);
  Info.AI->eraseFromParent();
  Info.AI = NewAI;
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  if (UseStackSafety)
    SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
  F = &Fn;
  DL = &Fn.getParent()->getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
  SmallVector<Instruction *, 8> RetVec;
  SmallVector<Instruction *, 4> UnrecognizedLifetimes;

  for (auto &BB : *F) {
    for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
      Instruction *I = &*IT;
      if (auto *AI = dyn_cast<AllocaInst>(I)) {
        Allocas[AI].AI = AI;
        Allocas[AI].OldAI = AI;
        continue;
      }

      if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
        for (Value *V : DVI->location_ops())
          if (auto *AI = dyn_cast_or_null<AllocaInst>(V))
            if (Allocas[AI].DbgVariableIntrinsics.empty() ||
                Allocas[AI].DbgVariableIntrinsics.back() != DVI)
              Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
        continue;
      }

      auto *II = dyn_cast<IntrinsicInst>(I);
      if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
                 II->getIntrinsicID() == Intrinsic::lifetime_end)) {
        AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
        if (!AI) {
          UnrecognizedLifetimes.push_back(I);
          continue;
        }
        if (II->getIntrinsicID() == Intrinsic::lifetime_start)
          Allocas[AI].LifetimeStart.push_back(II);
        else
          Allocas[AI].LifetimeEnd.push_back(II);
      }

      if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
        RetVec.push_back(I);
    }
  }

  if (Allocas.empty())
    return false;

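  // Assign each interesting alloca a small tag offset, cycling through 0..15;
  // non-interesting allocas keep Tag == -1 and are left untouched.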
  int NextTag = 0;
  int NumInterestingAllocas = 0;
  for (auto &I : Allocas) {
    AllocaInfo &Info = I.second;
    assert(Info.AI);

    if (!isInterestingAlloca(*Info.AI)) {
      Info.Tag = -1;
      continue;
    }

    alignAndPadAlloca(Info);
    NumInterestingAllocas++;
    Info.Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
  }

  if (NumInterestingAllocas == 0)
    return true;

  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr && (NumInterestingAllocas > 1 ||
                        !F->hasFnAttribute(Attribute::OptimizeNone))) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  SetTagFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);

  Instruction *Base = insertBaseTaggedPointer(Allocas, DT);

  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;

    // Replace alloca with tagp(alloca).
    IRBuilder<> IRB(Info.AI->getNextNode());
    Function *TagP = Intrinsic::getDeclaration(
        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
    Instruction *TagPCall =
        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
                              ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    Info.AI->replaceAllUsesWith(TagPCall);
    TagPCall->setOperand(0, Info.AI);

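    // With a single recognizable lifetime.start/end pair, tag at the start of
    // the lifetime and untag on every path leaving it; otherwise tag right
    // after the alloca and untag before every function exit, dropping the
    // alloca's lifetime markers.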
    if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
        Info.LifetimeEnd.size() == 1) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IntrinsicInst *End = Info.LifetimeEnd[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);

      auto TagEnd = [&](Instruction *Node) { untagAlloca(AI, Node, Size); };
      if (!DT || !PDT ||
          !forAllReachableExits(*DT, *PDT, Start, End, RetVec, TagEnd))
        End->eraseFromParent();
    } else {
      uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto &RI : RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    // Fixup debug intrinsics to point to the new alloca.
    for (auto DVI : Info.DbgVariableIntrinsics)
      DVI->replaceVariableLocationOp(Info.OldAI, Info.AI);
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto &I : UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}