//===- AArch64StackTagging.cpp - Stack tagging in IR --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-Exception
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <iterator>
#include <map>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging"

static cl::opt<bool> ClMergeInit(
    "stack-tagging-merge-init", cl::Hidden, cl::init(true), cl::ZeroOrMore,
    cl::desc("merge stack variable initializers with tagging when possible"));

static cl::opt<bool>
    ClUseStackSafety("stack-tagging-use-stack-safety", cl::Hidden,
                     cl::init(true), cl::ZeroOrMore,
                     cl::desc("Use Stack Safety analysis results"));

static cl::opt<unsigned> ClScanLimit("stack-tagging-merge-init-scan-limit",
                                     cl::init(40), cl::Hidden);

static cl::opt<unsigned>
    ClMergeInitSizeLimit("stack-tagging-merge-init-size-limit", cl::init(272),
                         cl::Hidden);

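// MTE operates on 16-byte granules: each granule carries a 4-bit allocation
// tag, so tagged allocas must be padded and aligned to this boundary (see
// alignAndPadAlloca below).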
static const Align kTagGranuleSize = Align(16);

namespace {

class InitializerBuilder {
  uint64_t Size;
  const DataLayout *DL;
  Value *BasePtr;
  Function *SetTagFn;
  Function *SetTagZeroFn;
  Function *StgpFn;

  // List of initializers sorted by start offset.
  struct Range {
    uint64_t Start, End;
    Instruction *Inst;
  };
  SmallVector<Range, 4> Ranges;
  // 8-aligned offset => 8-byte initializer
  // Missing keys are zero initialized.
  std::map<uint64_t, Value *> Out;

public:
  InitializerBuilder(uint64_t Size, const DataLayout *DL, Value *BasePtr,
                     Function *SetTagFn, Function *SetTagZeroFn,
                     Function *StgpFn)
      : Size(Size), DL(DL), BasePtr(BasePtr), SetTagFn(SetTagFn),
        SetTagZeroFn(SetTagZeroFn), StgpFn(StgpFn) {}

  bool addRange(uint64_t Start, uint64_t End, Instruction *Inst) {
    auto I = std::lower_bound(
        Ranges.begin(), Ranges.end(), Start,
        [](const Range &LHS, uint64_t RHS) { return LHS.End <= RHS; });
    if (I != Ranges.end() && End > I->Start) {
      // Overlap - bail.
      return false;
    }
    Ranges.insert(I, {Start, End, Inst});
    return true;
  }

  bool addStore(uint64_t Offset, StoreInst *SI, const DataLayout *DL) {
    int64_t StoreSize = DL->getTypeStoreSize(SI->getOperand(0)->getType());
    if (!addRange(Offset, Offset + StoreSize, SI))
      return false;
    IRBuilder<> IRB(SI);
    applyStore(IRB, Offset, Offset + StoreSize, SI->getOperand(0));
    return true;
  }

  bool addMemSet(uint64_t Offset, MemSetInst *MSI) {
    uint64_t StoreSize = cast<ConstantInt>(MSI->getLength())->getZExtValue();
    if (!addRange(Offset, Offset + StoreSize, MSI))
      return false;
    IRBuilder<> IRB(MSI);
    applyMemSet(IRB, Offset, Offset + StoreSize,
                cast<ConstantInt>(MSI->getValue()));
    return true;
  }

  void applyMemSet(IRBuilder<> &IRB, int64_t Start, int64_t End,
                   ConstantInt *V) {
    // Out[] does not distinguish between zero and undef, and we already know
    // that this memset does not overlap with any other initializer. Nothing to
    // do for memset(0).
    if (V->isZero())
      return;
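    // Worked example (little-endian, assuming memset value 0xAB over
    // [Start=3, End=13)): the first iteration visits Offset 0 with
    // LowBits == 24, masking Cst to 0x0101010101000000, so Out[0] is OR'ed
    // with 0xABABABABAB000000 (bytes 3..7); the second visits Offset 8 with
    // HighBits == 24, so Out[8] is OR'ed with 0x000000ABABABABAB (bytes 8..12).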
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      uint64_t Cst = 0x0101010101010101UL;
      int LowBits = Offset < Start ? (Start - Offset) * 8 : 0;
      if (LowBits)
        Cst = (Cst >> LowBits) << LowBits;
      int HighBits = End - Offset < 8 ? (8 - (End - Offset)) * 8 : 0;
      if (HighBits)
        Cst = (Cst << HighBits) >> HighBits;
      ConstantInt *C =
          ConstantInt::get(IRB.getInt64Ty(), Cst * V->getZExtValue());

      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = C;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, C);
      }
    }
  }

  // Take a 64-bit slice of the value starting at the given offset (in bytes).
  // Offset can be negative. Pad with zeroes on both sides when necessary.
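  // For example, Offset == 2 yields trunc(V >> 16): the eight bytes of V
  // starting at byte 2 (little-endian); Offset == -2 yields zext(V) << 16.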
  Value *sliceValue(IRBuilder<> &IRB, Value *V, int64_t Offset) {
    if (Offset > 0) {
      V = IRB.CreateLShr(V, Offset * 8);
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    } else if (Offset < 0) {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
      V = IRB.CreateShl(V, -Offset * 8);
    } else {
      V = IRB.CreateZExtOrTrunc(V, IRB.getInt64Ty());
    }
    return V;
  }

  void applyStore(IRBuilder<> &IRB, int64_t Start, int64_t End,
                  Value *StoredValue) {
    StoredValue = flatten(IRB, StoredValue);
    for (int64_t Offset = Start - Start % 8; Offset < End; Offset += 8) {
      Value *V = sliceValue(IRB, StoredValue, Offset - Start);
      Value *&CurrentV = Out[Offset];
      if (!CurrentV) {
        CurrentV = V;
      } else {
        CurrentV = IRB.CreateOr(CurrentV, V);
      }
    }
  }

  void generate(IRBuilder<> &IRB) {
    LLVM_DEBUG(dbgs() << "Combined initializer\n");
    // No initializers => the entire allocation is undef.
    if (Ranges.empty()) {
      emitUndef(IRB, 0, Size);
      return;
    }

    // Look through the 8-byte initializer list 16 bytes at a time;
    // if one of the two 8-byte halves is non-zero non-undef, emit STGP.
    // Otherwise, emit zeroes up to the next available item.
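    // STGP stores a pair of 8-byte values and sets the allocation tag of the
    // 16-byte granule in a single instruction.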
    uint64_t LastOffset = 0;
    for (uint64_t Offset = 0; Offset < Size; Offset += 16) {
      auto I1 = Out.find(Offset);
      auto I2 = Out.find(Offset + 8);
      if (I1 == Out.end() && I2 == Out.end())
        continue;

      if (Offset > LastOffset)
        emitZeroes(IRB, LastOffset, Offset - LastOffset);

      Value *Store1 = I1 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I1->second;
      Value *Store2 = I2 == Out.end() ? Constant::getNullValue(IRB.getInt64Ty())
                                      : I2->second;
      emitPair(IRB, Offset, Store1, Store2);
      LastOffset = Offset + 16;
    }

    // memset(0) does not update Out[], therefore the tail can be either undef
    // or zero.
    if (LastOffset < Size)
      emitZeroes(IRB, LastOffset, Size - LastOffset);

    for (const auto &R : Ranges) {
      R.Inst->eraseFromParent();
    }
  }

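  // llvm.aarch64.settag.zero lowers to tag-setting stores that also
  // zero-initialize the memory (STZG and friends); plain llvm.aarch64.settag,
  // used by emitUndef below, only sets the granule tags and leaves the
  // contents unspecified.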
  void emitZeroes(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") zero\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(SetTagZeroFn,
                   {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitUndef(IRBuilder<> &IRB, uint64_t Offset, uint64_t Size) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + Size
                      << ") undef\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(SetTagFn, {Ptr, ConstantInt::get(IRB.getInt64Ty(), Size)});
  }

  void emitPair(IRBuilder<> &IRB, uint64_t Offset, Value *A, Value *B) {
    LLVM_DEBUG(dbgs() << "  [" << Offset << ", " << Offset + 16 << "):\n");
    LLVM_DEBUG(dbgs() << "    " << *A << "\n    " << *B << "\n");
    Value *Ptr = BasePtr;
    if (Offset)
      Ptr = IRB.CreateConstGEP1_32(Ptr, Offset);
    IRB.CreateCall(StgpFn, {Ptr, A, B});
  }

  Value *flatten(IRBuilder<> &IRB, Value *V) {
    if (V->getType()->isIntegerTy())
      return V;
    // vector of pointers -> vector of ints
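    // e.g. <2 x i8*> is pointer-cast to <2 x i64> here, and the final
    // CreateBitOrPointerCast below collapses it into a single i128.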
    if (VectorType *VecTy = dyn_cast<VectorType>(V->getType())) {
      LLVMContext &Ctx = IRB.getContext();
      Type *EltTy = VecTy->getElementType();
      if (EltTy->isPointerTy()) {
        uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
        auto *NewTy = FixedVectorType::get(
            IntegerType::get(Ctx, EltSize),
            cast<FixedVectorType>(VecTy)->getNumElements());
        V = IRB.CreatePointerCast(V, NewTy);
      }
    }
    return IRB.CreateBitOrPointerCast(
        V, IRB.getIntNTy(DL->getTypeStoreSize(V->getType()) * 8));
  }
};

class AArch64StackTagging : public FunctionPass {
  struct AllocaInfo {
    AllocaInst *AI;
    SmallVector<IntrinsicInst *, 2> LifetimeStart;
    SmallVector<IntrinsicInst *, 2> LifetimeEnd;
    SmallVector<DbgVariableIntrinsic *, 2> DbgVariableIntrinsics;
    int Tag; // -1 for non-tagged allocations
  };

  const bool MergeInit;
  const bool UseStackSafety;

public:
  static char ID; // Pass ID, replacement for typeid

  AArch64StackTagging(bool IsOptNone = false)
      : FunctionPass(ID),
        MergeInit(ClMergeInit.getNumOccurrences() ? ClMergeInit : !IsOptNone),
        UseStackSafety(ClUseStackSafety.getNumOccurrences() ? ClUseStackSafety
                                                            : !IsOptNone) {
    initializeAArch64StackTaggingPass(*PassRegistry::getPassRegistry());
  }

  bool isInterestingAlloca(const AllocaInst &AI);
  void alignAndPadAlloca(AllocaInfo &Info);

  void tagAlloca(AllocaInst *AI, Instruction *InsertBefore, Value *Ptr,
                 uint64_t Size);
  void untagAlloca(AllocaInst *AI, Instruction *InsertBefore, uint64_t Size);

  Instruction *collectInitializers(Instruction *StartInst, Value *StartPtr,
                                   uint64_t Size, InitializerBuilder &IB);

  Instruction *
  insertBaseTaggedPointer(const MapVector<AllocaInst *, AllocaInfo> &Allocas,
                          const DominatorTree *DT);
  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "AArch64 Stack Tagging"; }

private:
  Function *F = nullptr;
  Function *SetTagFunc = nullptr;
  const DataLayout *DL = nullptr;
  AAResults *AA = nullptr;
  const StackSafetyGlobalInfo *SSI = nullptr;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    if (UseStackSafety)
      AU.addRequired<StackSafetyGlobalInfoWrapperPass>();
    if (MergeInit)
      AU.addRequired<AAResultsWrapperPass>();
  }
};

} // end anonymous namespace

char AArch64StackTagging::ID = 0;

INITIALIZE_PASS_BEGIN(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(StackSafetyGlobalInfoWrapperPass)
INITIALIZE_PASS_END(AArch64StackTagging, DEBUG_TYPE, "AArch64 Stack Tagging",
                    false, false)

FunctionPass *llvm::createAArch64StackTaggingPass(bool IsOptNone) {
  return new AArch64StackTagging(IsOptNone);
}

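// Scan forward from StartInst for stores and memsets at constant offsets
// within [StartPtr, StartPtr + Size), feeding them to IB so they can be
// merged into the tagging instructions. Returns the last instruction that
// was folded into the combined initializer.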
Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
                                                      Value *StartPtr,
                                                      uint64_t Size,
                                                      InitializerBuilder &IB) {
  MemoryLocation AllocaLoc{StartPtr, Size};
  Instruction *LastInst = StartInst;
  BasicBlock::iterator BI(StartInst);

  unsigned Count = 0;
  for (; Count < ClScanLimit && !BI->isTerminator(); ++BI) {
    if (!isa<DbgInfoIntrinsic>(*BI))
      ++Count;

    if (isNoModRef(AA->getModRefInfo(&*BI, AllocaLoc)))
      continue;

    if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
      // If the instruction is readnone, ignore it, otherwise bail out. We
      // don't even allow readonly here because we don't want something like:
      // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
      if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
        break;
      continue;
    }

    if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
      if (!NextStore->isSimple())
        break;

      // Check to see if this store is to a constant offset from the start ptr.
      Optional<int64_t> Offset =
          isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL);
      if (!Offset)
        break;

      if (!IB.addStore(*Offset, NextStore, DL))
        break;
      LastInst = NextStore;
    } else {
      MemSetInst *MSI = cast<MemSetInst>(BI);

      if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength()))
        break;

      if (!isa<ConstantInt>(MSI->getValue()))
        break;

      // Check to see if this memset is to a constant offset from the start
      // ptr.
      Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), *DL);
      if (!Offset)
        break;

      if (!IB.addMemSet(*Offset, MSI))
        break;
      LastInst = MSI;
    }
  }
  return LastInst;
}


bool AArch64StackTagging::isInterestingAlloca(const AllocaInst &AI) {
  // FIXME: support dynamic allocas
  bool IsInteresting =
      AI.getAllocatedType()->isSized() && AI.isStaticAlloca() &&
      // alloca() may be called with 0 size, ignore it.
      AI.getAllocationSizeInBits(*DL).getValue() > 0 &&
      // inalloca allocas are not treated as static, and we don't want
      // dynamic alloca instrumentation for them as well.
      !AI.isUsedWithInAlloca() &&
      // swifterror allocas are register promoted by ISel
      !AI.isSwiftError() &&
      // safe allocas are not interesting
      !(SSI && SSI->isSafe(AI));
  return IsInteresting;
}

void AArch64StackTagging::tagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                    Value *Ptr, uint64_t Size) {
  auto SetTagZeroFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag_zero);
  auto StgpFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_stgp);

  InitializerBuilder IB(Size, DL, Ptr, SetTagFunc, SetTagZeroFunc, StgpFunc);
  bool LittleEndian =
      Triple(AI->getModule()->getTargetTriple()).isLittleEndian();
  // Current implementation of initializer merging assumes little endianness.
  if (MergeInit && !F->hasOptNone() && LittleEndian &&
      Size < ClMergeInitSizeLimit) {
    LLVM_DEBUG(dbgs() << "collecting initializers for " << *AI
                      << ", size = " << Size << "\n");
    InsertBefore = collectInitializers(InsertBefore, Ptr, Size, IB);
  }

  IRBuilder<> IRB(InsertBefore);
  IB.generate(IRB);
}

void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
                                      uint64_t Size) {
  IRBuilder<> IRB(InsertBefore);
  IRB.CreateCall(SetTagFunc, {IRB.CreatePointerCast(AI, IRB.getInt8PtrTy()),
                              ConstantInt::get(IRB.getInt64Ty(), Size)});
}

Instruction *AArch64StackTagging::insertBaseTaggedPointer(
    const MapVector<AllocaInst *, AllocaInfo> &Allocas,
    const DominatorTree *DT) {
  BasicBlock *PrologueBB = nullptr;
  // Try sinking IRG as deep as possible to avoid hurting shrink wrap.
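  // llvm.aarch64.irg.sp materializes SP with a random tag inserted; every
  // tagged alloca then derives its own tag from this single base pointer.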
  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;
    if (!PrologueBB) {
      PrologueBB = AI->getParent();
      continue;
    }
    PrologueBB = DT->findNearestCommonDominator(PrologueBB, AI->getParent());
  }
  assert(PrologueBB);

  IRBuilder<> IRB(&PrologueBB->front());
  Function *IRG_SP =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_irg_sp);
  Instruction *Base =
      IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
  Base->setName("basetag");
  return Base;
}

void AArch64StackTagging::alignAndPadAlloca(AllocaInfo &Info) {
  const Align NewAlignment =
      max(MaybeAlign(Info.AI->getAlignment()), kTagGranuleSize);
  Info.AI->setAlignment(NewAlignment);

  uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
  uint64_t AlignedSize = alignTo(Size, kTagGranuleSize);
  if (Size == AlignedSize)
    return;
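  // e.g. alignTo(20, 16) == 32, so a 20-byte alloca is padded to 32 bytes by
  // wrapping its type in { <original type>, [12 x i8] } below.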

  // Add padding to the alloca.
  Type *AllocatedType =
      Info.AI->isArrayAllocation()
          ? ArrayType::get(
                Info.AI->getAllocatedType(),
                cast<ConstantInt>(Info.AI->getArraySize())->getZExtValue())
          : Info.AI->getAllocatedType();
  Type *PaddingType =
      ArrayType::get(Type::getInt8Ty(F->getContext()), AlignedSize - Size);
  Type *TypeWithPadding = StructType::get(AllocatedType, PaddingType);
  auto *NewAI = new AllocaInst(
      TypeWithPadding, Info.AI->getType()->getAddressSpace(), nullptr, "",
      Info.AI);
  NewAI->takeName(Info.AI);
  NewAI->setAlignment(Info.AI->getAlign());
  NewAI->setUsedWithInAlloca(Info.AI->isUsedWithInAlloca());
  NewAI->setSwiftError(Info.AI->isSwiftError());
  NewAI->copyMetadata(*Info.AI);

  auto *NewPtr = new BitCastInst(NewAI, Info.AI->getType(), "", Info.AI);
  Info.AI->replaceAllUsesWith(NewPtr);
  Info.AI->eraseFromParent();
  Info.AI = NewAI;
}

// Helper function to check for post-dominance.
static bool postDominates(const PostDominatorTree *PDT, const IntrinsicInst *A,
                          const IntrinsicInst *B) {
  const BasicBlock *ABB = A->getParent();
  const BasicBlock *BBB = B->getParent();

  if (ABB != BBB)
    return PDT->dominates(ABB, BBB);

  for (const Instruction &I : *ABB) {
    if (&I == B)
      return true;
    if (&I == A)
      return false;
  }
  llvm_unreachable("Corrupt instruction list");
}

// FIXME: check for MTE extension
bool AArch64StackTagging::runOnFunction(Function &Fn) {
  if (!Fn.hasFnAttribute(Attribute::SanitizeMemTag))
    return false;

  if (UseStackSafety)
    SSI = &getAnalysis<StackSafetyGlobalInfoWrapperPass>().getResult();
  F = &Fn;
  DL = &Fn.getParent()->getDataLayout();
  if (MergeInit)
    AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  MapVector<AllocaInst *, AllocaInfo> Allocas; // need stable iteration order
  SmallVector<Instruction *, 8> RetVec;
  SmallVector<Instruction *, 4> UnrecognizedLifetimes;

  for (auto &BB : *F) {
    for (BasicBlock::iterator IT = BB.begin(); IT != BB.end(); ++IT) {
      Instruction *I = &*IT;
      if (auto *AI = dyn_cast<AllocaInst>(I)) {
        Allocas[AI].AI = AI;
        continue;
      }

      if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(I)) {
        if (auto *AI =
                dyn_cast_or_null<AllocaInst>(DVI->getVariableLocation())) {
          Allocas[AI].DbgVariableIntrinsics.push_back(DVI);
        }
        continue;
      }

      auto *II = dyn_cast<IntrinsicInst>(I);
      if (II && (II->getIntrinsicID() == Intrinsic::lifetime_start ||
                 II->getIntrinsicID() == Intrinsic::lifetime_end)) {
        AllocaInst *AI = findAllocaForValue(II->getArgOperand(1));
        if (!AI) {
          UnrecognizedLifetimes.push_back(I);
          continue;
        }
        if (II->getIntrinsicID() == Intrinsic::lifetime_start)
          Allocas[AI].LifetimeStart.push_back(II);
        else
          Allocas[AI].LifetimeEnd.push_back(II);
      }

      if (isa<ReturnInst>(I) || isa<ResumeInst>(I) || isa<CleanupReturnInst>(I))
        RetVec.push_back(I);
    }
  }

  if (Allocas.empty())
    return false;

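  // MTE allocation tags are 4 bits wide, so at most 16 distinct tags are
  // available; assign them round-robin across the interesting allocas.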
  int NextTag = 0;
  int NumInterestingAllocas = 0;
  for (auto &I : Allocas) {
    AllocaInfo &Info = I.second;
    assert(Info.AI);

    if (!isInterestingAlloca(*Info.AI)) {
      Info.Tag = -1;
      continue;
    }

    alignAndPadAlloca(Info);
    NumInterestingAllocas++;
    Info.Tag = NextTag;
    NextTag = (NextTag + 1) % 16;
  }

  if (NumInterestingAllocas == 0)
    return true;

  std::unique_ptr<DominatorTree> DeleteDT;
  DominatorTree *DT = nullptr;
  if (auto *P = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
    DT = &P->getDomTree();

  if (DT == nullptr && (NumInterestingAllocas > 1 ||
                        !F->hasFnAttribute(Attribute::OptimizeNone))) {
    DeleteDT = std::make_unique<DominatorTree>(*F);
    DT = DeleteDT.get();
  }

  std::unique_ptr<PostDominatorTree> DeletePDT;
  PostDominatorTree *PDT = nullptr;
  if (auto *P = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>())
    PDT = &P->getPostDomTree();

  if (PDT == nullptr && !F->hasFnAttribute(Attribute::OptimizeNone)) {
    DeletePDT = std::make_unique<PostDominatorTree>(*F);
    PDT = DeletePDT.get();
  }

  SetTagFunc =
      Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);

  Instruction *Base = insertBaseTaggedPointer(Allocas, DT);

  for (auto &I : Allocas) {
    const AllocaInfo &Info = I.second;
    AllocaInst *AI = Info.AI;
    if (Info.Tag < 0)
      continue;

    // Replace alloca with tagp(alloca).
    IRBuilder<> IRB(Info.AI->getNextNode());
    Function *TagP = Intrinsic::getDeclaration(
        F->getParent(), Intrinsic::aarch64_tagp, {Info.AI->getType()});
    Instruction *TagPCall =
        IRB.CreateCall(TagP, {Constant::getNullValue(Info.AI->getType()), Base,
                              ConstantInt::get(IRB.getInt64Ty(), Info.Tag)});
    if (Info.AI->hasName())
      TagPCall->setName(Info.AI->getName() + ".tag");
    Info.AI->replaceAllUsesWith(TagPCall);
    TagPCall->setOperand(0, Info.AI);

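    // If the alloca has exactly one lifetime.start/end pair, tag at the start
    // and untag at (or after) the end; otherwise tag right after the alloca
    // and untag on every function exit.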
    if (UnrecognizedLifetimes.empty() && Info.LifetimeStart.size() == 1 &&
        Info.LifetimeEnd.size() == 1) {
      IntrinsicInst *Start = Info.LifetimeStart[0];
      IntrinsicInst *End = Info.LifetimeEnd[0];
      uint64_t Size =
          cast<ConstantInt>(Start->getArgOperand(0))->getZExtValue();
      Size = alignTo(Size, kTagGranuleSize);
      tagAlloca(AI, Start->getNextNode(), Start->getArgOperand(1), Size);
      // We need to ensure that if we tag some object, we certainly untag it
      // before the function exits.
      if (PDT != nullptr && postDominates(PDT, End, Start)) {
        untagAlloca(AI, End, Size);
      } else {
        SmallVector<Instruction *, 8> ReachableRetVec;
        unsigned NumCoveredExits = 0;
        for (auto &RI : RetVec) {
          if (!isPotentiallyReachable(Start, RI, nullptr, DT))
            continue;
          ReachableRetVec.push_back(RI);
          if (DT != nullptr && DT->dominates(End, RI))
            ++NumCoveredExits;
        }
        // If there's a mix of covered and non-covered exits, just put the untag
        // on exits, so we avoid the redundancy of untagging twice.
        if (NumCoveredExits == ReachableRetVec.size()) {
          untagAlloca(AI, End, Size);
        } else {
          for (auto &RI : ReachableRetVec)
            untagAlloca(AI, RI, Size);
          // We may have inserted untag outside of the lifetime interval.
          // Remove the lifetime end call for this alloca.
          End->eraseFromParent();
        }
      }
    } else {
      uint64_t Size = Info.AI->getAllocationSizeInBits(*DL).getValue() / 8;
      Value *Ptr = IRB.CreatePointerCast(TagPCall, IRB.getInt8PtrTy());
      tagAlloca(AI, &*IRB.GetInsertPoint(), Ptr, Size);
      for (auto &RI : RetVec) {
        untagAlloca(AI, RI, Size);
      }
      // We may have inserted tag/untag outside of any lifetime interval.
      // Remove all lifetime intrinsics for this alloca.
      for (auto &II : Info.LifetimeStart)
        II->eraseFromParent();
      for (auto &II : Info.LifetimeEnd)
        II->eraseFromParent();
    }

    // Fixup debug intrinsics to point to the new alloca.
    for (auto DVI : Info.DbgVariableIntrinsics)
      DVI->setArgOperand(
          0,
          MetadataAsValue::get(F->getContext(), LocalAsMetadata::get(Info.AI)));
  }

  // If we have instrumented at least one alloca, all unrecognized lifetime
  // intrinsics have to go.
  for (auto &I : UnrecognizedLifetimes)
    I->eraseFromParent();

  return true;
}