1 //===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 
10 #include "AArch64.h"
11 #include "AArch64MachineFunctionInfo.h"
12 #include "AArch64InstrInfo.h"
13 #include "llvm/ADT/DepthFirstIterator.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/Statistic.h"
16 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineFunctionPass.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineLoopInfo.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/MachineTraceMetrics.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetRegisterInfo.h"
27 #include "llvm/CodeGen/TargetSubtargetInfo.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/Debug.h"
30 #include "llvm/Support/raw_ostream.h"
31 
32 using namespace llvm;
33 
34 #define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
35 
36 enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
37 
38 cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
39     "stack-tagging-unchecked-ld-st", cl::Hidden,
40     cl::init(UncheckedSafe),
41     cl::desc(
42         "Unconditionally apply unchecked-ld-st optimization (even for large "
43         "stack frames, or in the presence of variable sized allocas)."),
44     cl::values(
45         clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
46         clEnumValN(
47             UncheckedSafe, "safe",
48             "apply unchecked-ld-st when the target is definitely within range"),
49         clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
50 
51 static cl::opt<bool>
52     ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
53                 cl::desc("Apply first slot optimization for stack tagging "
54                          "(eliminate ADDG Rt, Rn, 0, 0)."));
55 
56 namespace {
57 
58 class AArch64StackTaggingPreRA : public MachineFunctionPass {
59   MachineFunction *MF;
60   AArch64FunctionInfo *AFI;
61   MachineFrameInfo *MFI;
62   MachineRegisterInfo *MRI;
63   const AArch64RegisterInfo *TRI;
64   const AArch64InstrInfo *TII;
65 
66   SmallVector<MachineInstr*, 16> ReTags;
67 
68 public:
69   static char ID;
70   AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
71     initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
72   }
73 
74   bool mayUseUncheckedLoadStore();
75   void uncheckUsesOf(unsigned TaggedReg, int FI);
76   void uncheckLoadsAndStores();
77   Optional<int> findFirstSlotCandidate();
78 
79   bool runOnMachineFunction(MachineFunction &Func) override;
80   StringRef getPassName() const override {
81     return "AArch64 Stack Tagging PreRA";
82   }
83 
84   void getAnalysisUsage(AnalysisUsage &AU) const override {
85     AU.setPreservesCFG();
86     MachineFunctionPass::getAnalysisUsage(AU);
87   }
88 };
89 } // end anonymous namespace
90 
91 char AArch64StackTaggingPreRA::ID = 0;
92 
93 INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
94                       "AArch64 Stack Tagging PreRA Pass", false, false)
95 INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
96                     "AArch64 Stack Tagging PreRA Pass", false, false)
97 
98 FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
99   return new AArch64StackTaggingPreRA();
100 }
101 
102 static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
103   switch (Opcode) {
104   case AArch64::LDRBBui:
105   case AArch64::LDRHHui:
106   case AArch64::LDRWui:
107   case AArch64::LDRXui:
108 
109   case AArch64::LDRBui:
110   case AArch64::LDRHui:
111   case AArch64::LDRSui:
112   case AArch64::LDRDui:
113   case AArch64::LDRQui:
114 
115   case AArch64::LDRSHWui:
116   case AArch64::LDRSHXui:
117 
118   case AArch64::LDRSBWui:
119   case AArch64::LDRSBXui:
120 
121   case AArch64::LDRSWui:
122 
123   case AArch64::STRBBui:
124   case AArch64::STRHHui:
125   case AArch64::STRWui:
126   case AArch64::STRXui:
127 
128   case AArch64::STRBui:
129   case AArch64::STRHui:
130   case AArch64::STRSui:
131   case AArch64::STRDui:
132   case AArch64::STRQui:
133 
134   case AArch64::LDPWi:
135   case AArch64::LDPXi:
136   case AArch64::LDPSi:
137   case AArch64::LDPDi:
138   case AArch64::LDPQi:
139 
140   case AArch64::LDPSWi:
141 
142   case AArch64::STPWi:
143   case AArch64::STPXi:
144   case AArch64::STPSi:
145   case AArch64::STPDi:
146   case AArch64::STPQi:
147     return true;
148   default:
149     return false;
150   }
151 }
152 
153 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
154   if (ClUncheckedLdSt == UncheckedNever)
155     return false;
156   else if (ClUncheckedLdSt == UncheckedAlways)
157     return true;
158 
159   // This estimate can be improved if we had harder guarantees about stack frame
160   // layout. With LocalStackAllocation we can estimate SP offset to any
161   // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
162   // objects ahead of non-tagged ones, but that's not always desirable.
163   //
164   // Underestimating SP offset here may require the use of LDG to materialize
165   // the tagged address of the stack slot, along with a scratch register
166   // allocation (post-regalloc!).
167   //
168   // For now we do the safe thing here and require that the entire stack frame
169   // is within range of the shortest of the unchecked instructions.
170   unsigned FrameSize = 0;
171   for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
172     FrameSize += MFI->getObjectSize(i);
173   bool EntireFrameReachableFromSP = FrameSize < 0xf00;
174   return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
175 }
176 
177 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
178   for (MachineInstr &UseI :
179        llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) {
180     if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) {
181       // FI operand is always the one before the immediate offset.
182       unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1;
183       if (UseI.getOperand(OpIdx).isReg() &&
184           UseI.getOperand(OpIdx).getReg() == TaggedReg) {
185         UseI.getOperand(OpIdx).ChangeToFrameIndex(FI);
186         UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
187       }
188     } else if (UseI.isCopy() &&
189                Register::isVirtualRegister(UseI.getOperand(0).getReg())) {
190       uncheckUsesOf(UseI.getOperand(0).getReg(), FI);
191     }
192   }
193 }
194 
195 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
196   for (auto *I : ReTags) {
197     Register TaggedReg = I->getOperand(0).getReg();
198     int FI = I->getOperand(1).getIndex();
199     uncheckUsesOf(TaggedReg, FI);
200   }
201 }
202 
203 namespace {
204 struct SlotWithTag {
205   int FI;
206   int Tag;
207   SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
208   explicit SlotWithTag(const MachineInstr &MI)
209       : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
210   bool operator==(const SlotWithTag &Other) const {
211     return FI == Other.FI && Tag == Other.Tag;
212   }
213 };
214 } // namespace
215 
216 namespace llvm {
217 template <> struct DenseMapInfo<SlotWithTag> {
218   static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
219   static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
220   static unsigned getHashValue(const SlotWithTag &V) {
221     return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
222                         DenseMapInfo<int>::getHashValue(V.Tag));
223   }
224   static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
225     return A == B;
226   }
227 };
228 } // namespace llvm
229 
230 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
231   return MFI->getUseLocalStackAllocationBlock() &&
232          MFI->isObjectPreAllocated(FI);
233 }
234 
235 // Pin one of the tagged slots to offset 0 from the tagged base pointer.
236 // This would make its address available in a virtual register (IRG's def), as
237 // opposed to requiring an ADDG instruction to materialize. This effectively
238 // eliminates a vreg (by replacing it with direct uses of IRG, which is usually
239 // live almost everywhere anyway), and therefore needs to happen before
240 // regalloc.
241 Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
242   // Find the best (FI, Tag) pair to pin to offset 0.
243   // Looking at the possible uses of a tagged address, the advantage of pinning
244   // is:
245   // - COPY to physical register.
246   //   Does not matter, this would trade a MOV instruction for an ADDG.
247   // - ST*G matter, but those mostly appear near the function prologue where all
248   //   the tagged addresses need to be materialized anyway; also, counting ST*G
249   //   uses would overweight large allocas that require more than one ST*G
250   //   instruction.
251   // - Load/Store instructions in the address operand do not require a tagged
252   //   pointer, so they also do not benefit. These operands have already been
253   //   eliminated (see uncheckLoadsAndStores) so all remaining load/store
254   //   instructions count.
255   // - Any other instruction may benefit from being pinned to offset 0.
256   LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
257   if (!ClFirstSlot)
258     return None;
259 
260   DenseMap<SlotWithTag, int> RetagScore;
261   SlotWithTag MaxScoreST{-1, -1};
262   int MaxScore = -1;
263   for (auto *I : ReTags) {
264     SlotWithTag ST{*I};
265     if (isSlotPreAllocated(MFI, ST.FI))
266       continue;
267 
268     Register RetagReg = I->getOperand(0).getReg();
269     if (!Register::isVirtualRegister(RetagReg))
270       continue;
271 
272     int Score = 0;
273     SmallVector<Register, 8> WorkList;
274     WorkList.push_back(RetagReg);
275 
276     while (!WorkList.empty()) {
277       Register UseReg = WorkList.pop_back_val();
278       for (auto &UseI : MRI->use_instructions(UseReg)) {
279         unsigned Opcode = UseI.getOpcode();
280         if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
281             Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
282             Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
283             Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
284             Opcode == AArch64::STZGloop_wback)
285           continue;
286         if (UseI.isCopy()) {
287           Register DstReg = UseI.getOperand(0).getReg();
288           if (Register::isVirtualRegister(DstReg))
289             WorkList.push_back(DstReg);
290           continue;
291         }
292         LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
293                           << Register::virtReg2Index(UseReg) << " in " << UseI
294                           << "\n");
295         Score++;
296       }
297     }
298 
299     int TotalScore = RetagScore[ST] += Score;
300     if (TotalScore > MaxScore ||
301         (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
302       MaxScore = TotalScore;
303       MaxScoreST = ST;
304     }
305   }
306 
307   if (MaxScoreST.FI < 0)
308     return None;
309 
310   // If FI's tag is already 0, we are done.
311   if (MaxScoreST.Tag == 0)
312     return MaxScoreST.FI;
313 
314   // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
315   SlotWithTag SwapST{-1, -1};
316   for (auto *I : ReTags) {
317     SlotWithTag ST{*I};
318     if (ST.Tag == 0) {
319       SwapST = ST;
320       break;
321     }
322   }
323 
324   // Swap tags between the victim and the highest scoring pair.
325   // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
326   // the highest score slot without changing anything else.
327   for (auto *&I : ReTags) {
328     SlotWithTag ST{*I};
329     MachineOperand &TagOp = I->getOperand(4);
330     if (ST == MaxScoreST) {
331       TagOp.setImm(0);
332     } else if (ST == SwapST) {
333       TagOp.setImm(MaxScoreST.Tag);
334     }
335   }
336   return MaxScoreST.FI;
337 }
338 
339 bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
340   MF = &Func;
341   MRI = &MF->getRegInfo();
342   AFI = MF->getInfo<AArch64FunctionInfo>();
343   TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
344   TRI = static_cast<const AArch64RegisterInfo *>(
345       MF->getSubtarget().getRegisterInfo());
346   MFI = &MF->getFrameInfo();
347   ReTags.clear();
348 
349   assert(MRI->isSSA());
350 
351   LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
352                     << "********** Function: " << MF->getName() << '\n');
353 
354   SmallSetVector<int, 8> TaggedSlots;
355   for (auto &BB : *MF) {
356     for (auto &I : BB) {
357       if (I.getOpcode() == AArch64::TAGPstack) {
358         ReTags.push_back(&I);
359         int FI = I.getOperand(1).getIndex();
360         TaggedSlots.insert(FI);
361         // There should be no offsets in TAGP yet.
362         assert(I.getOperand(2).getImm() == 0);
363       }
364     }
365   }
366 
367   // Take over from SSP. It does nothing for tagged slots, and should not really
368   // have been enabled in the first place.
369   for (int FI : TaggedSlots)
370     MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
371 
372   if (ReTags.empty())
373     return false;
374 
375   if (mayUseUncheckedLoadStore())
376     uncheckLoadsAndStores();
377 
378   // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
379   // If the base tagged pointer is set up to the address of this slot,
380   // the ADDG instruction can be eliminated.
381   Optional<int> BaseSlot = findFirstSlotCandidate();
382   if (BaseSlot)
383     AFI->setTaggedBasePointerIndex(*BaseSlot);
384 
385   for (auto *I : ReTags) {
386     int FI = I->getOperand(1).getIndex();
387     int Tag = I->getOperand(4).getImm();
388     Register Base = I->getOperand(3).getReg();
389     if (Tag == 0 && FI == BaseSlot) {
390       BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
391               I->getOperand(0).getReg())
392           .addReg(Base);
393       I->eraseFromParent();
394     }
395   }
396 
397   return true;
398 }
399