//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//


#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

using namespace llvm;

#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"

37 enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
38
39 cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
40 "stack-tagging-unchecked-ld-st", cl::Hidden,
41 cl::init(UncheckedSafe),
42 cl::desc(
43 "Unconditionally apply unchecked-ld-st optimization (even for large "
44 "stack frames, or in the presence of variable sized allocas)."),
45 cl::values(
46 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
47 clEnumValN(
48 UncheckedSafe, "safe",
49 "apply unchecked-ld-st when the target is definitely within range"),
50 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
51
52 static cl::opt<bool>
53 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
54 cl::ZeroOrMore,
55 cl::desc("Apply first slot optimization for stack tagging "
56 "(eliminate ADDG Rt, Rn, 0, 0)."));
57
58 namespace {
59
60 class AArch64StackTaggingPreRA : public MachineFunctionPass {
61 MachineFunction *MF;
62 AArch64FunctionInfo *AFI;
63 MachineFrameInfo *MFI;
64 MachineRegisterInfo *MRI;
65 const AArch64RegisterInfo *TRI;
66 const AArch64InstrInfo *TII;
67
68 SmallVector<MachineInstr*, 16> ReTags;
69
70 public:
71 static char ID;
AArch64StackTaggingPreRA()72 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
73 initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
74 }
75
76 bool mayUseUncheckedLoadStore();
77 void uncheckUsesOf(unsigned TaggedReg, int FI);
78 void uncheckLoadsAndStores();
79 Optional<int> findFirstSlotCandidate();
80
81 bool runOnMachineFunction(MachineFunction &Func) override;
getPassName() const82 StringRef getPassName() const override {
83 return "AArch64 Stack Tagging PreRA";
84 }
85
getAnalysisUsage(AnalysisUsage & AU) const86 void getAnalysisUsage(AnalysisUsage &AU) const override {
87 AU.setPreservesCFG();
88 MachineFunctionPass::getAnalysisUsage(AU);
89 }
90 };
91 } // end anonymous namespace
92
93 char AArch64StackTaggingPreRA::ID = 0;
94
95 INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
96 "AArch64 Stack Tagging PreRA Pass", false, false)
97 INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
98 "AArch64 Stack Tagging PreRA Pass", false, false)
99
createAArch64StackTaggingPreRAPass()100 FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
101 return new AArch64StackTaggingPreRA();
102 }
103
isUncheckedLoadOrStoreOpcode(unsigned Opcode)104 static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
105 switch (Opcode) {
106 case AArch64::LDRBBui:
107 case AArch64::LDRHHui:
108 case AArch64::LDRWui:
109 case AArch64::LDRXui:
110
111 case AArch64::LDRBui:
112 case AArch64::LDRHui:
113 case AArch64::LDRSui:
114 case AArch64::LDRDui:
115 case AArch64::LDRQui:
116
117 case AArch64::LDRSHWui:
118 case AArch64::LDRSHXui:
119
120 case AArch64::LDRSBWui:
121 case AArch64::LDRSBXui:
122
123 case AArch64::LDRSWui:
124
125 case AArch64::STRBBui:
126 case AArch64::STRHHui:
127 case AArch64::STRWui:
128 case AArch64::STRXui:
129
130 case AArch64::STRBui:
131 case AArch64::STRHui:
132 case AArch64::STRSui:
133 case AArch64::STRDui:
134 case AArch64::STRQui:
135
136 case AArch64::LDPWi:
137 case AArch64::LDPXi:
138 case AArch64::LDPSi:
139 case AArch64::LDPDi:
140 case AArch64::LDPQi:
141
142 case AArch64::LDPSWi:
143
144 case AArch64::STPWi:
145 case AArch64::STPXi:
146 case AArch64::STPSi:
147 case AArch64::STPDi:
148 case AArch64::STPQi:
149 return true;
150 default:
151 return false;
152 }
153 }
154
mayUseUncheckedLoadStore()155 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
156 if (ClUncheckedLdSt == UncheckedNever)
157 return false;
158 else if (ClUncheckedLdSt == UncheckedAlways)
159 return true;
160
161 // This estimate can be improved if we had harder guarantees about stack frame
162 // layout. With LocalStackAllocation we can estimate SP offset to any
163 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
164 // objects ahead of non-tagged ones, but that's not always desirable.
165 //
166 // Underestimating SP offset here may require the use of LDG to materialize
167 // the tagged address of the stack slot, along with a scratch register
168 // allocation (post-regalloc!).
169 //
170 // For now we do the safe thing here and require that the entire stack frame
171 // is within range of the shortest of the unchecked instructions.
172 unsigned FrameSize = 0;
173 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
174 FrameSize += MFI->getObjectSize(i);
175 bool EntireFrameReachableFromSP = FrameSize < 0xf00;
176 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
177 }
178
uncheckUsesOf(unsigned TaggedReg,int FI)179 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
180 for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end();
181 UI != E;) {
182 MachineInstr *UseI = &*(UI++);
183 if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) {
184 // FI operand is always the one before the immediate offset.
185 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1;
186 if (UseI->getOperand(OpIdx).isReg() &&
187 UseI->getOperand(OpIdx).getReg() == TaggedReg) {
188 UseI->getOperand(OpIdx).ChangeToFrameIndex(FI);
189 UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
190 }
191 } else if (UseI->isCopy() &&
192 Register::isVirtualRegister(UseI->getOperand(0).getReg())) {
193 uncheckUsesOf(UseI->getOperand(0).getReg(), FI);
194 }
195 }
196 }
197
uncheckLoadsAndStores()198 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
199 for (auto *I : ReTags) {
200 unsigned TaggedReg = I->getOperand(0).getReg();
201 int FI = I->getOperand(1).getIndex();
202 uncheckUsesOf(TaggedReg, FI);
203 }
204 }
205
206 struct SlotWithTag {
207 int FI;
208 int Tag;
SlotWithTagSlotWithTag209 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
SlotWithTagSlotWithTag210 explicit SlotWithTag(const MachineInstr &MI)
211 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
operator ==SlotWithTag212 bool operator==(const SlotWithTag &Other) const {
213 return FI == Other.FI && Tag == Other.Tag;
214 }
215 };
216
217 namespace llvm {
218 template <> struct DenseMapInfo<SlotWithTag> {
getEmptyKeyllvm::DenseMapInfo219 static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
getTombstoneKeyllvm::DenseMapInfo220 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
getHashValuellvm::DenseMapInfo221 static unsigned getHashValue(const SlotWithTag &V) {
222 return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
223 DenseMapInfo<int>::getHashValue(V.Tag));
224 }
isEqualllvm::DenseMapInfo225 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
226 return A == B;
227 }
228 };
229 } // namespace llvm
230
isSlotPreAllocated(MachineFrameInfo * MFI,int FI)231 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
232 return MFI->getUseLocalStackAllocationBlock() &&
233 MFI->isObjectPreAllocated(FI);
234 }
235
236 // Pin one of the tagged slots to offset 0 from the tagged base pointer.
237 // This would make its address available in a virtual register (IRG's def), as
238 // opposed to requiring an ADDG instruction to materialize. This effectively
239 // eliminates a vreg (by replacing it with direct uses of IRG, which is usually
240 // live almost everywhere anyway), and therefore needs to happen before
241 // regalloc.
findFirstSlotCandidate()242 Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
243 // Find the best (FI, Tag) pair to pin to offset 0.
244 // Looking at the possible uses of a tagged address, the advantage of pinning
245 // is:
246 // - COPY to physical register.
247 // Does not matter, this would trade a MOV instruction for an ADDG.
248 // - ST*G matter, but those mostly appear near the function prologue where all
249 // the tagged addresses need to be materialized anyway; also, counting ST*G
250 // uses would overweight large allocas that require more than one ST*G
251 // instruction.
252 // - Load/Store instructions in the address operand do not require a tagged
253 // pointer, so they also do not benefit. These operands have already been
254 // eliminated (see uncheckLoadsAndStores) so all remaining load/store
255 // instructions count.
256 // - Any other instruction may benefit from being pinned to offset 0.
257 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
258 if (!ClFirstSlot)
259 return None;
260
261 DenseMap<SlotWithTag, int> RetagScore;
262 SlotWithTag MaxScoreST{-1, -1};
263 int MaxScore = -1;
264 for (auto *I : ReTags) {
265 SlotWithTag ST{*I};
266 if (isSlotPreAllocated(MFI, ST.FI))
267 continue;
268
269 Register RetagReg = I->getOperand(0).getReg();
270 if (!Register::isVirtualRegister(RetagReg))
271 continue;
272
273 int Score = 0;
274 SmallVector<Register, 8> WorkList;
275 WorkList.push_back(RetagReg);
276
277 while (!WorkList.empty()) {
278 Register UseReg = WorkList.back();
279 WorkList.pop_back();
280 for (auto &UseI : MRI->use_instructions(UseReg)) {
281 unsigned Opcode = UseI.getOpcode();
282 if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
283 Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
284 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
285 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
286 Opcode == AArch64::STZGloop_wback)
287 continue;
288 if (UseI.isCopy()) {
289 Register DstReg = UseI.getOperand(0).getReg();
290 if (Register::isVirtualRegister(DstReg))
291 WorkList.push_back(DstReg);
292 continue;
293 }
294 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
295 << Register::virtReg2Index(UseReg) << " in " << UseI
296 << "\n");
297 Score++;
298 }
299 }
300
301 int TotalScore = RetagScore[ST] += Score;
302 if (TotalScore > MaxScore ||
303 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
304 MaxScore = TotalScore;
305 MaxScoreST = ST;
306 }
307 }
308
309 if (MaxScoreST.FI < 0)
310 return None;
311
312 // If FI's tag is already 0, we are done.
313 if (MaxScoreST.Tag == 0)
314 return MaxScoreST.FI;
315
316 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
317 SlotWithTag SwapST{-1, -1};
318 for (auto *I : ReTags) {
319 SlotWithTag ST{*I};
320 if (ST.Tag == 0) {
321 SwapST = ST;
322 break;
323 }
324 }
325
326 // Swap tags between the victim and the highest scoring pair.
327 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
328 // the highest score slot without changing anything else.
329 for (auto *&I : ReTags) {
330 SlotWithTag ST{*I};
331 MachineOperand &TagOp = I->getOperand(4);
332 if (ST == MaxScoreST) {
333 TagOp.setImm(0);
334 } else if (ST == SwapST) {
335 TagOp.setImm(MaxScoreST.Tag);
336 }
337 }
338 return MaxScoreST.FI;
339 }
340
runOnMachineFunction(MachineFunction & Func)341 bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
342 MF = &Func;
343 MRI = &MF->getRegInfo();
344 AFI = MF->getInfo<AArch64FunctionInfo>();
345 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
346 TRI = static_cast<const AArch64RegisterInfo *>(
347 MF->getSubtarget().getRegisterInfo());
348 MFI = &MF->getFrameInfo();
349 ReTags.clear();
350
351 assert(MRI->isSSA());
352
353 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
354 << "********** Function: " << MF->getName() << '\n');
355
356 SmallSetVector<int, 8> TaggedSlots;
357 for (auto &BB : *MF) {
358 for (auto &I : BB) {
359 if (I.getOpcode() == AArch64::TAGPstack) {
360 ReTags.push_back(&I);
361 int FI = I.getOperand(1).getIndex();
362 TaggedSlots.insert(FI);
363 // There should be no offsets in TAGP yet.
364 assert(I.getOperand(2).getImm() == 0);
365 }
366 }
367 }
368
369 // Take over from SSP. It does nothing for tagged slots, and should not really
370 // have been enabled in the first place.
371 for (int FI : TaggedSlots)
372 MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
373
374 if (ReTags.empty())
375 return false;
376
377 if (mayUseUncheckedLoadStore())
378 uncheckLoadsAndStores();
379
380 // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
381 // If the base tagged pointer is set up to the address of this slot,
382 // the ADDG instruction can be eliminated.
383 Optional<int> BaseSlot = findFirstSlotCandidate();
384 if (BaseSlot)
385 AFI->setTaggedBasePointerIndex(*BaseSlot);
386
387 for (auto *I : ReTags) {
388 int FI = I->getOperand(1).getIndex();
389 int Tag = I->getOperand(4).getImm();
390 Register Base = I->getOperand(3).getReg();
391 if (Tag == 0 && FI == BaseSlot) {
392 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
393 I->getOperand(0).getReg())
394 .addReg(Base);
395 I->eraseFromParent();
396 }
397 }
398
399 return true;
400 }
401