//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

10
11 #include "AArch64.h"
12 #include "AArch64MachineFunctionInfo.h"
13 #include "AArch64InstrInfo.h"
14 #include "llvm/ADT/DepthFirstIterator.h"
15 #include "llvm/ADT/SetVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22 #include "llvm/CodeGen/MachineLoopInfo.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/MachineTraceMetrics.h"
25 #include "llvm/CodeGen/Passes.h"
26 #include "llvm/CodeGen/TargetInstrInfo.h"
27 #include "llvm/CodeGen/TargetRegisterInfo.h"
28 #include "llvm/CodeGen/TargetSubtargetInfo.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/raw_ostream.h"
32
33 using namespace llvm;
34
35 #define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
36
37 enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
38
39 cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
40 "stack-tagging-unchecked-ld-st", cl::Hidden,
41 cl::init(UncheckedSafe),
42 cl::desc(
43 "Unconditionally apply unchecked-ld-st optimization (even for large "
44 "stack frames, or in the presence of variable sized allocas)."),
45 cl::values(
46 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
47 clEnumValN(
48 UncheckedSafe, "safe",
49 "apply unchecked-ld-st when the target is definitely within range"),
50 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
51
52 static cl::opt<bool>
53 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true),
54 cl::ZeroOrMore,
55 cl::desc("Apply first slot optimization for stack tagging "
56 "(eliminate ADDG Rt, Rn, 0, 0)."));
57
58 namespace {
59
60 class AArch64StackTaggingPreRA : public MachineFunctionPass {
61 MachineFunction *MF;
62 AArch64FunctionInfo *AFI;
63 MachineFrameInfo *MFI;
64 MachineRegisterInfo *MRI;
65 const AArch64RegisterInfo *TRI;
66 const AArch64InstrInfo *TII;
67
68 SmallVector<MachineInstr*, 16> ReTags;
69
70 public:
71 static char ID;
AArch64StackTaggingPreRA()72 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
73 initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
74 }
75
76 bool mayUseUncheckedLoadStore();
77 void uncheckUsesOf(unsigned TaggedReg, int FI);
78 void uncheckLoadsAndStores();
79 Optional<int> findFirstSlotCandidate();
80
81 bool runOnMachineFunction(MachineFunction &Func) override;
getPassName() const82 StringRef getPassName() const override {
83 return "AArch64 Stack Tagging PreRA";
84 }
85
getAnalysisUsage(AnalysisUsage & AU) const86 void getAnalysisUsage(AnalysisUsage &AU) const override {
87 AU.setPreservesCFG();
88 MachineFunctionPass::getAnalysisUsage(AU);
89 }
90 };
91 } // end anonymous namespace
92
93 char AArch64StackTaggingPreRA::ID = 0;
94
95 INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
96 "AArch64 Stack Tagging PreRA Pass", false, false)
97 INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
98 "AArch64 Stack Tagging PreRA Pass", false, false)
99
createAArch64StackTaggingPreRAPass()100 FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
101 return new AArch64StackTaggingPreRA();
102 }
103
isUncheckedLoadOrStoreOpcode(unsigned Opcode)104 static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
105 switch (Opcode) {
106 case AArch64::LDRBBui:
107 case AArch64::LDRHHui:
108 case AArch64::LDRWui:
109 case AArch64::LDRXui:
110
111 case AArch64::LDRBui:
112 case AArch64::LDRHui:
113 case AArch64::LDRSui:
114 case AArch64::LDRDui:
115 case AArch64::LDRQui:
116
117 case AArch64::LDRSHWui:
118 case AArch64::LDRSHXui:
119
120 case AArch64::LDRSBWui:
121 case AArch64::LDRSBXui:
122
123 case AArch64::LDRSWui:
124
125 case AArch64::STRBBui:
126 case AArch64::STRHHui:
127 case AArch64::STRWui:
128 case AArch64::STRXui:
129
130 case AArch64::STRBui:
131 case AArch64::STRHui:
132 case AArch64::STRSui:
133 case AArch64::STRDui:
134 case AArch64::STRQui:
135
136 case AArch64::LDPWi:
137 case AArch64::LDPXi:
138 case AArch64::LDPSi:
139 case AArch64::LDPDi:
140 case AArch64::LDPQi:
141
142 case AArch64::LDPSWi:
143
144 case AArch64::STPWi:
145 case AArch64::STPXi:
146 case AArch64::STPSi:
147 case AArch64::STPDi:
148 case AArch64::STPQi:
149 return true;
150 default:
151 return false;
152 }
153 }
154
mayUseUncheckedLoadStore()155 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
156 if (ClUncheckedLdSt == UncheckedNever)
157 return false;
158 else if (ClUncheckedLdSt == UncheckedAlways)
159 return true;
160
161 // This estimate can be improved if we had harder guarantees about stack frame
162 // layout. With LocalStackAllocation we can estimate SP offset to any
163 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged
164 // objects ahead of non-tagged ones, but that's not always desirable.
165 //
166 // Underestimating SP offset here may require the use of LDG to materialize
167 // the tagged address of the stack slot, along with a scratch register
168 // allocation (post-regalloc!).
169 //
170 // For now we do the safe thing here and require that the entire stack frame
171 // is within range of the shortest of the unchecked instructions.
172 unsigned FrameSize = 0;
173 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
174 FrameSize += MFI->getObjectSize(i);
175 bool EntireFrameReachableFromSP = FrameSize < 0xf00;
176 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
177 }
178
uncheckUsesOf(unsigned TaggedReg,int FI)179 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
180 for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end();
181 UI != E;) {
182 MachineInstr *UseI = &*(UI++);
183 if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) {
184 // FI operand is always the one before the immediate offset.
185 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1;
186 if (UseI->getOperand(OpIdx).isReg() &&
187 UseI->getOperand(OpIdx).getReg() == TaggedReg) {
188 UseI->getOperand(OpIdx).ChangeToFrameIndex(FI);
189 UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
190 }
191 } else if (UseI->isCopy() &&
192 Register::isVirtualRegister(UseI->getOperand(0).getReg())) {
193 uncheckUsesOf(UseI->getOperand(0).getReg(), FI);
194 }
195 }
196 }
197
uncheckLoadsAndStores()198 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
199 for (auto *I : ReTags) {
200 unsigned TaggedReg = I->getOperand(0).getReg();
201 int FI = I->getOperand(1).getIndex();
202 uncheckUsesOf(TaggedReg, FI);
203 }
204 }
205
206 namespace {
207 struct SlotWithTag {
208 int FI;
209 int Tag;
SlotWithTag__anonc63156590211::SlotWithTag210 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {}
SlotWithTag__anonc63156590211::SlotWithTag211 explicit SlotWithTag(const MachineInstr &MI)
212 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {}
operator ==__anonc63156590211::SlotWithTag213 bool operator==(const SlotWithTag &Other) const {
214 return FI == Other.FI && Tag == Other.Tag;
215 }
216 };
217 } // namespace
218
219 namespace llvm {
220 template <> struct DenseMapInfo<SlotWithTag> {
getEmptyKeyllvm::DenseMapInfo221 static inline SlotWithTag getEmptyKey() { return {-2, -2}; }
getTombstoneKeyllvm::DenseMapInfo222 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; }
getHashValuellvm::DenseMapInfo223 static unsigned getHashValue(const SlotWithTag &V) {
224 return hash_combine(DenseMapInfo<int>::getHashValue(V.FI),
225 DenseMapInfo<int>::getHashValue(V.Tag));
226 }
isEqualllvm::DenseMapInfo227 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) {
228 return A == B;
229 }
230 };
231 } // namespace llvm
232
isSlotPreAllocated(MachineFrameInfo * MFI,int FI)233 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) {
234 return MFI->getUseLocalStackAllocationBlock() &&
235 MFI->isObjectPreAllocated(FI);
236 }
237
238 // Pin one of the tagged slots to offset 0 from the tagged base pointer.
239 // This would make its address available in a virtual register (IRG's def), as
240 // opposed to requiring an ADDG instruction to materialize. This effectively
241 // eliminates a vreg (by replacing it with direct uses of IRG, which is usually
242 // live almost everywhere anyway), and therefore needs to happen before
243 // regalloc.
findFirstSlotCandidate()244 Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
245 // Find the best (FI, Tag) pair to pin to offset 0.
246 // Looking at the possible uses of a tagged address, the advantage of pinning
247 // is:
248 // - COPY to physical register.
249 // Does not matter, this would trade a MOV instruction for an ADDG.
250 // - ST*G matter, but those mostly appear near the function prologue where all
251 // the tagged addresses need to be materialized anyway; also, counting ST*G
252 // uses would overweight large allocas that require more than one ST*G
253 // instruction.
254 // - Load/Store instructions in the address operand do not require a tagged
255 // pointer, so they also do not benefit. These operands have already been
256 // eliminated (see uncheckLoadsAndStores) so all remaining load/store
257 // instructions count.
258 // - Any other instruction may benefit from being pinned to offset 0.
259 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n");
260 if (!ClFirstSlot)
261 return None;
262
263 DenseMap<SlotWithTag, int> RetagScore;
264 SlotWithTag MaxScoreST{-1, -1};
265 int MaxScore = -1;
266 for (auto *I : ReTags) {
267 SlotWithTag ST{*I};
268 if (isSlotPreAllocated(MFI, ST.FI))
269 continue;
270
271 Register RetagReg = I->getOperand(0).getReg();
272 if (!Register::isVirtualRegister(RetagReg))
273 continue;
274
275 int Score = 0;
276 SmallVector<Register, 8> WorkList;
277 WorkList.push_back(RetagReg);
278
279 while (!WorkList.empty()) {
280 Register UseReg = WorkList.back();
281 WorkList.pop_back();
282 for (auto &UseI : MRI->use_instructions(UseReg)) {
283 unsigned Opcode = UseI.getOpcode();
284 if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
285 Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
286 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
287 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
288 Opcode == AArch64::STZGloop_wback)
289 continue;
290 if (UseI.isCopy()) {
291 Register DstReg = UseI.getOperand(0).getReg();
292 if (Register::isVirtualRegister(DstReg))
293 WorkList.push_back(DstReg);
294 continue;
295 }
296 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %"
297 << Register::virtReg2Index(UseReg) << " in " << UseI
298 << "\n");
299 Score++;
300 }
301 }
302
303 int TotalScore = RetagScore[ST] += Score;
304 if (TotalScore > MaxScore ||
305 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) {
306 MaxScore = TotalScore;
307 MaxScoreST = ST;
308 }
309 }
310
311 if (MaxScoreST.FI < 0)
312 return None;
313
314 // If FI's tag is already 0, we are done.
315 if (MaxScoreST.Tag == 0)
316 return MaxScoreST.FI;
317
318 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0.
319 SlotWithTag SwapST{-1, -1};
320 for (auto *I : ReTags) {
321 SlotWithTag ST{*I};
322 if (ST.Tag == 0) {
323 SwapST = ST;
324 break;
325 }
326 }
327
328 // Swap tags between the victim and the highest scoring pair.
329 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for
330 // the highest score slot without changing anything else.
331 for (auto *&I : ReTags) {
332 SlotWithTag ST{*I};
333 MachineOperand &TagOp = I->getOperand(4);
334 if (ST == MaxScoreST) {
335 TagOp.setImm(0);
336 } else if (ST == SwapST) {
337 TagOp.setImm(MaxScoreST.Tag);
338 }
339 }
340 return MaxScoreST.FI;
341 }
342
runOnMachineFunction(MachineFunction & Func)343 bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
344 MF = &Func;
345 MRI = &MF->getRegInfo();
346 AFI = MF->getInfo<AArch64FunctionInfo>();
347 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo());
348 TRI = static_cast<const AArch64RegisterInfo *>(
349 MF->getSubtarget().getRegisterInfo());
350 MFI = &MF->getFrameInfo();
351 ReTags.clear();
352
353 assert(MRI->isSSA());
354
355 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
356 << "********** Function: " << MF->getName() << '\n');
357
358 SmallSetVector<int, 8> TaggedSlots;
359 for (auto &BB : *MF) {
360 for (auto &I : BB) {
361 if (I.getOpcode() == AArch64::TAGPstack) {
362 ReTags.push_back(&I);
363 int FI = I.getOperand(1).getIndex();
364 TaggedSlots.insert(FI);
365 // There should be no offsets in TAGP yet.
366 assert(I.getOperand(2).getImm() == 0);
367 }
368 }
369 }
370
371 // Take over from SSP. It does nothing for tagged slots, and should not really
372 // have been enabled in the first place.
373 for (int FI : TaggedSlots)
374 MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None);
375
376 if (ReTags.empty())
377 return false;
378
379 if (mayUseUncheckedLoadStore())
380 uncheckLoadsAndStores();
381
382 // Find a slot that is used with zero tag offset, like ADDG #fi, 0.
383 // If the base tagged pointer is set up to the address of this slot,
384 // the ADDG instruction can be eliminated.
385 Optional<int> BaseSlot = findFirstSlotCandidate();
386 if (BaseSlot)
387 AFI->setTaggedBasePointerIndex(*BaseSlot);
388
389 for (auto *I : ReTags) {
390 int FI = I->getOperand(1).getIndex();
391 int Tag = I->getOperand(4).getImm();
392 Register Base = I->getOperand(3).getReg();
393 if (Tag == 0 && FI == BaseSlot) {
394 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY),
395 I->getOperand(0).getReg())
396 .addReg(Base);
397 I->eraseFromParent();
398 }
399 }
400
401 return true;
402 }
403