1 //===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 11 #include "AArch64.h" 12 #include "AArch64MachineFunctionInfo.h" 13 #include "AArch64InstrInfo.h" 14 #include "llvm/ADT/DepthFirstIterator.h" 15 #include "llvm/ADT/SetVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/CodeGen/MachineFunction.h" 20 #include "llvm/CodeGen/MachineFunctionPass.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineLoopInfo.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/MachineTraceMetrics.h" 25 #include "llvm/CodeGen/Passes.h" 26 #include "llvm/CodeGen/TargetInstrInfo.h" 27 #include "llvm/CodeGen/TargetRegisterInfo.h" 28 #include "llvm/CodeGen/TargetSubtargetInfo.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/raw_ostream.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "aarch64-stack-tagging-pre-ra" 36 37 enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways }; 38 39 cl::opt<UncheckedLdStMode> ClUncheckedLdSt( 40 "stack-tagging-unchecked-ld-st", cl::Hidden, 41 cl::init(UncheckedSafe), 42 cl::desc( 43 "Unconditionally apply unchecked-ld-st optimization (even for large " 44 "stack frames, or in the presence of variable sized allocas)."), 45 cl::values( 46 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), 47 clEnumValN( 48 UncheckedSafe, "safe", 49 "apply unchecked-ld-st when the target is definitely within range"), 50 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st"))); 51 52 static cl::opt<bool> 53 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), 54 cl::ZeroOrMore, 55 cl::desc("Apply first slot optimization for stack tagging " 56 "(eliminate ADDG Rt, Rn, 0, 0).")); 57 58 namespace { 59 60 class AArch64StackTaggingPreRA : public MachineFunctionPass { 61 MachineFunction *MF; 62 AArch64FunctionInfo *AFI; 63 MachineFrameInfo *MFI; 64 MachineRegisterInfo *MRI; 65 const AArch64RegisterInfo *TRI; 66 const AArch64InstrInfo *TII; 67 68 SmallVector<MachineInstr*, 16> ReTags; 69 70 public: 71 static char ID; 72 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) { 73 initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry()); 74 } 75 76 bool mayUseUncheckedLoadStore(); 77 void uncheckUsesOf(unsigned TaggedReg, int FI); 78 void uncheckLoadsAndStores(); 79 Optional<int> findFirstSlotCandidate(); 80 81 bool runOnMachineFunction(MachineFunction &Func) override; 82 StringRef getPassName() const override { 83 return "AArch64 Stack Tagging PreRA"; 84 } 85 86 void getAnalysisUsage(AnalysisUsage &AU) const override { 87 AU.setPreservesCFG(); 88 MachineFunctionPass::getAnalysisUsage(AU); 89 } 90 }; 91 } // end anonymous namespace 92 93 char AArch64StackTaggingPreRA::ID = 0; 94 95 INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", 96 "AArch64 Stack Tagging PreRA Pass", false, false) 97 INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", 98 "AArch64 Stack Tagging PreRA Pass", false, false) 99 100 FunctionPass *llvm::createAArch64StackTaggingPreRAPass() { 101 return new AArch64StackTaggingPreRA(); 102 } 103 104 static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) { 105 switch (Opcode) { 106 case AArch64::LDRBBui: 107 case AArch64::LDRHHui: 108 case AArch64::LDRWui: 109 case AArch64::LDRXui: 110 111 case AArch64::LDRBui: 112 case AArch64::LDRHui: 113 case AArch64::LDRSui: 114 case AArch64::LDRDui: 115 case AArch64::LDRQui: 116 117 case AArch64::LDRSHWui: 118 case AArch64::LDRSHXui: 119 120 case AArch64::LDRSBWui: 121 case AArch64::LDRSBXui: 122 123 case AArch64::LDRSWui: 124 125 case AArch64::STRBBui: 126 case AArch64::STRHHui: 127 case AArch64::STRWui: 128 case AArch64::STRXui: 129 130 case AArch64::STRBui: 131 case AArch64::STRHui: 132 case AArch64::STRSui: 133 case AArch64::STRDui: 134 case AArch64::STRQui: 135 136 case AArch64::LDPWi: 137 case AArch64::LDPXi: 138 case AArch64::LDPSi: 139 case AArch64::LDPDi: 140 case AArch64::LDPQi: 141 142 case AArch64::LDPSWi: 143 144 case AArch64::STPWi: 145 case AArch64::STPXi: 146 case AArch64::STPSi: 147 case AArch64::STPDi: 148 case AArch64::STPQi: 149 return true; 150 default: 151 return false; 152 } 153 } 154 155 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() { 156 if (ClUncheckedLdSt == UncheckedNever) 157 return false; 158 else if (ClUncheckedLdSt == UncheckedAlways) 159 return true; 160 161 // This estimate can be improved if we had harder guarantees about stack frame 162 // layout. With LocalStackAllocation we can estimate SP offset to any 163 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged 164 // objects ahead of non-tagged ones, but that's not always desirable. 165 // 166 // Underestimating SP offset here may require the use of LDG to materialize 167 // the tagged address of the stack slot, along with a scratch register 168 // allocation (post-regalloc!). 169 // 170 // For now we do the safe thing here and require that the entire stack frame 171 // is within range of the shortest of the unchecked instructions. 172 unsigned FrameSize = 0; 173 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) 174 FrameSize += MFI->getObjectSize(i); 175 bool EntireFrameReachableFromSP = FrameSize < 0xf00; 176 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP; 177 } 178 179 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) { 180 for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end(); 181 UI != E;) { 182 MachineInstr *UseI = &*(UI++); 183 if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) { 184 // FI operand is always the one before the immediate offset. 185 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1; 186 if (UseI->getOperand(OpIdx).isReg() && 187 UseI->getOperand(OpIdx).getReg() == TaggedReg) { 188 UseI->getOperand(OpIdx).ChangeToFrameIndex(FI); 189 UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED); 190 } 191 } else if (UseI->isCopy() && 192 Register::isVirtualRegister(UseI->getOperand(0).getReg())) { 193 uncheckUsesOf(UseI->getOperand(0).getReg(), FI); 194 } 195 } 196 } 197 198 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() { 199 for (auto *I : ReTags) { 200 unsigned TaggedReg = I->getOperand(0).getReg(); 201 int FI = I->getOperand(1).getIndex(); 202 uncheckUsesOf(TaggedReg, FI); 203 } 204 } 205 206 namespace { 207 struct SlotWithTag { 208 int FI; 209 int Tag; 210 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {} 211 explicit SlotWithTag(const MachineInstr &MI) 212 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {} 213 bool operator==(const SlotWithTag &Other) const { 214 return FI == Other.FI && Tag == Other.Tag; 215 } 216 }; 217 } // namespace 218 219 namespace llvm { 220 template <> struct DenseMapInfo<SlotWithTag> { 221 static inline SlotWithTag getEmptyKey() { return {-2, -2}; } 222 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; } 223 static unsigned getHashValue(const SlotWithTag &V) { 224 return hash_combine(DenseMapInfo<int>::getHashValue(V.FI), 225 DenseMapInfo<int>::getHashValue(V.Tag)); 226 } 227 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) { 228 return A == B; 229 } 230 }; 231 } // namespace llvm 232 233 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) { 234 return MFI->getUseLocalStackAllocationBlock() && 235 MFI->isObjectPreAllocated(FI); 236 } 237 238 // Pin one of the tagged slots to offset 0 from the tagged base pointer. 239 // This would make its address available in a virtual register (IRG's def), as 240 // opposed to requiring an ADDG instruction to materialize. This effectively 241 // eliminates a vreg (by replacing it with direct uses of IRG, which is usually 242 // live almost everywhere anyway), and therefore needs to happen before 243 // regalloc. 244 Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() { 245 // Find the best (FI, Tag) pair to pin to offset 0. 246 // Looking at the possible uses of a tagged address, the advantage of pinning 247 // is: 248 // - COPY to physical register. 249 // Does not matter, this would trade a MOV instruction for an ADDG. 250 // - ST*G matter, but those mostly appear near the function prologue where all 251 // the tagged addresses need to be materialized anyway; also, counting ST*G 252 // uses would overweight large allocas that require more than one ST*G 253 // instruction. 254 // - Load/Store instructions in the address operand do not require a tagged 255 // pointer, so they also do not benefit. These operands have already been 256 // eliminated (see uncheckLoadsAndStores) so all remaining load/store 257 // instructions count. 258 // - Any other instruction may benefit from being pinned to offset 0. 259 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n"); 260 if (!ClFirstSlot) 261 return None; 262 263 DenseMap<SlotWithTag, int> RetagScore; 264 SlotWithTag MaxScoreST{-1, -1}; 265 int MaxScore = -1; 266 for (auto *I : ReTags) { 267 SlotWithTag ST{*I}; 268 if (isSlotPreAllocated(MFI, ST.FI)) 269 continue; 270 271 Register RetagReg = I->getOperand(0).getReg(); 272 if (!Register::isVirtualRegister(RetagReg)) 273 continue; 274 275 int Score = 0; 276 SmallVector<Register, 8> WorkList; 277 WorkList.push_back(RetagReg); 278 279 while (!WorkList.empty()) { 280 Register UseReg = WorkList.back(); 281 WorkList.pop_back(); 282 for (auto &UseI : MRI->use_instructions(UseReg)) { 283 unsigned Opcode = UseI.getOpcode(); 284 if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset || 285 Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset || 286 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop || 287 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback || 288 Opcode == AArch64::STZGloop_wback) 289 continue; 290 if (UseI.isCopy()) { 291 Register DstReg = UseI.getOperand(0).getReg(); 292 if (Register::isVirtualRegister(DstReg)) 293 WorkList.push_back(DstReg); 294 continue; 295 } 296 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %" 297 << Register::virtReg2Index(UseReg) << " in " << UseI 298 << "\n"); 299 Score++; 300 } 301 } 302 303 int TotalScore = RetagScore[ST] += Score; 304 if (TotalScore > MaxScore || 305 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) { 306 MaxScore = TotalScore; 307 MaxScoreST = ST; 308 } 309 } 310 311 if (MaxScoreST.FI < 0) 312 return None; 313 314 // If FI's tag is already 0, we are done. 315 if (MaxScoreST.Tag == 0) 316 return MaxScoreST.FI; 317 318 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0. 319 SlotWithTag SwapST{-1, -1}; 320 for (auto *I : ReTags) { 321 SlotWithTag ST{*I}; 322 if (ST.Tag == 0) { 323 SwapST = ST; 324 break; 325 } 326 } 327 328 // Swap tags between the victim and the highest scoring pair. 329 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for 330 // the highest score slot without changing anything else. 331 for (auto *&I : ReTags) { 332 SlotWithTag ST{*I}; 333 MachineOperand &TagOp = I->getOperand(4); 334 if (ST == MaxScoreST) { 335 TagOp.setImm(0); 336 } else if (ST == SwapST) { 337 TagOp.setImm(MaxScoreST.Tag); 338 } 339 } 340 return MaxScoreST.FI; 341 } 342 343 bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) { 344 MF = &Func; 345 MRI = &MF->getRegInfo(); 346 AFI = MF->getInfo<AArch64FunctionInfo>(); 347 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo()); 348 TRI = static_cast<const AArch64RegisterInfo *>( 349 MF->getSubtarget().getRegisterInfo()); 350 MFI = &MF->getFrameInfo(); 351 ReTags.clear(); 352 353 assert(MRI->isSSA()); 354 355 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n" 356 << "********** Function: " << MF->getName() << '\n'); 357 358 SmallSetVector<int, 8> TaggedSlots; 359 for (auto &BB : *MF) { 360 for (auto &I : BB) { 361 if (I.getOpcode() == AArch64::TAGPstack) { 362 ReTags.push_back(&I); 363 int FI = I.getOperand(1).getIndex(); 364 TaggedSlots.insert(FI); 365 // There should be no offsets in TAGP yet. 366 assert(I.getOperand(2).getImm() == 0); 367 } 368 } 369 } 370 371 // Take over from SSP. It does nothing for tagged slots, and should not really 372 // have been enabled in the first place. 373 for (int FI : TaggedSlots) 374 MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None); 375 376 if (ReTags.empty()) 377 return false; 378 379 if (mayUseUncheckedLoadStore()) 380 uncheckLoadsAndStores(); 381 382 // Find a slot that is used with zero tag offset, like ADDG #fi, 0. 383 // If the base tagged pointer is set up to the address of this slot, 384 // the ADDG instruction can be eliminated. 385 Optional<int> BaseSlot = findFirstSlotCandidate(); 386 if (BaseSlot) 387 AFI->setTaggedBasePointerIndex(*BaseSlot); 388 389 for (auto *I : ReTags) { 390 int FI = I->getOperand(1).getIndex(); 391 int Tag = I->getOperand(4).getImm(); 392 Register Base = I->getOperand(3).getReg(); 393 if (Tag == 0 && FI == BaseSlot) { 394 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY), 395 I->getOperand(0).getReg()) 396 .addReg(Base); 397 I->eraseFromParent(); 398 } 399 } 400 401 return true; 402 } 403