1 //===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 11 #include "AArch64.h" 12 #include "AArch64MachineFunctionInfo.h" 13 #include "AArch64InstrInfo.h" 14 #include "llvm/ADT/DepthFirstIterator.h" 15 #include "llvm/ADT/SetVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/CodeGen/MachineFunction.h" 20 #include "llvm/CodeGen/MachineFunctionPass.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineLoopInfo.h" 23 #include "llvm/CodeGen/MachineRegisterInfo.h" 24 #include "llvm/CodeGen/MachineTraceMetrics.h" 25 #include "llvm/CodeGen/Passes.h" 26 #include "llvm/CodeGen/TargetInstrInfo.h" 27 #include "llvm/CodeGen/TargetRegisterInfo.h" 28 #include "llvm/CodeGen/TargetSubtargetInfo.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/raw_ostream.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "aarch64-stack-tagging-pre-ra" 36 37 enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways }; 38 39 cl::opt<UncheckedLdStMode> ClUncheckedLdSt( 40 "stack-tagging-unchecked-ld-st", cl::Hidden, 41 cl::init(UncheckedSafe), 42 cl::desc( 43 "Unconditionally apply unchecked-ld-st optimization (even for large " 44 "stack frames, or in the presence of variable sized allocas)."), 45 cl::values( 46 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), 47 clEnumValN( 48 UncheckedSafe, "safe", 49 "apply unchecked-ld-st when the target is definitely within range"), 50 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st"))); 51 52 static cl::opt<bool> 53 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), 54 cl::ZeroOrMore, 55 cl::desc("Apply first slot optimization for stack tagging " 56 "(eliminate ADDG Rt, Rn, 0, 0).")); 57 58 namespace { 59 60 class AArch64StackTaggingPreRA : public MachineFunctionPass { 61 MachineFunction *MF; 62 AArch64FunctionInfo *AFI; 63 MachineFrameInfo *MFI; 64 MachineRegisterInfo *MRI; 65 const AArch64RegisterInfo *TRI; 66 const AArch64InstrInfo *TII; 67 68 SmallVector<MachineInstr*, 16> ReTags; 69 70 public: 71 static char ID; 72 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) { 73 initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry()); 74 } 75 76 bool mayUseUncheckedLoadStore(); 77 void uncheckUsesOf(unsigned TaggedReg, int FI); 78 void uncheckLoadsAndStores(); 79 Optional<int> findFirstSlotCandidate(); 80 81 bool runOnMachineFunction(MachineFunction &Func) override; 82 StringRef getPassName() const override { 83 return "AArch64 Stack Tagging PreRA"; 84 } 85 86 void getAnalysisUsage(AnalysisUsage &AU) const override { 87 AU.setPreservesCFG(); 88 MachineFunctionPass::getAnalysisUsage(AU); 89 } 90 }; 91 } // end anonymous namespace 92 93 char AArch64StackTaggingPreRA::ID = 0; 94 95 INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", 96 "AArch64 Stack Tagging PreRA Pass", false, false) 97 INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", 98 "AArch64 Stack Tagging PreRA Pass", false, false) 99 100 FunctionPass *llvm::createAArch64StackTaggingPreRAPass() { 101 return new AArch64StackTaggingPreRA(); 102 } 103 104 static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) { 105 switch (Opcode) { 106 case AArch64::LDRBBui: 107 case AArch64::LDRHHui: 108 case AArch64::LDRWui: 109 case AArch64::LDRXui: 110 111 case AArch64::LDRBui: 112 case AArch64::LDRHui: 113 case AArch64::LDRSui: 114 case AArch64::LDRDui: 115 case AArch64::LDRQui: 116 117 case AArch64::LDRSHWui: 118 case AArch64::LDRSHXui: 119 120 case AArch64::LDRSBWui: 121 case AArch64::LDRSBXui: 122 123 case AArch64::LDRSWui: 124 125 case AArch64::STRBBui: 126 case AArch64::STRHHui: 127 case AArch64::STRWui: 128 case AArch64::STRXui: 129 130 case AArch64::STRBui: 131 case AArch64::STRHui: 132 case AArch64::STRSui: 133 case AArch64::STRDui: 134 case AArch64::STRQui: 135 136 case AArch64::LDPWi: 137 case AArch64::LDPXi: 138 case AArch64::LDPSi: 139 case AArch64::LDPDi: 140 case AArch64::LDPQi: 141 142 case AArch64::LDPSWi: 143 144 case AArch64::STPWi: 145 case AArch64::STPXi: 146 case AArch64::STPSi: 147 case AArch64::STPDi: 148 case AArch64::STPQi: 149 return true; 150 default: 151 return false; 152 } 153 } 154 155 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() { 156 if (ClUncheckedLdSt == UncheckedNever) 157 return false; 158 else if (ClUncheckedLdSt == UncheckedAlways) 159 return true; 160 161 // This estimate can be improved if we had harder guarantees about stack frame 162 // layout. With LocalStackAllocation we can estimate SP offset to any 163 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged 164 // objects ahead of non-tagged ones, but that's not always desirable. 165 // 166 // Underestimating SP offset here may require the use of LDG to materialize 167 // the tagged address of the stack slot, along with a scratch register 168 // allocation (post-regalloc!). 169 // 170 // For now we do the safe thing here and require that the entire stack frame 171 // is within range of the shortest of the unchecked instructions. 172 unsigned FrameSize = 0; 173 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) 174 FrameSize += MFI->getObjectSize(i); 175 bool EntireFrameReachableFromSP = FrameSize < 0xf00; 176 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP; 177 } 178 179 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) { 180 for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end(); 181 UI != E;) { 182 MachineInstr *UseI = &*(UI++); 183 if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) { 184 // FI operand is always the one before the immediate offset. 185 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1; 186 if (UseI->getOperand(OpIdx).isReg() && 187 UseI->getOperand(OpIdx).getReg() == TaggedReg) { 188 UseI->getOperand(OpIdx).ChangeToFrameIndex(FI); 189 UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED); 190 } 191 } else if (UseI->isCopy() && 192 Register::isVirtualRegister(UseI->getOperand(0).getReg())) { 193 uncheckUsesOf(UseI->getOperand(0).getReg(), FI); 194 } 195 } 196 } 197 198 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() { 199 for (auto *I : ReTags) { 200 unsigned TaggedReg = I->getOperand(0).getReg(); 201 int FI = I->getOperand(1).getIndex(); 202 uncheckUsesOf(TaggedReg, FI); 203 } 204 } 205 206 struct SlotWithTag { 207 int FI; 208 int Tag; 209 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {} 210 explicit SlotWithTag(const MachineInstr &MI) 211 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {} 212 bool operator==(const SlotWithTag &Other) const { 213 return FI == Other.FI && Tag == Other.Tag; 214 } 215 }; 216 217 namespace llvm { 218 template <> struct DenseMapInfo<SlotWithTag> { 219 static inline SlotWithTag getEmptyKey() { return {-2, -2}; } 220 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; } 221 static unsigned getHashValue(const SlotWithTag &V) { 222 return hash_combine(DenseMapInfo<int>::getHashValue(V.FI), 223 DenseMapInfo<int>::getHashValue(V.Tag)); 224 } 225 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) { 226 return A == B; 227 } 228 }; 229 } // namespace llvm 230 231 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) { 232 return MFI->getUseLocalStackAllocationBlock() && 233 MFI->isObjectPreAllocated(FI); 234 } 235 236 // Pin one of the tagged slots to offset 0 from the tagged base pointer. 237 // This would make its address available in a virtual register (IRG's def), as 238 // opposed to requiring an ADDG instruction to materialize. This effectively 239 // eliminates a vreg (by replacing it with direct uses of IRG, which is usually 240 // live almost everywhere anyway), and therefore needs to happen before 241 // regalloc. 242 Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() { 243 // Find the best (FI, Tag) pair to pin to offset 0. 244 // Looking at the possible uses of a tagged address, the advantage of pinning 245 // is: 246 // - COPY to physical register. 247 // Does not matter, this would trade a MOV instruction for an ADDG. 248 // - ST*G matter, but those mostly appear near the function prologue where all 249 // the tagged addresses need to be materialized anyway; also, counting ST*G 250 // uses would overweight large allocas that require more than one ST*G 251 // instruction. 252 // - Load/Store instructions in the address operand do not require a tagged 253 // pointer, so they also do not benefit. These operands have already been 254 // eliminated (see uncheckLoadsAndStores) so all remaining load/store 255 // instructions count. 256 // - Any other instruction may benefit from being pinned to offset 0. 257 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n"); 258 if (!ClFirstSlot) 259 return None; 260 261 DenseMap<SlotWithTag, int> RetagScore; 262 SlotWithTag MaxScoreST{-1, -1}; 263 int MaxScore = -1; 264 for (auto *I : ReTags) { 265 SlotWithTag ST{*I}; 266 if (isSlotPreAllocated(MFI, ST.FI)) 267 continue; 268 269 Register RetagReg = I->getOperand(0).getReg(); 270 if (!Register::isVirtualRegister(RetagReg)) 271 continue; 272 273 int Score = 0; 274 SmallVector<Register, 8> WorkList; 275 WorkList.push_back(RetagReg); 276 277 while (!WorkList.empty()) { 278 Register UseReg = WorkList.back(); 279 WorkList.pop_back(); 280 for (auto &UseI : MRI->use_instructions(UseReg)) { 281 unsigned Opcode = UseI.getOpcode(); 282 if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset || 283 Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset || 284 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop || 285 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback || 286 Opcode == AArch64::STZGloop_wback) 287 continue; 288 if (UseI.isCopy()) { 289 Register DstReg = UseI.getOperand(0).getReg(); 290 if (Register::isVirtualRegister(DstReg)) 291 WorkList.push_back(DstReg); 292 continue; 293 } 294 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %" 295 << Register::virtReg2Index(UseReg) << " in " << UseI 296 << "\n"); 297 Score++; 298 } 299 } 300 301 int TotalScore = RetagScore[ST] += Score; 302 if (TotalScore > MaxScore || 303 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) { 304 MaxScore = TotalScore; 305 MaxScoreST = ST; 306 } 307 } 308 309 if (MaxScoreST.FI < 0) 310 return None; 311 312 // If FI's tag is already 0, we are done. 313 if (MaxScoreST.Tag == 0) 314 return MaxScoreST.FI; 315 316 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0. 317 SlotWithTag SwapST{-1, -1}; 318 for (auto *I : ReTags) { 319 SlotWithTag ST{*I}; 320 if (ST.Tag == 0) { 321 SwapST = ST; 322 break; 323 } 324 } 325 326 // Swap tags between the victim and the highest scoring pair. 327 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for 328 // the highest score slot without changing anything else. 329 for (auto *&I : ReTags) { 330 SlotWithTag ST{*I}; 331 MachineOperand &TagOp = I->getOperand(4); 332 if (ST == MaxScoreST) { 333 TagOp.setImm(0); 334 } else if (ST == SwapST) { 335 TagOp.setImm(MaxScoreST.Tag); 336 } 337 } 338 return MaxScoreST.FI; 339 } 340 341 bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) { 342 MF = &Func; 343 MRI = &MF->getRegInfo(); 344 AFI = MF->getInfo<AArch64FunctionInfo>(); 345 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo()); 346 TRI = static_cast<const AArch64RegisterInfo *>( 347 MF->getSubtarget().getRegisterInfo()); 348 MFI = &MF->getFrameInfo(); 349 ReTags.clear(); 350 351 assert(MRI->isSSA()); 352 353 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n" 354 << "********** Function: " << MF->getName() << '\n'); 355 356 SmallSetVector<int, 8> TaggedSlots; 357 for (auto &BB : *MF) { 358 for (auto &I : BB) { 359 if (I.getOpcode() == AArch64::TAGPstack) { 360 ReTags.push_back(&I); 361 int FI = I.getOperand(1).getIndex(); 362 TaggedSlots.insert(FI); 363 // There should be no offsets in TAGP yet. 364 assert(I.getOperand(2).getImm() == 0); 365 } 366 } 367 } 368 369 // Take over from SSP. It does nothing for tagged slots, and should not really 370 // have been enabled in the first place. 371 for (int FI : TaggedSlots) 372 MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None); 373 374 if (ReTags.empty()) 375 return false; 376 377 if (mayUseUncheckedLoadStore()) 378 uncheckLoadsAndStores(); 379 380 // Find a slot that is used with zero tag offset, like ADDG #fi, 0. 381 // If the base tagged pointer is set up to the address of this slot, 382 // the ADDG instruction can be eliminated. 383 Optional<int> BaseSlot = findFirstSlotCandidate(); 384 if (BaseSlot) 385 AFI->setTaggedBasePointerIndex(*BaseSlot); 386 387 for (auto *I : ReTags) { 388 int FI = I->getOperand(1).getIndex(); 389 int Tag = I->getOperand(4).getImm(); 390 Register Base = I->getOperand(3).getReg(); 391 if (Tag == 0 && FI == BaseSlot) { 392 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY), 393 I->getOperand(0).getReg()) 394 .addReg(Base); 395 I->eraseFromParent(); 396 } 397 } 398 399 return true; 400 } 401