1 //===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 10 #include "AArch64.h" 11 #include "AArch64MachineFunctionInfo.h" 12 #include "AArch64InstrInfo.h" 13 #include "llvm/ADT/DepthFirstIterator.h" 14 #include "llvm/ADT/SetVector.h" 15 #include "llvm/ADT/Statistic.h" 16 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 17 #include "llvm/CodeGen/MachineFrameInfo.h" 18 #include "llvm/CodeGen/MachineFunction.h" 19 #include "llvm/CodeGen/MachineFunctionPass.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineLoopInfo.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/MachineTraceMetrics.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/CodeGen/TargetInstrInfo.h" 26 #include "llvm/CodeGen/TargetRegisterInfo.h" 27 #include "llvm/CodeGen/TargetSubtargetInfo.h" 28 #include "llvm/Support/CommandLine.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/raw_ostream.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "aarch64-stack-tagging-pre-ra" 35 36 enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways }; 37 38 cl::opt<UncheckedLdStMode> ClUncheckedLdSt( 39 "stack-tagging-unchecked-ld-st", cl::Hidden, 40 cl::init(UncheckedSafe), 41 cl::desc( 42 "Unconditionally apply unchecked-ld-st optimization (even for large " 43 "stack frames, or in the presence of variable sized allocas)."), 44 cl::values( 45 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), 46 clEnumValN( 47 UncheckedSafe, "safe", 48 "apply unchecked-ld-st when the target is definitely within range"), 49 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st"))); 50 51 static cl::opt<bool> 52 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), 53 cl::desc("Apply first slot optimization for stack tagging " 54 "(eliminate ADDG Rt, Rn, 0, 0).")); 55 56 namespace { 57 58 class AArch64StackTaggingPreRA : public MachineFunctionPass { 59 MachineFunction *MF; 60 AArch64FunctionInfo *AFI; 61 MachineFrameInfo *MFI; 62 MachineRegisterInfo *MRI; 63 const AArch64RegisterInfo *TRI; 64 const AArch64InstrInfo *TII; 65 66 SmallVector<MachineInstr*, 16> ReTags; 67 68 public: 69 static char ID; 70 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) { 71 initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry()); 72 } 73 74 bool mayUseUncheckedLoadStore(); 75 void uncheckUsesOf(unsigned TaggedReg, int FI); 76 void uncheckLoadsAndStores(); 77 Optional<int> findFirstSlotCandidate(); 78 79 bool runOnMachineFunction(MachineFunction &Func) override; 80 StringRef getPassName() const override { 81 return "AArch64 Stack Tagging PreRA"; 82 } 83 84 void getAnalysisUsage(AnalysisUsage &AU) const override { 85 AU.setPreservesCFG(); 86 MachineFunctionPass::getAnalysisUsage(AU); 87 } 88 }; 89 } // end anonymous namespace 90 91 char AArch64StackTaggingPreRA::ID = 0; 92 93 INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", 94 "AArch64 Stack Tagging PreRA Pass", false, false) 95 INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", 96 "AArch64 Stack Tagging PreRA Pass", false, false) 97 98 FunctionPass *llvm::createAArch64StackTaggingPreRAPass() { 99 return new AArch64StackTaggingPreRA(); 100 } 101 102 static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) { 103 switch (Opcode) { 104 case AArch64::LDRBBui: 105 case AArch64::LDRHHui: 106 case AArch64::LDRWui: 107 case AArch64::LDRXui: 108 109 case AArch64::LDRBui: 110 case AArch64::LDRHui: 111 case AArch64::LDRSui: 112 case AArch64::LDRDui: 113 case AArch64::LDRQui: 114 115 case AArch64::LDRSHWui: 116 case AArch64::LDRSHXui: 117 118 case AArch64::LDRSBWui: 119 case AArch64::LDRSBXui: 120 121 case AArch64::LDRSWui: 122 123 case AArch64::STRBBui: 124 case AArch64::STRHHui: 125 case AArch64::STRWui: 126 case AArch64::STRXui: 127 128 case AArch64::STRBui: 129 case AArch64::STRHui: 130 case AArch64::STRSui: 131 case AArch64::STRDui: 132 case AArch64::STRQui: 133 134 case AArch64::LDPWi: 135 case AArch64::LDPXi: 136 case AArch64::LDPSi: 137 case AArch64::LDPDi: 138 case AArch64::LDPQi: 139 140 case AArch64::LDPSWi: 141 142 case AArch64::STPWi: 143 case AArch64::STPXi: 144 case AArch64::STPSi: 145 case AArch64::STPDi: 146 case AArch64::STPQi: 147 return true; 148 default: 149 return false; 150 } 151 } 152 153 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() { 154 if (ClUncheckedLdSt == UncheckedNever) 155 return false; 156 else if (ClUncheckedLdSt == UncheckedAlways) 157 return true; 158 159 // This estimate can be improved if we had harder guarantees about stack frame 160 // layout. With LocalStackAllocation we can estimate SP offset to any 161 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged 162 // objects ahead of non-tagged ones, but that's not always desirable. 163 // 164 // Underestimating SP offset here may require the use of LDG to materialize 165 // the tagged address of the stack slot, along with a scratch register 166 // allocation (post-regalloc!). 167 // 168 // For now we do the safe thing here and require that the entire stack frame 169 // is within range of the shortest of the unchecked instructions. 170 unsigned FrameSize = 0; 171 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) 172 FrameSize += MFI->getObjectSize(i); 173 bool EntireFrameReachableFromSP = FrameSize < 0xf00; 174 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP; 175 } 176 177 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) { 178 for (MachineInstr &UseI : 179 llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) { 180 if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) { 181 // FI operand is always the one before the immediate offset. 182 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1; 183 if (UseI.getOperand(OpIdx).isReg() && 184 UseI.getOperand(OpIdx).getReg() == TaggedReg) { 185 UseI.getOperand(OpIdx).ChangeToFrameIndex(FI); 186 UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED); 187 } 188 } else if (UseI.isCopy() && 189 Register::isVirtualRegister(UseI.getOperand(0).getReg())) { 190 uncheckUsesOf(UseI.getOperand(0).getReg(), FI); 191 } 192 } 193 } 194 195 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() { 196 for (auto *I : ReTags) { 197 Register TaggedReg = I->getOperand(0).getReg(); 198 int FI = I->getOperand(1).getIndex(); 199 uncheckUsesOf(TaggedReg, FI); 200 } 201 } 202 203 namespace { 204 struct SlotWithTag { 205 int FI; 206 int Tag; 207 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {} 208 explicit SlotWithTag(const MachineInstr &MI) 209 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {} 210 bool operator==(const SlotWithTag &Other) const { 211 return FI == Other.FI && Tag == Other.Tag; 212 } 213 }; 214 } // namespace 215 216 namespace llvm { 217 template <> struct DenseMapInfo<SlotWithTag> { 218 static inline SlotWithTag getEmptyKey() { return {-2, -2}; } 219 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; } 220 static unsigned getHashValue(const SlotWithTag &V) { 221 return hash_combine(DenseMapInfo<int>::getHashValue(V.FI), 222 DenseMapInfo<int>::getHashValue(V.Tag)); 223 } 224 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) { 225 return A == B; 226 } 227 }; 228 } // namespace llvm 229 230 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) { 231 return MFI->getUseLocalStackAllocationBlock() && 232 MFI->isObjectPreAllocated(FI); 233 } 234 235 // Pin one of the tagged slots to offset 0 from the tagged base pointer. 236 // This would make its address available in a virtual register (IRG's def), as 237 // opposed to requiring an ADDG instruction to materialize. This effectively 238 // eliminates a vreg (by replacing it with direct uses of IRG, which is usually 239 // live almost everywhere anyway), and therefore needs to happen before 240 // regalloc. 241 Optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() { 242 // Find the best (FI, Tag) pair to pin to offset 0. 243 // Looking at the possible uses of a tagged address, the advantage of pinning 244 // is: 245 // - COPY to physical register. 246 // Does not matter, this would trade a MOV instruction for an ADDG. 247 // - ST*G matter, but those mostly appear near the function prologue where all 248 // the tagged addresses need to be materialized anyway; also, counting ST*G 249 // uses would overweight large allocas that require more than one ST*G 250 // instruction. 251 // - Load/Store instructions in the address operand do not require a tagged 252 // pointer, so they also do not benefit. These operands have already been 253 // eliminated (see uncheckLoadsAndStores) so all remaining load/store 254 // instructions count. 255 // - Any other instruction may benefit from being pinned to offset 0. 256 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n"); 257 if (!ClFirstSlot) 258 return None; 259 260 DenseMap<SlotWithTag, int> RetagScore; 261 SlotWithTag MaxScoreST{-1, -1}; 262 int MaxScore = -1; 263 for (auto *I : ReTags) { 264 SlotWithTag ST{*I}; 265 if (isSlotPreAllocated(MFI, ST.FI)) 266 continue; 267 268 Register RetagReg = I->getOperand(0).getReg(); 269 if (!Register::isVirtualRegister(RetagReg)) 270 continue; 271 272 int Score = 0; 273 SmallVector<Register, 8> WorkList; 274 WorkList.push_back(RetagReg); 275 276 while (!WorkList.empty()) { 277 Register UseReg = WorkList.pop_back_val(); 278 for (auto &UseI : MRI->use_instructions(UseReg)) { 279 unsigned Opcode = UseI.getOpcode(); 280 if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset || 281 Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset || 282 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop || 283 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback || 284 Opcode == AArch64::STZGloop_wback) 285 continue; 286 if (UseI.isCopy()) { 287 Register DstReg = UseI.getOperand(0).getReg(); 288 if (Register::isVirtualRegister(DstReg)) 289 WorkList.push_back(DstReg); 290 continue; 291 } 292 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %" 293 << Register::virtReg2Index(UseReg) << " in " << UseI 294 << "\n"); 295 Score++; 296 } 297 } 298 299 int TotalScore = RetagScore[ST] += Score; 300 if (TotalScore > MaxScore || 301 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) { 302 MaxScore = TotalScore; 303 MaxScoreST = ST; 304 } 305 } 306 307 if (MaxScoreST.FI < 0) 308 return None; 309 310 // If FI's tag is already 0, we are done. 311 if (MaxScoreST.Tag == 0) 312 return MaxScoreST.FI; 313 314 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0. 315 SlotWithTag SwapST{-1, -1}; 316 for (auto *I : ReTags) { 317 SlotWithTag ST{*I}; 318 if (ST.Tag == 0) { 319 SwapST = ST; 320 break; 321 } 322 } 323 324 // Swap tags between the victim and the highest scoring pair. 325 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for 326 // the highest score slot without changing anything else. 327 for (auto *&I : ReTags) { 328 SlotWithTag ST{*I}; 329 MachineOperand &TagOp = I->getOperand(4); 330 if (ST == MaxScoreST) { 331 TagOp.setImm(0); 332 } else if (ST == SwapST) { 333 TagOp.setImm(MaxScoreST.Tag); 334 } 335 } 336 return MaxScoreST.FI; 337 } 338 339 bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) { 340 MF = &Func; 341 MRI = &MF->getRegInfo(); 342 AFI = MF->getInfo<AArch64FunctionInfo>(); 343 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo()); 344 TRI = static_cast<const AArch64RegisterInfo *>( 345 MF->getSubtarget().getRegisterInfo()); 346 MFI = &MF->getFrameInfo(); 347 ReTags.clear(); 348 349 assert(MRI->isSSA()); 350 351 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n" 352 << "********** Function: " << MF->getName() << '\n'); 353 354 SmallSetVector<int, 8> TaggedSlots; 355 for (auto &BB : *MF) { 356 for (auto &I : BB) { 357 if (I.getOpcode() == AArch64::TAGPstack) { 358 ReTags.push_back(&I); 359 int FI = I.getOperand(1).getIndex(); 360 TaggedSlots.insert(FI); 361 // There should be no offsets in TAGP yet. 362 assert(I.getOperand(2).getImm() == 0); 363 } 364 } 365 } 366 367 // Take over from SSP. It does nothing for tagged slots, and should not really 368 // have been enabled in the first place. 369 for (int FI : TaggedSlots) 370 MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None); 371 372 if (ReTags.empty()) 373 return false; 374 375 if (mayUseUncheckedLoadStore()) 376 uncheckLoadsAndStores(); 377 378 // Find a slot that is used with zero tag offset, like ADDG #fi, 0. 379 // If the base tagged pointer is set up to the address of this slot, 380 // the ADDG instruction can be eliminated. 381 Optional<int> BaseSlot = findFirstSlotCandidate(); 382 if (BaseSlot) 383 AFI->setTaggedBasePointerIndex(*BaseSlot); 384 385 for (auto *I : ReTags) { 386 int FI = I->getOperand(1).getIndex(); 387 int Tag = I->getOperand(4).getImm(); 388 Register Base = I->getOperand(3).getReg(); 389 if (Tag == 0 && FI == BaseSlot) { 390 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY), 391 I->getOperand(0).getReg()) 392 .addReg(Base); 393 I->eraseFromParent(); 394 } 395 } 396 397 return true; 398 } 399