1 //===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 10 #include "AArch64.h" 11 #include "AArch64MachineFunctionInfo.h" 12 #include "AArch64InstrInfo.h" 13 #include "llvm/ADT/DepthFirstIterator.h" 14 #include "llvm/ADT/SetVector.h" 15 #include "llvm/ADT/Statistic.h" 16 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 17 #include "llvm/CodeGen/MachineFrameInfo.h" 18 #include "llvm/CodeGen/MachineFunction.h" 19 #include "llvm/CodeGen/MachineFunctionPass.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineLoopInfo.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/MachineTraceMetrics.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/CodeGen/TargetInstrInfo.h" 26 #include "llvm/CodeGen/TargetRegisterInfo.h" 27 #include "llvm/CodeGen/TargetSubtargetInfo.h" 28 #include "llvm/Support/CommandLine.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/raw_ostream.h" 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "aarch64-stack-tagging-pre-ra" 35 36 enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways }; 37 38 cl::opt<UncheckedLdStMode> ClUncheckedLdSt( 39 "stack-tagging-unchecked-ld-st", cl::Hidden, 40 cl::init(UncheckedSafe), 41 cl::desc( 42 "Unconditionally apply unchecked-ld-st optimization (even for large " 43 "stack frames, or in the presence of variable sized allocas)."), 44 cl::values( 45 clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"), 46 clEnumValN( 47 UncheckedSafe, "safe", 48 "apply unchecked-ld-st when the target is definitely within range"), 49 clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st"))); 50 51 static cl::opt<bool> 52 ClFirstSlot("stack-tagging-first-slot-opt", cl::Hidden, cl::init(true), 53 cl::desc("Apply first slot optimization for stack tagging " 54 "(eliminate ADDG Rt, Rn, 0, 0).")); 55 56 namespace { 57 58 class AArch64StackTaggingPreRA : public MachineFunctionPass { 59 MachineFunction *MF; 60 AArch64FunctionInfo *AFI; 61 MachineFrameInfo *MFI; 62 MachineRegisterInfo *MRI; 63 const AArch64RegisterInfo *TRI; 64 const AArch64InstrInfo *TII; 65 66 SmallVector<MachineInstr*, 16> ReTags; 67 68 public: 69 static char ID; 70 AArch64StackTaggingPreRA() : MachineFunctionPass(ID) { 71 initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry()); 72 } 73 74 bool mayUseUncheckedLoadStore(); 75 void uncheckUsesOf(unsigned TaggedReg, int FI); 76 void uncheckLoadsAndStores(); 77 std::optional<int> findFirstSlotCandidate(); 78 79 bool runOnMachineFunction(MachineFunction &Func) override; 80 StringRef getPassName() const override { 81 return "AArch64 Stack Tagging PreRA"; 82 } 83 84 void getAnalysisUsage(AnalysisUsage &AU) const override { 85 AU.setPreservesCFG(); 86 MachineFunctionPass::getAnalysisUsage(AU); 87 } 88 }; 89 } // end anonymous namespace 90 91 char AArch64StackTaggingPreRA::ID = 0; 92 93 INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", 94 "AArch64 Stack Tagging PreRA Pass", false, false) 95 INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra", 96 "AArch64 Stack Tagging PreRA Pass", false, false) 97 98 FunctionPass *llvm::createAArch64StackTaggingPreRAPass() { 99 return new AArch64StackTaggingPreRA(); 100 } 101 102 static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) { 103 switch (Opcode) { 104 case AArch64::LDRBBui: 105 case AArch64::LDRHHui: 106 case AArch64::LDRWui: 107 case AArch64::LDRXui: 108 109 case AArch64::LDRBui: 110 case AArch64::LDRHui: 111 case AArch64::LDRSui: 112 case AArch64::LDRDui: 113 case AArch64::LDRQui: 114 115 case AArch64::LDRSHWui: 116 case AArch64::LDRSHXui: 117 118 case AArch64::LDRSBWui: 119 case AArch64::LDRSBXui: 120 121 case AArch64::LDRSWui: 122 123 case AArch64::STRBBui: 124 case AArch64::STRHHui: 125 case AArch64::STRWui: 126 case AArch64::STRXui: 127 128 case AArch64::STRBui: 129 case AArch64::STRHui: 130 case AArch64::STRSui: 131 case AArch64::STRDui: 132 case AArch64::STRQui: 133 134 case AArch64::LDPWi: 135 case AArch64::LDPXi: 136 case AArch64::LDPSi: 137 case AArch64::LDPDi: 138 case AArch64::LDPQi: 139 140 case AArch64::LDPSWi: 141 142 case AArch64::STPWi: 143 case AArch64::STPXi: 144 case AArch64::STPSi: 145 case AArch64::STPDi: 146 case AArch64::STPQi: 147 return true; 148 default: 149 return false; 150 } 151 } 152 153 bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() { 154 if (ClUncheckedLdSt == UncheckedNever) 155 return false; 156 else if (ClUncheckedLdSt == UncheckedAlways) 157 return true; 158 159 // This estimate can be improved if we had harder guarantees about stack frame 160 // layout. With LocalStackAllocation we can estimate SP offset to any 161 // preallocated slot. AArch64FrameLowering::orderFrameObjects could put tagged 162 // objects ahead of non-tagged ones, but that's not always desirable. 163 // 164 // Underestimating SP offset here may require the use of LDG to materialize 165 // the tagged address of the stack slot, along with a scratch register 166 // allocation (post-regalloc!). 167 // 168 // For now we do the safe thing here and require that the entire stack frame 169 // is within range of the shortest of the unchecked instructions. 170 unsigned FrameSize = 0; 171 for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) 172 FrameSize += MFI->getObjectSize(i); 173 bool EntireFrameReachableFromSP = FrameSize < 0xf00; 174 return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP; 175 } 176 177 void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) { 178 for (MachineInstr &UseI : 179 llvm::make_early_inc_range(MRI->use_instructions(TaggedReg))) { 180 if (isUncheckedLoadOrStoreOpcode(UseI.getOpcode())) { 181 // FI operand is always the one before the immediate offset. 182 unsigned OpIdx = TII->getLoadStoreImmIdx(UseI.getOpcode()) - 1; 183 if (UseI.getOperand(OpIdx).isReg() && 184 UseI.getOperand(OpIdx).getReg() == TaggedReg) { 185 UseI.getOperand(OpIdx).ChangeToFrameIndex(FI); 186 UseI.getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED); 187 } 188 } else if (UseI.isCopy() && UseI.getOperand(0).getReg().isVirtual()) { 189 uncheckUsesOf(UseI.getOperand(0).getReg(), FI); 190 } 191 } 192 } 193 194 void AArch64StackTaggingPreRA::uncheckLoadsAndStores() { 195 for (auto *I : ReTags) { 196 Register TaggedReg = I->getOperand(0).getReg(); 197 int FI = I->getOperand(1).getIndex(); 198 uncheckUsesOf(TaggedReg, FI); 199 } 200 } 201 202 namespace { 203 struct SlotWithTag { 204 int FI; 205 int Tag; 206 SlotWithTag(int FI, int Tag) : FI(FI), Tag(Tag) {} 207 explicit SlotWithTag(const MachineInstr &MI) 208 : FI(MI.getOperand(1).getIndex()), Tag(MI.getOperand(4).getImm()) {} 209 bool operator==(const SlotWithTag &Other) const { 210 return FI == Other.FI && Tag == Other.Tag; 211 } 212 }; 213 } // namespace 214 215 namespace llvm { 216 template <> struct DenseMapInfo<SlotWithTag> { 217 static inline SlotWithTag getEmptyKey() { return {-2, -2}; } 218 static inline SlotWithTag getTombstoneKey() { return {-3, -3}; } 219 static unsigned getHashValue(const SlotWithTag &V) { 220 return hash_combine(DenseMapInfo<int>::getHashValue(V.FI), 221 DenseMapInfo<int>::getHashValue(V.Tag)); 222 } 223 static bool isEqual(const SlotWithTag &A, const SlotWithTag &B) { 224 return A == B; 225 } 226 }; 227 } // namespace llvm 228 229 static bool isSlotPreAllocated(MachineFrameInfo *MFI, int FI) { 230 return MFI->getUseLocalStackAllocationBlock() && 231 MFI->isObjectPreAllocated(FI); 232 } 233 234 // Pin one of the tagged slots to offset 0 from the tagged base pointer. 235 // This would make its address available in a virtual register (IRG's def), as 236 // opposed to requiring an ADDG instruction to materialize. This effectively 237 // eliminates a vreg (by replacing it with direct uses of IRG, which is usually 238 // live almost everywhere anyway), and therefore needs to happen before 239 // regalloc. 240 std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() { 241 // Find the best (FI, Tag) pair to pin to offset 0. 242 // Looking at the possible uses of a tagged address, the advantage of pinning 243 // is: 244 // - COPY to physical register. 245 // Does not matter, this would trade a MOV instruction for an ADDG. 246 // - ST*G matter, but those mostly appear near the function prologue where all 247 // the tagged addresses need to be materialized anyway; also, counting ST*G 248 // uses would overweight large allocas that require more than one ST*G 249 // instruction. 250 // - Load/Store instructions in the address operand do not require a tagged 251 // pointer, so they also do not benefit. These operands have already been 252 // eliminated (see uncheckLoadsAndStores) so all remaining load/store 253 // instructions count. 254 // - Any other instruction may benefit from being pinned to offset 0. 255 LLVM_DEBUG(dbgs() << "AArch64StackTaggingPreRA::findFirstSlotCandidate\n"); 256 if (!ClFirstSlot) 257 return std::nullopt; 258 259 DenseMap<SlotWithTag, int> RetagScore; 260 SlotWithTag MaxScoreST{-1, -1}; 261 int MaxScore = -1; 262 for (auto *I : ReTags) { 263 SlotWithTag ST{*I}; 264 if (isSlotPreAllocated(MFI, ST.FI)) 265 continue; 266 267 Register RetagReg = I->getOperand(0).getReg(); 268 if (!RetagReg.isVirtual()) 269 continue; 270 271 int Score = 0; 272 SmallVector<Register, 8> WorkList; 273 WorkList.push_back(RetagReg); 274 275 while (!WorkList.empty()) { 276 Register UseReg = WorkList.pop_back_val(); 277 for (auto &UseI : MRI->use_instructions(UseReg)) { 278 unsigned Opcode = UseI.getOpcode(); 279 if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset || 280 Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset || 281 Opcode == AArch64::STGPi || Opcode == AArch64::STGloop || 282 Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback || 283 Opcode == AArch64::STZGloop_wback) 284 continue; 285 if (UseI.isCopy()) { 286 Register DstReg = UseI.getOperand(0).getReg(); 287 if (DstReg.isVirtual()) 288 WorkList.push_back(DstReg); 289 continue; 290 } 291 LLVM_DEBUG(dbgs() << "[" << ST.FI << ":" << ST.Tag << "] use of %" 292 << Register::virtReg2Index(UseReg) << " in " << UseI 293 << "\n"); 294 Score++; 295 } 296 } 297 298 int TotalScore = RetagScore[ST] += Score; 299 if (TotalScore > MaxScore || 300 (TotalScore == MaxScore && ST.FI > MaxScoreST.FI)) { 301 MaxScore = TotalScore; 302 MaxScoreST = ST; 303 } 304 } 305 306 if (MaxScoreST.FI < 0) 307 return std::nullopt; 308 309 // If FI's tag is already 0, we are done. 310 if (MaxScoreST.Tag == 0) 311 return MaxScoreST.FI; 312 313 // Otherwise, find a random victim pair (FI, Tag) where Tag == 0. 314 SlotWithTag SwapST{-1, -1}; 315 for (auto *I : ReTags) { 316 SlotWithTag ST{*I}; 317 if (ST.Tag == 0) { 318 SwapST = ST; 319 break; 320 } 321 } 322 323 // Swap tags between the victim and the highest scoring pair. 324 // If SwapWith is still (-1, -1), that's fine, too - we'll simply take tag for 325 // the highest score slot without changing anything else. 326 for (auto *&I : ReTags) { 327 SlotWithTag ST{*I}; 328 MachineOperand &TagOp = I->getOperand(4); 329 if (ST == MaxScoreST) { 330 TagOp.setImm(0); 331 } else if (ST == SwapST) { 332 TagOp.setImm(MaxScoreST.Tag); 333 } 334 } 335 return MaxScoreST.FI; 336 } 337 338 bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) { 339 MF = &Func; 340 MRI = &MF->getRegInfo(); 341 AFI = MF->getInfo<AArch64FunctionInfo>(); 342 TII = static_cast<const AArch64InstrInfo *>(MF->getSubtarget().getInstrInfo()); 343 TRI = static_cast<const AArch64RegisterInfo *>( 344 MF->getSubtarget().getRegisterInfo()); 345 MFI = &MF->getFrameInfo(); 346 ReTags.clear(); 347 348 assert(MRI->isSSA()); 349 350 LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n" 351 << "********** Function: " << MF->getName() << '\n'); 352 353 SmallSetVector<int, 8> TaggedSlots; 354 for (auto &BB : *MF) { 355 for (auto &I : BB) { 356 if (I.getOpcode() == AArch64::TAGPstack) { 357 ReTags.push_back(&I); 358 int FI = I.getOperand(1).getIndex(); 359 TaggedSlots.insert(FI); 360 // There should be no offsets in TAGP yet. 361 assert(I.getOperand(2).getImm() == 0); 362 } 363 } 364 } 365 366 // Take over from SSP. It does nothing for tagged slots, and should not really 367 // have been enabled in the first place. 368 for (int FI : TaggedSlots) 369 MFI->setObjectSSPLayout(FI, MachineFrameInfo::SSPLK_None); 370 371 if (ReTags.empty()) 372 return false; 373 374 if (mayUseUncheckedLoadStore()) 375 uncheckLoadsAndStores(); 376 377 // Find a slot that is used with zero tag offset, like ADDG #fi, 0. 378 // If the base tagged pointer is set up to the address of this slot, 379 // the ADDG instruction can be eliminated. 380 std::optional<int> BaseSlot = findFirstSlotCandidate(); 381 if (BaseSlot) 382 AFI->setTaggedBasePointerIndex(*BaseSlot); 383 384 for (auto *I : ReTags) { 385 int FI = I->getOperand(1).getIndex(); 386 int Tag = I->getOperand(4).getImm(); 387 Register Base = I->getOperand(3).getReg(); 388 if (Tag == 0 && FI == BaseSlot) { 389 BuildMI(*I->getParent(), I, {}, TII->get(AArch64::COPY), 390 I->getOperand(0).getReg()) 391 .addReg(Base); 392 I->eraseFromParent(); 393 } 394 } 395 396 return true; 397 } 398