//===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This pass works around a Cortex Core Fused AES erratum:
// - Cortex-A57 Erratum 1742098
// - Cortex-A72 Erratum 1655431
//
// The erratum may be triggered if an input vector register to AESE or AESD was
// last written by an instruction that only updated 32 bits of it. This can
// occur for either of the input registers.
//
// The workaround chosen is to update the input register using `r = VORRq r, r`,
// as this updates all 128 bits of the register unconditionally, but does not
// change the values observed in `r`, making the input safe.
//
// This pass has to be conservative in a few cases:
// - an input vector register to the AES instruction is defined outside the
//   current function, where we have to assume the register was updated in an
//   unsafe way; and
// - an input vector register to the AES instruction is updated along multiple
//   different control-flow paths, where we have to ensure all the register
//   updating instructions are safe.
//
// Both of these cases may apply to an input vector register. In either case, we
// need to ensure that, when the pass is finished, there exists a safe
// instruction between every unsafe register updating instruction and the AES
// instruction.
32 // 33 //===----------------------------------------------------------------------===// 34 35 #include "ARM.h" 36 #include "ARMBaseInstrInfo.h" 37 #include "ARMBaseRegisterInfo.h" 38 #include "ARMSubtarget.h" 39 #include "Utils/ARMBaseInfo.h" 40 #include "llvm/ADT/STLExtras.h" 41 #include "llvm/ADT/SmallPtrSet.h" 42 #include "llvm/ADT/SmallVector.h" 43 #include "llvm/ADT/StringRef.h" 44 #include "llvm/CodeGen/MachineBasicBlock.h" 45 #include "llvm/CodeGen/MachineFunction.h" 46 #include "llvm/CodeGen/MachineFunctionPass.h" 47 #include "llvm/CodeGen/MachineInstr.h" 48 #include "llvm/CodeGen/MachineInstrBuilder.h" 49 #include "llvm/CodeGen/MachineInstrBundleIterator.h" 50 #include "llvm/CodeGen/MachineOperand.h" 51 #include "llvm/CodeGen/ReachingDefAnalysis.h" 52 #include "llvm/CodeGen/Register.h" 53 #include "llvm/CodeGen/TargetRegisterInfo.h" 54 #include "llvm/IR/DebugLoc.h" 55 #include "llvm/InitializePasses.h" 56 #include "llvm/MC/MCInstrDesc.h" 57 #include "llvm/Pass.h" 58 #include "llvm/PassRegistry.h" 59 #include "llvm/Support/Debug.h" 60 #include "llvm/Support/raw_ostream.h" 61 #include <assert.h> 62 #include <stdint.h> 63 64 using namespace llvm; 65 66 #define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098" 67 68 //===----------------------------------------------------------------------===// 69 70 namespace { 71 class ARMFixCortexA57AES1742098 : public MachineFunctionPass { 72 public: 73 static char ID; 74 explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) { 75 initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry()); 76 } 77 78 bool runOnMachineFunction(MachineFunction &F) override; 79 80 MachineFunctionProperties getRequiredProperties() const override { 81 return MachineFunctionProperties().set( 82 MachineFunctionProperties::Property::NoVRegs); 83 } 84 85 StringRef getPassName() const override { 86 return "ARM fix for Cortex-A57 AES Erratum 1742098"; 87 } 88 89 void getAnalysisUsage(AnalysisUsage &AU) const override { 90 
AU.addRequired<ReachingDefAnalysis>(); 91 AU.setPreservesCFG(); 92 MachineFunctionPass::getAnalysisUsage(AU); 93 } 94 95 private: 96 // This is the information needed to insert the fixup in the right place. 97 struct AESFixupLocation { 98 MachineBasicBlock *Block; 99 // The fixup instruction will be inserted *before* InsertionPt. 100 MachineInstr *InsertionPt; 101 MachineOperand *MOp; 102 }; 103 104 void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA, 105 const ARMBaseRegisterInfo *TRI, 106 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const; 107 108 void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII, 109 const ARMBaseRegisterInfo *TRI) const; 110 111 static bool isFirstAESPairInstr(MachineInstr &MI); 112 static bool isSafeAESInput(MachineInstr &MI); 113 }; 114 char ARMFixCortexA57AES1742098::ID = 0; 115 116 } // end anonymous namespace 117 118 INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE, 119 "ARM fix for Cortex-A57 AES Erratum 1742098", false, 120 false) 121 INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis); 122 INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE, 123 "ARM fix for Cortex-A57 AES Erratum 1742098", false, false) 124 125 //===----------------------------------------------------------------------===// 126 127 bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) { 128 unsigned Opc = MI.getOpcode(); 129 return Opc == ARM::AESD || Opc == ARM::AESE; 130 } 131 132 bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) { 133 auto CondCodeIsAL = [](MachineInstr &MI) -> bool { 134 int CCIdx = MI.findFirstPredOperandIdx(); 135 if (CCIdx == -1) 136 return false; 137 return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL; 138 }; 139 140 switch (MI.getOpcode()) { 141 // Unknown: Assume not safe. 142 default: 143 return false; 144 // 128-bit wide AES instructions 145 case ARM::AESD: 146 case ARM::AESE: 147 case ARM::AESMC: 148 case ARM::AESIMC: 149 // No CondCode. 
150 return true; 151 // 128-bit and 64-bit wide bitwise ops (when condition = al) 152 case ARM::VANDd: 153 case ARM::VANDq: 154 case ARM::VORRd: 155 case ARM::VORRq: 156 case ARM::VEORd: 157 case ARM::VEORq: 158 case ARM::VMVNd: 159 case ARM::VMVNq: 160 // VMOV of 64-bit value between D registers (when condition = al) 161 case ARM::VMOVD: 162 // VMOV of 64 bit value from GPRs (when condition = al) 163 case ARM::VMOVDRR: 164 // VMOV of immediate into D or Q registers (when condition = al) 165 case ARM::VMOVv2i64: 166 case ARM::VMOVv1i64: 167 case ARM::VMOVv2f32: 168 case ARM::VMOVv4f32: 169 case ARM::VMOVv2i32: 170 case ARM::VMOVv4i32: 171 case ARM::VMOVv4i16: 172 case ARM::VMOVv8i16: 173 case ARM::VMOVv8i8: 174 case ARM::VMOVv16i8: 175 // Loads (when condition = al) 176 // VLD Dn, [Rn, #imm] 177 case ARM::VLDRD: 178 // VLDM 179 case ARM::VLDMDDB_UPD: 180 case ARM::VLDMDIA_UPD: 181 case ARM::VLDMDIA: 182 // VLDn to all lanes. 183 case ARM::VLD1d64: 184 case ARM::VLD1q64: 185 case ARM::VLD1d32: 186 case ARM::VLD1q32: 187 case ARM::VLD2b32: 188 case ARM::VLD2d32: 189 case ARM::VLD2q32: 190 case ARM::VLD1d16: 191 case ARM::VLD1q16: 192 case ARM::VLD2d16: 193 case ARM::VLD2q16: 194 case ARM::VLD1d8: 195 case ARM::VLD1q8: 196 case ARM::VLD2b8: 197 case ARM::VLD2d8: 198 case ARM::VLD2q8: 199 case ARM::VLD3d32: 200 case ARM::VLD3q32: 201 case ARM::VLD3d16: 202 case ARM::VLD3q16: 203 case ARM::VLD3d8: 204 case ARM::VLD3q8: 205 case ARM::VLD4d32: 206 case ARM::VLD4q32: 207 case ARM::VLD4d16: 208 case ARM::VLD4q16: 209 case ARM::VLD4d8: 210 case ARM::VLD4q8: 211 // VLD1 (single element to one lane) 212 case ARM::VLD1LNd32: 213 case ARM::VLD1LNd32_UPD: 214 case ARM::VLD1LNd8: 215 case ARM::VLD1LNd8_UPD: 216 case ARM::VLD1LNd16: 217 case ARM::VLD1LNd16_UPD: 218 // VLD1 (single element to all lanes) 219 case ARM::VLD1DUPd32: 220 case ARM::VLD1DUPd32wb_fixed: 221 case ARM::VLD1DUPd32wb_register: 222 case ARM::VLD1DUPd16: 223 case ARM::VLD1DUPd16wb_fixed: 224 case 
ARM::VLD1DUPd16wb_register: 225 case ARM::VLD1DUPd8: 226 case ARM::VLD1DUPd8wb_fixed: 227 case ARM::VLD1DUPd8wb_register: 228 case ARM::VLD1DUPq32: 229 case ARM::VLD1DUPq32wb_fixed: 230 case ARM::VLD1DUPq32wb_register: 231 case ARM::VLD1DUPq16: 232 case ARM::VLD1DUPq16wb_fixed: 233 case ARM::VLD1DUPq16wb_register: 234 case ARM::VLD1DUPq8: 235 case ARM::VLD1DUPq8wb_fixed: 236 case ARM::VLD1DUPq8wb_register: 237 // VMOV 238 case ARM::VSETLNi32: 239 case ARM::VSETLNi16: 240 case ARM::VSETLNi8: 241 return CondCodeIsAL(MI); 242 }; 243 244 return false; 245 } 246 247 bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) { 248 LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n"); 249 auto &STI = F.getSubtarget<ARMSubtarget>(); 250 251 // Fix not requested or AES instructions not present: skip pass. 252 if (!STI.hasAES() || !STI.fixCortexA57AES1742098()) 253 return false; 254 255 const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo(); 256 const ARMBaseInstrInfo *TII = STI.getInstrInfo(); 257 258 auto &RDA = getAnalysis<ReachingDefAnalysis>(); 259 260 // Analyze whole function to find instructions which need fixing up... 261 SmallVector<AESFixupLocation> FixupLocsForFn{}; 262 analyzeMF(F, RDA, TRI, FixupLocsForFn); 263 264 // ... and fix the instructions up all at the same time. 
265 bool Changed = false; 266 LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n"); 267 for (AESFixupLocation &FixupLoc : FixupLocsForFn) { 268 insertAESFixup(FixupLoc, TII, TRI); 269 Changed |= true; 270 } 271 272 return Changed; 273 } 274 275 void ARMFixCortexA57AES1742098::analyzeMF( 276 MachineFunction &MF, ReachingDefAnalysis &RDA, 277 const ARMBaseRegisterInfo *TRI, 278 SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const { 279 unsigned MaxAllowedFixups = 0; 280 281 for (MachineBasicBlock &MBB : MF) { 282 for (MachineInstr &MI : MBB) { 283 if (!isFirstAESPairInstr(MI)) 284 continue; 285 286 // Found an instruction to check the operands of. 287 LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI); 288 assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 && 289 "Unknown AES Instruction Format. Expected 1 def, 2 uses."); 290 291 // A maximum of two fixups should be inserted for each AES pair (one per 292 // register use). 293 MaxAllowedFixups += 2; 294 295 // Inspect all operands, choosing whether to insert a fixup. 296 for (MachineOperand &MOp : MI.uses()) { 297 SmallPtrSet<MachineInstr *, 1> AllDefs{}; 298 RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs); 299 300 // Planned Fixup: This should be added to FixupLocsForFn at most once. 301 AESFixupLocation NewLoc{&MBB, &MI, &MOp}; 302 303 // In small functions with loops, this operand may be both a live-in and 304 // have definitions within the function itself. These will need a fixup. 305 bool IsLiveIn = MF.front().isLiveIn(MOp.getReg()); 306 307 // If the register doesn't have defining instructions, and is not a 308 // live-in, then something is wrong and the fixup must always be 309 // inserted to be safe. 
310 if (!IsLiveIn && AllDefs.size() == 0) { 311 LLVM_DEBUG(dbgs() 312 << "Fixup Planned: No Defining Instrs found, not live-in: " 313 << printReg(MOp.getReg(), TRI) << "\n"); 314 FixupLocsForFn.emplace_back(NewLoc); 315 continue; 316 } 317 318 auto IsUnsafe = [](MachineInstr *MI) -> bool { 319 return !isSafeAESInput(*MI); 320 }; 321 size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe); 322 323 // If there are no unsafe definitions... 324 if (UnsafeCount == 0) { 325 // ... and the register is not live-in ... 326 if (!IsLiveIn) { 327 // ... then skip the fixup. 328 LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: " 329 << printReg(MOp.getReg(), TRI) << "\n"); 330 continue; 331 } 332 333 // Otherwise, the only unsafe "definition" is a live-in, so insert the 334 // fixup at the start of the function. 335 LLVM_DEBUG(dbgs() 336 << "Fixup Planned: Live-In (with safe defining instrs): " 337 << printReg(MOp.getReg(), TRI) << "\n"); 338 NewLoc.Block = &MF.front(); 339 NewLoc.InsertionPt = &*NewLoc.Block->begin(); 340 LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before " 341 << *NewLoc.InsertionPt); 342 FixupLocsForFn.emplace_back(NewLoc); 343 continue; 344 } 345 346 // If a fixup is needed in more than one place, then the best place to 347 // insert it is adjacent to the use rather than introducing a fixup 348 // adjacent to each def. 349 // 350 // FIXME: It might be better to hoist this to the start of the BB, if 351 // possible. 
352 if (IsLiveIn || UnsafeCount > 1) { 353 LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs " 354 "(including live-ins): " 355 << printReg(MOp.getReg(), TRI) << "\n"); 356 FixupLocsForFn.emplace_back(NewLoc); 357 continue; 358 } 359 360 assert(UnsafeCount == 1 && !IsLiveIn && 361 "At this point, there should be one unsafe defining instrs " 362 "and the defined register should not be a live-in."); 363 SmallPtrSetIterator<MachineInstr *> It = 364 llvm::find_if(AllDefs, IsUnsafe); 365 assert(It != AllDefs.end() && 366 "UnsafeCount == 1 but No Unsafe MachineInstr found."); 367 MachineInstr *DefMI = *It; 368 369 LLVM_DEBUG( 370 dbgs() << "Fixup Planned: Found single unsafe defining instrs for " 371 << printReg(MOp.getReg(), TRI) << ": " << *DefMI); 372 373 // There is one unsafe defining instruction, which needs a fixup. It is 374 // generally good to hoist the fixup to be adjacent to the defining 375 // instruction rather than the using instruction, as the using 376 // instruction may be inside a loop when the defining instruction is 377 // not. 
378 MachineBasicBlock::iterator DefIt = DefMI; 379 ++DefIt; 380 if (DefIt != DefMI->getParent()->end()) { 381 LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI 382 << "And immediately before " << *DefIt); 383 NewLoc.Block = DefIt->getParent(); 384 NewLoc.InsertionPt = &*DefIt; 385 } 386 387 FixupLocsForFn.emplace_back(NewLoc); 388 } 389 } 390 } 391 392 assert(FixupLocsForFn.size() <= MaxAllowedFixups && 393 "Inserted too many fixups for this function."); 394 (void)MaxAllowedFixups; 395 } 396 397 void ARMFixCortexA57AES1742098::insertAESFixup( 398 AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII, 399 const ARMBaseRegisterInfo *TRI) const { 400 MachineOperand *OperandToFixup = FixupLoc.MOp; 401 402 assert(OperandToFixup->isReg() && "OperandToFixup must be a register"); 403 Register RegToFixup = OperandToFixup->getReg(); 404 405 LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI) 406 << " before: " << *FixupLoc.InsertionPt); 407 408 // Insert the new `VORRq qN, qN, qN`. There are a few details here: 409 // 410 // The uses are marked as killed, even if the original use of OperandToFixup 411 // is not killed, as the new instruction is clobbering the register. This is 412 // safe even if there are other uses of `qN`, as the VORRq value-wise a no-op 413 // (it is inserted for microarchitectural reasons). 414 // 415 // The def and the uses are still marked as Renamable if the original register 416 // was, to avoid having to rummage through all the other uses and defs and 417 // unset their renamable bits. 418 unsigned Renamable = OperandToFixup->isRenamable() ? 
RegState::Renamable : 0; 419 BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(), 420 TII->get(ARM::VORRq)) 421 .addReg(RegToFixup, RegState::Define | Renamable) 422 .addReg(RegToFixup, RegState::Kill | Renamable) 423 .addReg(RegToFixup, RegState::Kill | Renamable) 424 .addImm((uint64_t)ARMCC::AL) 425 .addReg(ARM::NoRegister); 426 } 427 428 // Factory function used by AArch64TargetMachine to add the pass to 429 // the passmanager. 430 FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() { 431 return new ARMFixCortexA57AES1742098(); 432 } 433