1 //===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This pass works around a Cortex Core Fused AES erratum:
9 // - Cortex-A57 Erratum 1742098
10 // - Cortex-A72 Erratum 1655431
11 //
12 // The erratum may be triggered if an input vector register to AESE or AESD was
13 // last written by an instruction that only updated 32 bits of it. This can
14 // occur for either of the input registers.
15 //
16 // The workaround chosen is to update the input register using `r = VORRq r, r`,
17 // as this updates all 128 bits of the register unconditionally, but does not
18 // change the values observed in `r`, making the input safe.
19 //
20 // This pass has to be conservative in a few cases:
21 // - an input vector register to the AES instruction is defined outside the
22 //   current function, where we have to assume the register was updated in an
23 //   unsafe way; and
24 // - an input vector register to the AES instruction is updated along multiple
25 //   different control-flow paths, where we have to ensure all the register
26 //   updating instructions are safe.
27 //
// Both of these cases may apply to an input vector register. In either case, we
29 // need to ensure that, when the pass is finished, there exists a safe
30 // instruction between every unsafe register updating instruction and the AES
31 // instruction.
32 //
33 //===----------------------------------------------------------------------===//
34 
35 #include "ARM.h"
36 #include "ARMBaseInstrInfo.h"
37 #include "ARMBaseRegisterInfo.h"
38 #include "ARMSubtarget.h"
39 #include "Utils/ARMBaseInfo.h"
40 #include "llvm/ADT/STLExtras.h"
41 #include "llvm/ADT/SmallPtrSet.h"
42 #include "llvm/ADT/SmallVector.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/CodeGen/MachineBasicBlock.h"
45 #include "llvm/CodeGen/MachineFunction.h"
46 #include "llvm/CodeGen/MachineFunctionPass.h"
47 #include "llvm/CodeGen/MachineInstr.h"
48 #include "llvm/CodeGen/MachineInstrBuilder.h"
49 #include "llvm/CodeGen/MachineInstrBundleIterator.h"
50 #include "llvm/CodeGen/MachineOperand.h"
51 #include "llvm/CodeGen/ReachingDefAnalysis.h"
52 #include "llvm/CodeGen/Register.h"
53 #include "llvm/CodeGen/TargetRegisterInfo.h"
54 #include "llvm/IR/DebugLoc.h"
55 #include "llvm/InitializePasses.h"
56 #include "llvm/MC/MCInstrDesc.h"
57 #include "llvm/Pass.h"
58 #include "llvm/PassRegistry.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/raw_ostream.h"
61 #include <assert.h>
62 #include <stdint.h>
63 
64 using namespace llvm;
65 
66 #define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
67 
68 //===----------------------------------------------------------------------===//
69 
70 namespace {
71 class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
72 public:
73   static char ID;
74   explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
75     initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
76   }
77 
78   bool runOnMachineFunction(MachineFunction &F) override;
79 
80   MachineFunctionProperties getRequiredProperties() const override {
81     return MachineFunctionProperties().set(
82         MachineFunctionProperties::Property::NoVRegs);
83   }
84 
85   StringRef getPassName() const override {
86     return "ARM fix for Cortex-A57 AES Erratum 1742098";
87   }
88 
89   void getAnalysisUsage(AnalysisUsage &AU) const override {
90     AU.addRequired<ReachingDefAnalysis>();
91     AU.setPreservesCFG();
92     MachineFunctionPass::getAnalysisUsage(AU);
93   }
94 
95 private:
96   // This is the information needed to insert the fixup in the right place.
97   struct AESFixupLocation {
98     MachineBasicBlock *Block;
99     // The fixup instruction will be inserted *before* InsertionPt.
100     MachineInstr *InsertionPt;
101     MachineOperand *MOp;
102   };
103 
104   void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
105                  const ARMBaseRegisterInfo *TRI,
106                  SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
107 
108   void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
109                       const ARMBaseRegisterInfo *TRI) const;
110 
111   static bool isFirstAESPairInstr(MachineInstr &MI);
112   static bool isSafeAESInput(MachineInstr &MI);
113 };
114 char ARMFixCortexA57AES1742098::ID = 0;
115 
116 } // end anonymous namespace
117 
// Pass registration. The pass is registered under the DEBUG_TYPE string and
// declares ReachingDefAnalysis as a dependency so that the analysis is
// initialized together with this pass. The two trailing `false` arguments are
// the macro's "CFG only" and "is analysis" flags (see llvm/PassSupport.h).
INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
                      "ARM fix for Cortex-A57 AES Erratum 1742098", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
                    "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
124 
125 //===----------------------------------------------------------------------===//
126 
127 bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
128   unsigned Opc = MI.getOpcode();
129   return Opc == ARM::AESD || Opc == ARM::AESE;
130 }
131 
132 bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
133   auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
134     int CCIdx = MI.findFirstPredOperandIdx();
135     if (CCIdx == -1)
136       return false;
137     return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
138   };
139 
140   switch (MI.getOpcode()) {
141   // Unknown: Assume not safe.
142   default:
143     return false;
144   // 128-bit wide AES instructions
145   case ARM::AESD:
146   case ARM::AESE:
147   case ARM::AESMC:
148   case ARM::AESIMC:
149     // No CondCode.
150     return true;
151   // 128-bit and 64-bit wide bitwise ops (when condition = al)
152   case ARM::VANDd:
153   case ARM::VANDq:
154   case ARM::VORRd:
155   case ARM::VORRq:
156   case ARM::VEORd:
157   case ARM::VEORq:
158   case ARM::VMVNd:
159   case ARM::VMVNq:
160   // VMOV of 64-bit value between D registers (when condition = al)
161   case ARM::VMOVD:
162   // VMOV of 64 bit value from GPRs (when condition = al)
163   case ARM::VMOVDRR:
164   // VMOV of immediate into D or Q registers (when condition = al)
165   case ARM::VMOVv2i64:
166   case ARM::VMOVv1i64:
167   case ARM::VMOVv2f32:
168   case ARM::VMOVv4f32:
169   case ARM::VMOVv2i32:
170   case ARM::VMOVv4i32:
171   case ARM::VMOVv4i16:
172   case ARM::VMOVv8i16:
173   case ARM::VMOVv8i8:
174   case ARM::VMOVv16i8:
175   // Loads (when condition = al)
176   // VLD Dn, [Rn, #imm]
177   case ARM::VLDRD:
178   // VLDM
179   case ARM::VLDMDDB_UPD:
180   case ARM::VLDMDIA_UPD:
181   case ARM::VLDMDIA:
182   // VLDn to all lanes.
183   case ARM::VLD1d64:
184   case ARM::VLD1q64:
185   case ARM::VLD1d32:
186   case ARM::VLD1q32:
187   case ARM::VLD2b32:
188   case ARM::VLD2d32:
189   case ARM::VLD2q32:
190   case ARM::VLD1d16:
191   case ARM::VLD1q16:
192   case ARM::VLD2d16:
193   case ARM::VLD2q16:
194   case ARM::VLD1d8:
195   case ARM::VLD1q8:
196   case ARM::VLD2b8:
197   case ARM::VLD2d8:
198   case ARM::VLD2q8:
199   case ARM::VLD3d32:
200   case ARM::VLD3q32:
201   case ARM::VLD3d16:
202   case ARM::VLD3q16:
203   case ARM::VLD3d8:
204   case ARM::VLD3q8:
205   case ARM::VLD4d32:
206   case ARM::VLD4q32:
207   case ARM::VLD4d16:
208   case ARM::VLD4q16:
209   case ARM::VLD4d8:
210   case ARM::VLD4q8:
211   // VLD1 (single element to one lane)
212   case ARM::VLD1LNd32:
213   case ARM::VLD1LNd32_UPD:
214   case ARM::VLD1LNd8:
215   case ARM::VLD1LNd8_UPD:
216   case ARM::VLD1LNd16:
217   case ARM::VLD1LNd16_UPD:
218   // VLD1 (single element to all lanes)
219   case ARM::VLD1DUPd32:
220   case ARM::VLD1DUPd32wb_fixed:
221   case ARM::VLD1DUPd32wb_register:
222   case ARM::VLD1DUPd16:
223   case ARM::VLD1DUPd16wb_fixed:
224   case ARM::VLD1DUPd16wb_register:
225   case ARM::VLD1DUPd8:
226   case ARM::VLD1DUPd8wb_fixed:
227   case ARM::VLD1DUPd8wb_register:
228   case ARM::VLD1DUPq32:
229   case ARM::VLD1DUPq32wb_fixed:
230   case ARM::VLD1DUPq32wb_register:
231   case ARM::VLD1DUPq16:
232   case ARM::VLD1DUPq16wb_fixed:
233   case ARM::VLD1DUPq16wb_register:
234   case ARM::VLD1DUPq8:
235   case ARM::VLD1DUPq8wb_fixed:
236   case ARM::VLD1DUPq8wb_register:
237   // VMOV
238   case ARM::VSETLNi32:
239   case ARM::VSETLNi16:
240   case ARM::VSETLNi8:
241     return CondCodeIsAL(MI);
242   };
243 
244   return false;
245 }
246 
247 bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
248   LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
249   auto &STI = F.getSubtarget<ARMSubtarget>();
250 
251   // Fix not requested or AES instructions not present: skip pass.
252   if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
253     return false;
254 
255   const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
256   const ARMBaseInstrInfo *TII = STI.getInstrInfo();
257 
258   auto &RDA = getAnalysis<ReachingDefAnalysis>();
259 
260   // Analyze whole function to find instructions which need fixing up...
261   SmallVector<AESFixupLocation> FixupLocsForFn{};
262   analyzeMF(F, RDA, TRI, FixupLocsForFn);
263 
264   // ... and fix the instructions up all at the same time.
265   bool Changed = false;
266   LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
267   for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
268     insertAESFixup(FixupLoc, TII, TRI);
269     Changed |= true;
270   }
271 
272   return Changed;
273 }
274 
275 void ARMFixCortexA57AES1742098::analyzeMF(
276     MachineFunction &MF, ReachingDefAnalysis &RDA,
277     const ARMBaseRegisterInfo *TRI,
278     SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
279   unsigned MaxAllowedFixups = 0;
280 
281   for (MachineBasicBlock &MBB : MF) {
282     for (MachineInstr &MI : MBB) {
283       if (!isFirstAESPairInstr(MI))
284         continue;
285 
286       // Found an instruction to check the operands of.
287       LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
288       assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
289              "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
290 
291       // A maximum of two fixups should be inserted for each AES pair (one per
292       // register use).
293       MaxAllowedFixups += 2;
294 
295       // Inspect all operands, choosing whether to insert a fixup.
296       for (MachineOperand &MOp : MI.uses()) {
297         SmallPtrSet<MachineInstr *, 1> AllDefs{};
298         RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);
299 
300         // Planned Fixup: This should be added to FixupLocsForFn at most once.
301         AESFixupLocation NewLoc{&MBB, &MI, &MOp};
302 
303         // In small functions with loops, this operand may be both a live-in and
304         // have definitions within the function itself. These will need a fixup.
305         bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
306 
307         // If the register doesn't have defining instructions, and is not a
308         // live-in, then something is wrong and the fixup must always be
309         // inserted to be safe.
310         if (!IsLiveIn && AllDefs.size() == 0) {
311           LLVM_DEBUG(dbgs()
312                      << "Fixup Planned: No Defining Instrs found, not live-in: "
313                      << printReg(MOp.getReg(), TRI) << "\n");
314           FixupLocsForFn.emplace_back(NewLoc);
315           continue;
316         }
317 
318         auto IsUnsafe = [](MachineInstr *MI) -> bool {
319           return !isSafeAESInput(*MI);
320         };
321         size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
322 
323         // If there are no unsafe definitions...
324         if (UnsafeCount == 0) {
325           // ... and the register is not live-in ...
326           if (!IsLiveIn) {
327             // ... then skip the fixup.
328             LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
329                               << printReg(MOp.getReg(), TRI) << "\n");
330             continue;
331           }
332 
333           // Otherwise, the only unsafe "definition" is a live-in, so insert the
334           // fixup at the start of the function.
335           LLVM_DEBUG(dbgs()
336                      << "Fixup Planned: Live-In (with safe defining instrs): "
337                      << printReg(MOp.getReg(), TRI) << "\n");
338           NewLoc.Block = &MF.front();
339           NewLoc.InsertionPt = &*NewLoc.Block->begin();
340           LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
341                             << *NewLoc.InsertionPt);
342           FixupLocsForFn.emplace_back(NewLoc);
343           continue;
344         }
345 
346         // If a fixup is needed in more than one place, then the best place to
347         // insert it is adjacent to the use rather than introducing a fixup
348         // adjacent to each def.
349         //
350         // FIXME: It might be better to hoist this to the start of the BB, if
351         // possible.
352         if (IsLiveIn || UnsafeCount > 1) {
353           LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
354                                "(including live-ins): "
355                             << printReg(MOp.getReg(), TRI) << "\n");
356           FixupLocsForFn.emplace_back(NewLoc);
357           continue;
358         }
359 
360         assert(UnsafeCount == 1 && !IsLiveIn &&
361                "At this point, there should be one unsafe defining instrs "
362                "and the defined register should not be a live-in.");
363         SmallPtrSetIterator<MachineInstr *> It =
364             llvm::find_if(AllDefs, IsUnsafe);
365         assert(It != AllDefs.end() &&
366                "UnsafeCount == 1 but No Unsafe MachineInstr found.");
367         MachineInstr *DefMI = *It;
368 
369         LLVM_DEBUG(
370             dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
371                    << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
372 
373         // There is one unsafe defining instruction, which needs a fixup. It is
374         // generally good to hoist the fixup to be adjacent to the defining
375         // instruction rather than the using instruction, as the using
376         // instruction may be inside a loop when the defining instruction is
377         // not.
378         MachineBasicBlock::iterator DefIt = DefMI;
379         ++DefIt;
380         if (DefIt != DefMI->getParent()->end()) {
381           LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
382                             << "And immediately before " << *DefIt);
383           NewLoc.Block = DefIt->getParent();
384           NewLoc.InsertionPt = &*DefIt;
385         }
386 
387         FixupLocsForFn.emplace_back(NewLoc);
388       }
389     }
390   }
391 
392   assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
393          "Inserted too many fixups for this function.");
394   (void)MaxAllowedFixups;
395 }
396 
397 void ARMFixCortexA57AES1742098::insertAESFixup(
398     AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
399     const ARMBaseRegisterInfo *TRI) const {
400   MachineOperand *OperandToFixup = FixupLoc.MOp;
401 
402   assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
403   Register RegToFixup = OperandToFixup->getReg();
404 
405   LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
406                     << " before: " << *FixupLoc.InsertionPt);
407 
408   // Insert the new `VORRq qN, qN, qN`. There are a few details here:
409   //
410   // The uses are marked as killed, even if the original use of OperandToFixup
411   // is not killed, as the new instruction is clobbering the register. This is
412   // safe even if there are other uses of `qN`, as the VORRq value-wise a no-op
413   // (it is inserted for microarchitectural reasons).
414   //
415   // The def and the uses are still marked as Renamable if the original register
416   // was, to avoid having to rummage through all the other uses and defs and
417   // unset their renamable bits.
418   unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
419   BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
420           TII->get(ARM::VORRq))
421       .addReg(RegToFixup, RegState::Define | Renamable)
422       .addReg(RegToFixup, RegState::Kill | Renamable)
423       .addReg(RegToFixup, RegState::Kill | Renamable)
424       .addImm((uint64_t)ARMCC::AL)
425       .addReg(ARM::NoRegister);
426 }
427 
// Factory function used to add this pass to the pass manager. This is an ARM
// backend pass; ownership of the returned, heap-allocated pass object passes
// to the caller.
FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
  return new ARMFixCortexA57AES1742098();
}
433