1*81ad6265SDimitry Andric //===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
2*81ad6265SDimitry Andric //
3*81ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*81ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*81ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*81ad6265SDimitry Andric //
7*81ad6265SDimitry Andric //===----------------------------------------------------------------------===//
8*81ad6265SDimitry Andric // This pass works around a Cortex Core Fused AES erratum:
9*81ad6265SDimitry Andric // - Cortex-A57 Erratum 1742098
10*81ad6265SDimitry Andric // - Cortex-A72 Erratum 1655431
11*81ad6265SDimitry Andric //
12*81ad6265SDimitry Andric // The erratum may be triggered if an input vector register to AESE or AESD was
13*81ad6265SDimitry Andric // last written by an instruction that only updated 32 bits of it. This can
14*81ad6265SDimitry Andric // occur for either of the input registers.
15*81ad6265SDimitry Andric //
16*81ad6265SDimitry Andric // The workaround chosen is to update the input register using `r = VORRq r, r`,
17*81ad6265SDimitry Andric // as this updates all 128 bits of the register unconditionally, but does not
18*81ad6265SDimitry Andric // change the values observed in `r`, making the input safe.
19*81ad6265SDimitry Andric //
20*81ad6265SDimitry Andric // This pass has to be conservative in a few cases:
21*81ad6265SDimitry Andric // - an input vector register to the AES instruction is defined outside the
22*81ad6265SDimitry Andric //   current function, where we have to assume the register was updated in an
23*81ad6265SDimitry Andric //   unsafe way; and
24*81ad6265SDimitry Andric // - an input vector register to the AES instruction is updated along multiple
25*81ad6265SDimitry Andric //   different control-flow paths, where we have to ensure all the register
26*81ad6265SDimitry Andric //   updating instructions are safe.
27*81ad6265SDimitry Andric //
// Both of these cases may apply to an input vector register. In either case, we
29*81ad6265SDimitry Andric // need to ensure that, when the pass is finished, there exists a safe
30*81ad6265SDimitry Andric // instruction between every unsafe register updating instruction and the AES
31*81ad6265SDimitry Andric // instruction.
32*81ad6265SDimitry Andric //
33*81ad6265SDimitry Andric //===----------------------------------------------------------------------===//
34*81ad6265SDimitry Andric 
35*81ad6265SDimitry Andric #include "ARM.h"
36*81ad6265SDimitry Andric #include "ARMBaseInstrInfo.h"
37*81ad6265SDimitry Andric #include "ARMBaseRegisterInfo.h"
38*81ad6265SDimitry Andric #include "ARMSubtarget.h"
39*81ad6265SDimitry Andric #include "Utils/ARMBaseInfo.h"
40*81ad6265SDimitry Andric #include "llvm/ADT/STLExtras.h"
41*81ad6265SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
42*81ad6265SDimitry Andric #include "llvm/ADT/SmallVector.h"
43*81ad6265SDimitry Andric #include "llvm/ADT/StringRef.h"
44*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
45*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
46*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
47*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
48*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
49*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineInstrBundleIterator.h"
50*81ad6265SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
51*81ad6265SDimitry Andric #include "llvm/CodeGen/ReachingDefAnalysis.h"
52*81ad6265SDimitry Andric #include "llvm/CodeGen/Register.h"
53*81ad6265SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h"
54*81ad6265SDimitry Andric #include "llvm/IR/DebugLoc.h"
55*81ad6265SDimitry Andric #include "llvm/InitializePasses.h"
56*81ad6265SDimitry Andric #include "llvm/MC/MCInstrDesc.h"
57*81ad6265SDimitry Andric #include "llvm/Pass.h"
58*81ad6265SDimitry Andric #include "llvm/PassRegistry.h"
59*81ad6265SDimitry Andric #include "llvm/Support/Debug.h"
60*81ad6265SDimitry Andric #include "llvm/Support/raw_ostream.h"
61*81ad6265SDimitry Andric #include <assert.h>
62*81ad6265SDimitry Andric #include <stdint.h>
63*81ad6265SDimitry Andric 
64*81ad6265SDimitry Andric using namespace llvm;
65*81ad6265SDimitry Andric 
66*81ad6265SDimitry Andric #define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
67*81ad6265SDimitry Andric 
68*81ad6265SDimitry Andric //===----------------------------------------------------------------------===//
69*81ad6265SDimitry Andric 
70*81ad6265SDimitry Andric namespace {
71*81ad6265SDimitry Andric class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
72*81ad6265SDimitry Andric public:
73*81ad6265SDimitry Andric   static char ID;
ARMFixCortexA57AES1742098()74*81ad6265SDimitry Andric   explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
75*81ad6265SDimitry Andric     initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
76*81ad6265SDimitry Andric   }
77*81ad6265SDimitry Andric 
78*81ad6265SDimitry Andric   bool runOnMachineFunction(MachineFunction &F) override;
79*81ad6265SDimitry Andric 
getRequiredProperties() const80*81ad6265SDimitry Andric   MachineFunctionProperties getRequiredProperties() const override {
81*81ad6265SDimitry Andric     return MachineFunctionProperties().set(
82*81ad6265SDimitry Andric         MachineFunctionProperties::Property::NoVRegs);
83*81ad6265SDimitry Andric   }
84*81ad6265SDimitry Andric 
getPassName() const85*81ad6265SDimitry Andric   StringRef getPassName() const override {
86*81ad6265SDimitry Andric     return "ARM fix for Cortex-A57 AES Erratum 1742098";
87*81ad6265SDimitry Andric   }
88*81ad6265SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const89*81ad6265SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
90*81ad6265SDimitry Andric     AU.addRequired<ReachingDefAnalysis>();
91*81ad6265SDimitry Andric     AU.setPreservesCFG();
92*81ad6265SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
93*81ad6265SDimitry Andric   }
94*81ad6265SDimitry Andric 
95*81ad6265SDimitry Andric private:
96*81ad6265SDimitry Andric   // This is the information needed to insert the fixup in the right place.
97*81ad6265SDimitry Andric   struct AESFixupLocation {
98*81ad6265SDimitry Andric     MachineBasicBlock *Block;
99*81ad6265SDimitry Andric     // The fixup instruction will be inserted *before* InsertionPt.
100*81ad6265SDimitry Andric     MachineInstr *InsertionPt;
101*81ad6265SDimitry Andric     MachineOperand *MOp;
102*81ad6265SDimitry Andric   };
103*81ad6265SDimitry Andric 
104*81ad6265SDimitry Andric   void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
105*81ad6265SDimitry Andric                  const ARMBaseRegisterInfo *TRI,
106*81ad6265SDimitry Andric                  SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
107*81ad6265SDimitry Andric 
108*81ad6265SDimitry Andric   void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
109*81ad6265SDimitry Andric                       const ARMBaseRegisterInfo *TRI) const;
110*81ad6265SDimitry Andric 
111*81ad6265SDimitry Andric   static bool isFirstAESPairInstr(MachineInstr &MI);
112*81ad6265SDimitry Andric   static bool isSafeAESInput(MachineInstr &MI);
113*81ad6265SDimitry Andric };
114*81ad6265SDimitry Andric char ARMFixCortexA57AES1742098::ID = 0;
115*81ad6265SDimitry Andric 
116*81ad6265SDimitry Andric } // end anonymous namespace
117*81ad6265SDimitry Andric 
118*81ad6265SDimitry Andric INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
119*81ad6265SDimitry Andric                       "ARM fix for Cortex-A57 AES Erratum 1742098", false,
120*81ad6265SDimitry Andric                       false)
121*81ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
122*81ad6265SDimitry Andric INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
123*81ad6265SDimitry Andric                     "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
124*81ad6265SDimitry Andric 
125*81ad6265SDimitry Andric //===----------------------------------------------------------------------===//
126*81ad6265SDimitry Andric 
isFirstAESPairInstr(MachineInstr & MI)127*81ad6265SDimitry Andric bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
128*81ad6265SDimitry Andric   unsigned Opc = MI.getOpcode();
129*81ad6265SDimitry Andric   return Opc == ARM::AESD || Opc == ARM::AESE;
130*81ad6265SDimitry Andric }
131*81ad6265SDimitry Andric 
isSafeAESInput(MachineInstr & MI)132*81ad6265SDimitry Andric bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
133*81ad6265SDimitry Andric   auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
134*81ad6265SDimitry Andric     int CCIdx = MI.findFirstPredOperandIdx();
135*81ad6265SDimitry Andric     if (CCIdx == -1)
136*81ad6265SDimitry Andric       return false;
137*81ad6265SDimitry Andric     return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
138*81ad6265SDimitry Andric   };
139*81ad6265SDimitry Andric 
140*81ad6265SDimitry Andric   switch (MI.getOpcode()) {
141*81ad6265SDimitry Andric   // Unknown: Assume not safe.
142*81ad6265SDimitry Andric   default:
143*81ad6265SDimitry Andric     return false;
144*81ad6265SDimitry Andric   // 128-bit wide AES instructions
145*81ad6265SDimitry Andric   case ARM::AESD:
146*81ad6265SDimitry Andric   case ARM::AESE:
147*81ad6265SDimitry Andric   case ARM::AESMC:
148*81ad6265SDimitry Andric   case ARM::AESIMC:
149*81ad6265SDimitry Andric     // No CondCode.
150*81ad6265SDimitry Andric     return true;
151*81ad6265SDimitry Andric   // 128-bit and 64-bit wide bitwise ops (when condition = al)
152*81ad6265SDimitry Andric   case ARM::VANDd:
153*81ad6265SDimitry Andric   case ARM::VANDq:
154*81ad6265SDimitry Andric   case ARM::VORRd:
155*81ad6265SDimitry Andric   case ARM::VORRq:
156*81ad6265SDimitry Andric   case ARM::VEORd:
157*81ad6265SDimitry Andric   case ARM::VEORq:
158*81ad6265SDimitry Andric   case ARM::VMVNd:
159*81ad6265SDimitry Andric   case ARM::VMVNq:
160*81ad6265SDimitry Andric   // VMOV of 64-bit value between D registers (when condition = al)
161*81ad6265SDimitry Andric   case ARM::VMOVD:
162*81ad6265SDimitry Andric   // VMOV of 64 bit value from GPRs (when condition = al)
163*81ad6265SDimitry Andric   case ARM::VMOVDRR:
164*81ad6265SDimitry Andric   // VMOV of immediate into D or Q registers (when condition = al)
165*81ad6265SDimitry Andric   case ARM::VMOVv2i64:
166*81ad6265SDimitry Andric   case ARM::VMOVv1i64:
167*81ad6265SDimitry Andric   case ARM::VMOVv2f32:
168*81ad6265SDimitry Andric   case ARM::VMOVv4f32:
169*81ad6265SDimitry Andric   case ARM::VMOVv2i32:
170*81ad6265SDimitry Andric   case ARM::VMOVv4i32:
171*81ad6265SDimitry Andric   case ARM::VMOVv4i16:
172*81ad6265SDimitry Andric   case ARM::VMOVv8i16:
173*81ad6265SDimitry Andric   case ARM::VMOVv8i8:
174*81ad6265SDimitry Andric   case ARM::VMOVv16i8:
175*81ad6265SDimitry Andric   // Loads (when condition = al)
176*81ad6265SDimitry Andric   // VLD Dn, [Rn, #imm]
177*81ad6265SDimitry Andric   case ARM::VLDRD:
178*81ad6265SDimitry Andric   // VLDM
179*81ad6265SDimitry Andric   case ARM::VLDMDDB_UPD:
180*81ad6265SDimitry Andric   case ARM::VLDMDIA_UPD:
181*81ad6265SDimitry Andric   case ARM::VLDMDIA:
182*81ad6265SDimitry Andric   // VLDn to all lanes.
183*81ad6265SDimitry Andric   case ARM::VLD1d64:
184*81ad6265SDimitry Andric   case ARM::VLD1q64:
185*81ad6265SDimitry Andric   case ARM::VLD1d32:
186*81ad6265SDimitry Andric   case ARM::VLD1q32:
187*81ad6265SDimitry Andric   case ARM::VLD2b32:
188*81ad6265SDimitry Andric   case ARM::VLD2d32:
189*81ad6265SDimitry Andric   case ARM::VLD2q32:
190*81ad6265SDimitry Andric   case ARM::VLD1d16:
191*81ad6265SDimitry Andric   case ARM::VLD1q16:
192*81ad6265SDimitry Andric   case ARM::VLD2d16:
193*81ad6265SDimitry Andric   case ARM::VLD2q16:
194*81ad6265SDimitry Andric   case ARM::VLD1d8:
195*81ad6265SDimitry Andric   case ARM::VLD1q8:
196*81ad6265SDimitry Andric   case ARM::VLD2b8:
197*81ad6265SDimitry Andric   case ARM::VLD2d8:
198*81ad6265SDimitry Andric   case ARM::VLD2q8:
199*81ad6265SDimitry Andric   case ARM::VLD3d32:
200*81ad6265SDimitry Andric   case ARM::VLD3q32:
201*81ad6265SDimitry Andric   case ARM::VLD3d16:
202*81ad6265SDimitry Andric   case ARM::VLD3q16:
203*81ad6265SDimitry Andric   case ARM::VLD3d8:
204*81ad6265SDimitry Andric   case ARM::VLD3q8:
205*81ad6265SDimitry Andric   case ARM::VLD4d32:
206*81ad6265SDimitry Andric   case ARM::VLD4q32:
207*81ad6265SDimitry Andric   case ARM::VLD4d16:
208*81ad6265SDimitry Andric   case ARM::VLD4q16:
209*81ad6265SDimitry Andric   case ARM::VLD4d8:
210*81ad6265SDimitry Andric   case ARM::VLD4q8:
211*81ad6265SDimitry Andric   // VLD1 (single element to one lane)
212*81ad6265SDimitry Andric   case ARM::VLD1LNd32:
213*81ad6265SDimitry Andric   case ARM::VLD1LNd32_UPD:
214*81ad6265SDimitry Andric   case ARM::VLD1LNd8:
215*81ad6265SDimitry Andric   case ARM::VLD1LNd8_UPD:
216*81ad6265SDimitry Andric   case ARM::VLD1LNd16:
217*81ad6265SDimitry Andric   case ARM::VLD1LNd16_UPD:
218*81ad6265SDimitry Andric   // VLD1 (single element to all lanes)
219*81ad6265SDimitry Andric   case ARM::VLD1DUPd32:
220*81ad6265SDimitry Andric   case ARM::VLD1DUPd32wb_fixed:
221*81ad6265SDimitry Andric   case ARM::VLD1DUPd32wb_register:
222*81ad6265SDimitry Andric   case ARM::VLD1DUPd16:
223*81ad6265SDimitry Andric   case ARM::VLD1DUPd16wb_fixed:
224*81ad6265SDimitry Andric   case ARM::VLD1DUPd16wb_register:
225*81ad6265SDimitry Andric   case ARM::VLD1DUPd8:
226*81ad6265SDimitry Andric   case ARM::VLD1DUPd8wb_fixed:
227*81ad6265SDimitry Andric   case ARM::VLD1DUPd8wb_register:
228*81ad6265SDimitry Andric   case ARM::VLD1DUPq32:
229*81ad6265SDimitry Andric   case ARM::VLD1DUPq32wb_fixed:
230*81ad6265SDimitry Andric   case ARM::VLD1DUPq32wb_register:
231*81ad6265SDimitry Andric   case ARM::VLD1DUPq16:
232*81ad6265SDimitry Andric   case ARM::VLD1DUPq16wb_fixed:
233*81ad6265SDimitry Andric   case ARM::VLD1DUPq16wb_register:
234*81ad6265SDimitry Andric   case ARM::VLD1DUPq8:
235*81ad6265SDimitry Andric   case ARM::VLD1DUPq8wb_fixed:
236*81ad6265SDimitry Andric   case ARM::VLD1DUPq8wb_register:
237*81ad6265SDimitry Andric   // VMOV
238*81ad6265SDimitry Andric   case ARM::VSETLNi32:
239*81ad6265SDimitry Andric   case ARM::VSETLNi16:
240*81ad6265SDimitry Andric   case ARM::VSETLNi8:
241*81ad6265SDimitry Andric     return CondCodeIsAL(MI);
242*81ad6265SDimitry Andric   };
243*81ad6265SDimitry Andric 
244*81ad6265SDimitry Andric   return false;
245*81ad6265SDimitry Andric }
246*81ad6265SDimitry Andric 
runOnMachineFunction(MachineFunction & F)247*81ad6265SDimitry Andric bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
248*81ad6265SDimitry Andric   LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
249*81ad6265SDimitry Andric   auto &STI = F.getSubtarget<ARMSubtarget>();
250*81ad6265SDimitry Andric 
251*81ad6265SDimitry Andric   // Fix not requested or AES instructions not present: skip pass.
252*81ad6265SDimitry Andric   if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
253*81ad6265SDimitry Andric     return false;
254*81ad6265SDimitry Andric 
255*81ad6265SDimitry Andric   const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
256*81ad6265SDimitry Andric   const ARMBaseInstrInfo *TII = STI.getInstrInfo();
257*81ad6265SDimitry Andric 
258*81ad6265SDimitry Andric   auto &RDA = getAnalysis<ReachingDefAnalysis>();
259*81ad6265SDimitry Andric 
260*81ad6265SDimitry Andric   // Analyze whole function to find instructions which need fixing up...
261*81ad6265SDimitry Andric   SmallVector<AESFixupLocation> FixupLocsForFn{};
262*81ad6265SDimitry Andric   analyzeMF(F, RDA, TRI, FixupLocsForFn);
263*81ad6265SDimitry Andric 
264*81ad6265SDimitry Andric   // ... and fix the instructions up all at the same time.
265*81ad6265SDimitry Andric   bool Changed = false;
266*81ad6265SDimitry Andric   LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
267*81ad6265SDimitry Andric   for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
268*81ad6265SDimitry Andric     insertAESFixup(FixupLoc, TII, TRI);
269*81ad6265SDimitry Andric     Changed |= true;
270*81ad6265SDimitry Andric   }
271*81ad6265SDimitry Andric 
272*81ad6265SDimitry Andric   return Changed;
273*81ad6265SDimitry Andric }
274*81ad6265SDimitry Andric 
analyzeMF(MachineFunction & MF,ReachingDefAnalysis & RDA,const ARMBaseRegisterInfo * TRI,SmallVectorImpl<AESFixupLocation> & FixupLocsForFn) const275*81ad6265SDimitry Andric void ARMFixCortexA57AES1742098::analyzeMF(
276*81ad6265SDimitry Andric     MachineFunction &MF, ReachingDefAnalysis &RDA,
277*81ad6265SDimitry Andric     const ARMBaseRegisterInfo *TRI,
278*81ad6265SDimitry Andric     SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
279*81ad6265SDimitry Andric   unsigned MaxAllowedFixups = 0;
280*81ad6265SDimitry Andric 
281*81ad6265SDimitry Andric   for (MachineBasicBlock &MBB : MF) {
282*81ad6265SDimitry Andric     for (MachineInstr &MI : MBB) {
283*81ad6265SDimitry Andric       if (!isFirstAESPairInstr(MI))
284*81ad6265SDimitry Andric         continue;
285*81ad6265SDimitry Andric 
286*81ad6265SDimitry Andric       // Found an instruction to check the operands of.
287*81ad6265SDimitry Andric       LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
288*81ad6265SDimitry Andric       assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
289*81ad6265SDimitry Andric              "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
290*81ad6265SDimitry Andric 
291*81ad6265SDimitry Andric       // A maximum of two fixups should be inserted for each AES pair (one per
292*81ad6265SDimitry Andric       // register use).
293*81ad6265SDimitry Andric       MaxAllowedFixups += 2;
294*81ad6265SDimitry Andric 
295*81ad6265SDimitry Andric       // Inspect all operands, choosing whether to insert a fixup.
296*81ad6265SDimitry Andric       for (MachineOperand &MOp : MI.uses()) {
297*81ad6265SDimitry Andric         SmallPtrSet<MachineInstr *, 1> AllDefs{};
298*81ad6265SDimitry Andric         RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);
299*81ad6265SDimitry Andric 
300*81ad6265SDimitry Andric         // Planned Fixup: This should be added to FixupLocsForFn at most once.
301*81ad6265SDimitry Andric         AESFixupLocation NewLoc{&MBB, &MI, &MOp};
302*81ad6265SDimitry Andric 
303*81ad6265SDimitry Andric         // In small functions with loops, this operand may be both a live-in and
304*81ad6265SDimitry Andric         // have definitions within the function itself. These will need a fixup.
305*81ad6265SDimitry Andric         bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
306*81ad6265SDimitry Andric 
307*81ad6265SDimitry Andric         // If the register doesn't have defining instructions, and is not a
308*81ad6265SDimitry Andric         // live-in, then something is wrong and the fixup must always be
309*81ad6265SDimitry Andric         // inserted to be safe.
310*81ad6265SDimitry Andric         if (!IsLiveIn && AllDefs.size() == 0) {
311*81ad6265SDimitry Andric           LLVM_DEBUG(dbgs()
312*81ad6265SDimitry Andric                      << "Fixup Planned: No Defining Instrs found, not live-in: "
313*81ad6265SDimitry Andric                      << printReg(MOp.getReg(), TRI) << "\n");
314*81ad6265SDimitry Andric           FixupLocsForFn.emplace_back(NewLoc);
315*81ad6265SDimitry Andric           continue;
316*81ad6265SDimitry Andric         }
317*81ad6265SDimitry Andric 
318*81ad6265SDimitry Andric         auto IsUnsafe = [](MachineInstr *MI) -> bool {
319*81ad6265SDimitry Andric           return !isSafeAESInput(*MI);
320*81ad6265SDimitry Andric         };
321*81ad6265SDimitry Andric         size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
322*81ad6265SDimitry Andric 
323*81ad6265SDimitry Andric         // If there are no unsafe definitions...
324*81ad6265SDimitry Andric         if (UnsafeCount == 0) {
325*81ad6265SDimitry Andric           // ... and the register is not live-in ...
326*81ad6265SDimitry Andric           if (!IsLiveIn) {
327*81ad6265SDimitry Andric             // ... then skip the fixup.
328*81ad6265SDimitry Andric             LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
329*81ad6265SDimitry Andric                               << printReg(MOp.getReg(), TRI) << "\n");
330*81ad6265SDimitry Andric             continue;
331*81ad6265SDimitry Andric           }
332*81ad6265SDimitry Andric 
333*81ad6265SDimitry Andric           // Otherwise, the only unsafe "definition" is a live-in, so insert the
334*81ad6265SDimitry Andric           // fixup at the start of the function.
335*81ad6265SDimitry Andric           LLVM_DEBUG(dbgs()
336*81ad6265SDimitry Andric                      << "Fixup Planned: Live-In (with safe defining instrs): "
337*81ad6265SDimitry Andric                      << printReg(MOp.getReg(), TRI) << "\n");
338*81ad6265SDimitry Andric           NewLoc.Block = &MF.front();
339*81ad6265SDimitry Andric           NewLoc.InsertionPt = &*NewLoc.Block->begin();
340*81ad6265SDimitry Andric           LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
341*81ad6265SDimitry Andric                             << *NewLoc.InsertionPt);
342*81ad6265SDimitry Andric           FixupLocsForFn.emplace_back(NewLoc);
343*81ad6265SDimitry Andric           continue;
344*81ad6265SDimitry Andric         }
345*81ad6265SDimitry Andric 
346*81ad6265SDimitry Andric         // If a fixup is needed in more than one place, then the best place to
347*81ad6265SDimitry Andric         // insert it is adjacent to the use rather than introducing a fixup
348*81ad6265SDimitry Andric         // adjacent to each def.
349*81ad6265SDimitry Andric         //
350*81ad6265SDimitry Andric         // FIXME: It might be better to hoist this to the start of the BB, if
351*81ad6265SDimitry Andric         // possible.
352*81ad6265SDimitry Andric         if (IsLiveIn || UnsafeCount > 1) {
353*81ad6265SDimitry Andric           LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
354*81ad6265SDimitry Andric                                "(including live-ins): "
355*81ad6265SDimitry Andric                             << printReg(MOp.getReg(), TRI) << "\n");
356*81ad6265SDimitry Andric           FixupLocsForFn.emplace_back(NewLoc);
357*81ad6265SDimitry Andric           continue;
358*81ad6265SDimitry Andric         }
359*81ad6265SDimitry Andric 
360*81ad6265SDimitry Andric         assert(UnsafeCount == 1 && !IsLiveIn &&
361*81ad6265SDimitry Andric                "At this point, there should be one unsafe defining instrs "
362*81ad6265SDimitry Andric                "and the defined register should not be a live-in.");
363*81ad6265SDimitry Andric         SmallPtrSetIterator<MachineInstr *> It =
364*81ad6265SDimitry Andric             llvm::find_if(AllDefs, IsUnsafe);
365*81ad6265SDimitry Andric         assert(It != AllDefs.end() &&
366*81ad6265SDimitry Andric                "UnsafeCount == 1 but No Unsafe MachineInstr found.");
367*81ad6265SDimitry Andric         MachineInstr *DefMI = *It;
368*81ad6265SDimitry Andric 
369*81ad6265SDimitry Andric         LLVM_DEBUG(
370*81ad6265SDimitry Andric             dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
371*81ad6265SDimitry Andric                    << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
372*81ad6265SDimitry Andric 
373*81ad6265SDimitry Andric         // There is one unsafe defining instruction, which needs a fixup. It is
374*81ad6265SDimitry Andric         // generally good to hoist the fixup to be adjacent to the defining
375*81ad6265SDimitry Andric         // instruction rather than the using instruction, as the using
376*81ad6265SDimitry Andric         // instruction may be inside a loop when the defining instruction is
377*81ad6265SDimitry Andric         // not.
378*81ad6265SDimitry Andric         MachineBasicBlock::iterator DefIt = DefMI;
379*81ad6265SDimitry Andric         ++DefIt;
380*81ad6265SDimitry Andric         if (DefIt != DefMI->getParent()->end()) {
381*81ad6265SDimitry Andric           LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
382*81ad6265SDimitry Andric                             << "And immediately before " << *DefIt);
383*81ad6265SDimitry Andric           NewLoc.Block = DefIt->getParent();
384*81ad6265SDimitry Andric           NewLoc.InsertionPt = &*DefIt;
385*81ad6265SDimitry Andric         }
386*81ad6265SDimitry Andric 
387*81ad6265SDimitry Andric         FixupLocsForFn.emplace_back(NewLoc);
388*81ad6265SDimitry Andric       }
389*81ad6265SDimitry Andric     }
390*81ad6265SDimitry Andric   }
391*81ad6265SDimitry Andric 
392*81ad6265SDimitry Andric   assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
393*81ad6265SDimitry Andric          "Inserted too many fixups for this function.");
394*81ad6265SDimitry Andric   (void)MaxAllowedFixups;
395*81ad6265SDimitry Andric }
396*81ad6265SDimitry Andric 
insertAESFixup(AESFixupLocation & FixupLoc,const ARMBaseInstrInfo * TII,const ARMBaseRegisterInfo * TRI) const397*81ad6265SDimitry Andric void ARMFixCortexA57AES1742098::insertAESFixup(
398*81ad6265SDimitry Andric     AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
399*81ad6265SDimitry Andric     const ARMBaseRegisterInfo *TRI) const {
400*81ad6265SDimitry Andric   MachineOperand *OperandToFixup = FixupLoc.MOp;
401*81ad6265SDimitry Andric 
402*81ad6265SDimitry Andric   assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
403*81ad6265SDimitry Andric   Register RegToFixup = OperandToFixup->getReg();
404*81ad6265SDimitry Andric 
405*81ad6265SDimitry Andric   LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
406*81ad6265SDimitry Andric                     << " before: " << *FixupLoc.InsertionPt);
407*81ad6265SDimitry Andric 
408*81ad6265SDimitry Andric   // Insert the new `VORRq qN, qN, qN`. There are a few details here:
409*81ad6265SDimitry Andric   //
410*81ad6265SDimitry Andric   // The uses are marked as killed, even if the original use of OperandToFixup
411*81ad6265SDimitry Andric   // is not killed, as the new instruction is clobbering the register. This is
412*81ad6265SDimitry Andric   // safe even if there are other uses of `qN`, as the VORRq value-wise a no-op
413*81ad6265SDimitry Andric   // (it is inserted for microarchitectural reasons).
414*81ad6265SDimitry Andric   //
415*81ad6265SDimitry Andric   // The def and the uses are still marked as Renamable if the original register
416*81ad6265SDimitry Andric   // was, to avoid having to rummage through all the other uses and defs and
417*81ad6265SDimitry Andric   // unset their renamable bits.
418*81ad6265SDimitry Andric   unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
419*81ad6265SDimitry Andric   BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
420*81ad6265SDimitry Andric           TII->get(ARM::VORRq))
421*81ad6265SDimitry Andric       .addReg(RegToFixup, RegState::Define | Renamable)
422*81ad6265SDimitry Andric       .addReg(RegToFixup, RegState::Kill | Renamable)
423*81ad6265SDimitry Andric       .addReg(RegToFixup, RegState::Kill | Renamable)
424*81ad6265SDimitry Andric       .addImm((uint64_t)ARMCC::AL)
425*81ad6265SDimitry Andric       .addReg(ARM::NoRegister);
426*81ad6265SDimitry Andric }
427*81ad6265SDimitry Andric 
428*81ad6265SDimitry Andric // Factory function used by AArch64TargetMachine to add the pass to
429*81ad6265SDimitry Andric // the passmanager.
createARMFixCortexA57AES1742098Pass()430*81ad6265SDimitry Andric FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
431*81ad6265SDimitry Andric   return new ARMFixCortexA57AES1742098();
432*81ad6265SDimitry Andric }
433