1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
14 #include "AArch64MachineFunctionInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "MCTargetDesc/AArch64AddressingModes.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineMemOperand.h"
27 #include "llvm/CodeGen/MachineModuleInfo.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCAsmInfo.h"
37 #include "llvm/MC/MCInst.h"
38 #include "llvm/MC/MCInstrDesc.h"
39 #include "llvm/Support/Casting.h"
40 #include "llvm/Support/CodeGen.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/Compiler.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Target/TargetMachine.h"
46 #include "llvm/Target/TargetOptions.h"
47 #include <cassert>
48 #include <cstdint>
49 #include <iterator>
50 #include <utility>
51 
52 using namespace llvm;
53 
54 #define GET_INSTRINFO_CTOR_DTOR
55 #include "AArch64GenInstrInfo.inc"
56 
57 static cl::opt<unsigned> TBZDisplacementBits(
58     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
59     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
60 
61 static cl::opt<unsigned> CBZDisplacementBits(
62     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
63     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
64 
65 static cl::opt<unsigned>
66     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
67                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
68 
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
70     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
71                           AArch64::CATCHRET),
72       RI(STI.getTargetTriple()), Subtarget(STI) {}
73 
/// getInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
77   const MachineBasicBlock &MBB = *MI.getParent();
78   const MachineFunction *MF = MBB.getParent();
79   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
80 
81   {
82     auto Op = MI.getOpcode();
83     if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
84       return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
85   }
86 
87   // Meta-instructions emit no code.
88   if (MI.isMetaInstruction())
89     return 0;
90 
91   // FIXME: We currently only handle pseudoinstructions that don't get expanded
92   //        before the assembly printer.
93   unsigned NumBytes = 0;
94   const MCInstrDesc &Desc = MI.getDesc();
95   switch (Desc.getOpcode()) {
96   default:
97     // Anything not explicitly designated otherwise is a normal 4-byte insn.
98     NumBytes = 4;
99     break;
100   case TargetOpcode::STACKMAP:
101     // The upper bound for a stackmap intrinsic is the full length of its shadow
102     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
103     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
104     break;
105   case TargetOpcode::PATCHPOINT:
106     // The size of the patchpoint intrinsic is the number of bytes requested
107     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
108     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
109     break;
110   case AArch64::TLSDESC_CALLSEQ:
111     // This gets lowered to an instruction sequence which takes 16 bytes
112     NumBytes = 16;
113     break;
114   case AArch64::SpeculationBarrierISBDSBEndBB:
115     // This gets lowered to 2 4-byte instructions.
116     NumBytes = 8;
117     break;
118   case AArch64::SpeculationBarrierSBEndBB:
    // This gets lowered to one 4-byte instruction.
120     NumBytes = 4;
121     break;
122   case AArch64::JumpTableDest32:
123   case AArch64::JumpTableDest16:
124   case AArch64::JumpTableDest8:
125     NumBytes = 12;
126     break;
127   case AArch64::SPACE:
128     NumBytes = MI.getOperand(1).getImm();
129     break;
130   case TargetOpcode::BUNDLE:
131     NumBytes = getInstBundleLength(MI);
132     break;
133   }
134 
135   return NumBytes;
136 }
137 
unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
139   unsigned Size = 0;
140   MachineBasicBlock::const_instr_iterator I = MI.getIterator();
141   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
142   while (++I != E && I->isInsideBundle()) {
143     assert(!I->isBundle() && "No nested bundle!");
144     Size += getInstSizeInBytes(*I);
145   }
146   return Size;
147 }
148 
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
150                             SmallVectorImpl<MachineOperand> &Cond) {
151   // Block ends with fall-through condbranch.
152   switch (LastInst->getOpcode()) {
153   default:
154     llvm_unreachable("Unknown branch instruction?");
155   case AArch64::Bcc:
156     Target = LastInst->getOperand(1).getMBB();
157     Cond.push_back(LastInst->getOperand(0));
158     break;
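  // For compare-and-branch and test-and-branch, encode the condition as a -1
  // marker followed by the branch opcode and its non-MBB operands (the
  // register, plus the bit number for TB(N)Z).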
159   case AArch64::CBZW:
160   case AArch64::CBZX:
161   case AArch64::CBNZW:
162   case AArch64::CBNZX:
163     Target = LastInst->getOperand(1).getMBB();
164     Cond.push_back(MachineOperand::CreateImm(-1));
165     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
166     Cond.push_back(LastInst->getOperand(0));
167     break;
168   case AArch64::TBZW:
169   case AArch64::TBZX:
170   case AArch64::TBNZW:
171   case AArch64::TBNZX:
172     Target = LastInst->getOperand(2).getMBB();
173     Cond.push_back(MachineOperand::CreateImm(-1));
174     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
175     Cond.push_back(LastInst->getOperand(0));
176     Cond.push_back(LastInst->getOperand(1));
177   }
178 }
179 
static unsigned getBranchDisplacementBits(unsigned Opc) {
181   switch (Opc) {
182   default:
183     llvm_unreachable("unexpected opcode!");
184   case AArch64::B:
185     return 64;
186   case AArch64::TBNZW:
187   case AArch64::TBZW:
188   case AArch64::TBNZX:
189   case AArch64::TBZX:
190     return TBZDisplacementBits;
191   case AArch64::CBNZW:
192   case AArch64::CBZW:
193   case AArch64::CBNZX:
194   case AArch64::CBZX:
195     return CBZDisplacementBits;
196   case AArch64::Bcc:
197     return BCCDisplacementBits;
198   }
199 }
200 
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
202                                              int64_t BrOffset) const {
203   unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
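  // BrOffset is in bytes; branch immediates are scaled by the 4-byte
  // instruction size.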
206   return isIntN(Bits, BrOffset / 4);
207 }
208 
209 MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
211   switch (MI.getOpcode()) {
212   default:
213     llvm_unreachable("unexpected opcode!");
214   case AArch64::B:
215     return MI.getOperand(0).getMBB();
216   case AArch64::TBZW:
217   case AArch64::TBNZW:
218   case AArch64::TBZX:
219   case AArch64::TBNZX:
220     return MI.getOperand(2).getMBB();
221   case AArch64::CBZW:
222   case AArch64::CBNZW:
223   case AArch64::CBZX:
224   case AArch64::CBNZX:
225   case AArch64::Bcc:
226     return MI.getOperand(1).getMBB();
227   }
228 }
229 
230 // Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
232                                      MachineBasicBlock *&TBB,
233                                      MachineBasicBlock *&FBB,
234                                      SmallVectorImpl<MachineOperand> &Cond,
235                                      bool AllowModify) const {
236   // If the block has no terminators, it just falls into the block after it.
237   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
238   if (I == MBB.end())
239     return false;
240 
241   // Skip over SpeculationBarrierEndBB terminators
242   if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
243       I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
244     --I;
245   }
246 
247   if (!isUnpredicatedTerminator(*I))
248     return false;
249 
250   // Get the last instruction in the block.
251   MachineInstr *LastInst = &*I;
252 
253   // If there is only one terminator instruction, process it.
254   unsigned LastOpc = LastInst->getOpcode();
255   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
256     if (isUncondBranchOpcode(LastOpc)) {
257       TBB = LastInst->getOperand(0).getMBB();
258       return false;
259     }
260     if (isCondBranchOpcode(LastOpc)) {
261       // Block ends with fall-through condbranch.
262       parseCondBranch(LastInst, TBB, Cond);
263       return false;
264     }
265     return true; // Can't handle indirect branch.
266   }
267 
268   // Get the instruction before it if it is a terminator.
269   MachineInstr *SecondLastInst = &*I;
270   unsigned SecondLastOpc = SecondLastInst->getOpcode();
271 
272   // If AllowModify is true and the block ends with two or more unconditional
273   // branches, delete all but the first unconditional branch.
274   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
275     while (isUncondBranchOpcode(SecondLastOpc)) {
276       LastInst->eraseFromParent();
277       LastInst = SecondLastInst;
278       LastOpc = LastInst->getOpcode();
279       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now that the only terminator is an unconditional branch.
281         TBB = LastInst->getOperand(0).getMBB();
282         return false;
283       } else {
284         SecondLastInst = &*I;
285         SecondLastOpc = SecondLastInst->getOpcode();
286       }
287     }
288   }
289 
290   // If there are three terminators, we don't know what sort of block this is.
291   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
292     return true;
293 
294   // If the block ends with a B and a Bcc, handle it.
295   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
296     parseCondBranch(SecondLastInst, TBB, Cond);
297     FBB = LastInst->getOperand(0).getMBB();
298     return false;
299   }
300 
301   // If the block ends with two unconditional branches, handle it.  The second
302   // one is not executed, so remove it.
303   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
304     TBB = SecondLastInst->getOperand(0).getMBB();
305     I = LastInst;
306     if (AllowModify)
307       I->eraseFromParent();
308     return false;
309   }
310 
311   // ...likewise if it ends with an indirect branch followed by an unconditional
312   // branch.
313   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
314     I = LastInst;
315     if (AllowModify)
316       I->eraseFromParent();
317     return true;
318   }
319 
320   // Otherwise, can't handle this.
321   return true;
322 }
323 
bool AArch64InstrInfo::reverseBranchCondition(
325     SmallVectorImpl<MachineOperand> &Cond) const {
326   if (Cond[0].getImm() != -1) {
327     // Regular Bcc
328     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
329     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
330   } else {
331     // Folded compare-and-branch
332     switch (Cond[1].getImm()) {
333     default:
334       llvm_unreachable("Unknown conditional branch!");
335     case AArch64::CBZW:
336       Cond[1].setImm(AArch64::CBNZW);
337       break;
338     case AArch64::CBNZW:
339       Cond[1].setImm(AArch64::CBZW);
340       break;
341     case AArch64::CBZX:
342       Cond[1].setImm(AArch64::CBNZX);
343       break;
344     case AArch64::CBNZX:
345       Cond[1].setImm(AArch64::CBZX);
346       break;
347     case AArch64::TBZW:
348       Cond[1].setImm(AArch64::TBNZW);
349       break;
350     case AArch64::TBNZW:
351       Cond[1].setImm(AArch64::TBZW);
352       break;
353     case AArch64::TBZX:
354       Cond[1].setImm(AArch64::TBNZX);
355       break;
356     case AArch64::TBNZX:
357       Cond[1].setImm(AArch64::TBZX);
358       break;
359     }
360   }
361 
362   return false;
363 }
364 
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
366                                         int *BytesRemoved) const {
367   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
368   if (I == MBB.end())
369     return 0;
370 
371   if (!isUncondBranchOpcode(I->getOpcode()) &&
372       !isCondBranchOpcode(I->getOpcode()))
373     return 0;
374 
375   // Remove the branch.
376   I->eraseFromParent();
377 
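  // After removing one branch, check for a preceding conditional branch that
  // should also be removed.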
378   I = MBB.end();
379 
380   if (I == MBB.begin()) {
381     if (BytesRemoved)
382       *BytesRemoved = 4;
383     return 1;
384   }
385   --I;
386   if (!isCondBranchOpcode(I->getOpcode())) {
387     if (BytesRemoved)
388       *BytesRemoved = 4;
389     return 1;
390   }
391 
392   // Remove the branch.
393   I->eraseFromParent();
394   if (BytesRemoved)
395     *BytesRemoved = 8;
396 
397   return 2;
398 }
399 
void AArch64InstrInfo::instantiateCondBranch(
401     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
402     ArrayRef<MachineOperand> Cond) const {
403   if (Cond[0].getImm() != -1) {
404     // Regular Bcc
405     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
406   } else {
407     // Folded compare-and-branch
408     // Note that we use addOperand instead of addReg to keep the flags.
409     const MachineInstrBuilder MIB =
410         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
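    // For tb(n)z, Cond[3] holds the bit number to test.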
411     if (Cond.size() > 3)
412       MIB.addImm(Cond[3].getImm());
413     MIB.addMBB(TBB);
414   }
415 }
416 
unsigned AArch64InstrInfo::insertBranch(
418     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
419     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
420   // Shouldn't be a fall through.
421   assert(TBB && "insertBranch must not be told to insert a fallthrough");
422 
423   if (!FBB) {
424     if (Cond.empty()) // Unconditional branch?
425       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
426     else
427       instantiateCondBranch(MBB, DL, TBB, Cond);
428 
429     if (BytesAdded)
430       *BytesAdded = 4;
431 
432     return 1;
433   }
434 
435   // Two-way conditional branch.
436   instantiateCondBranch(MBB, DL, TBB, Cond);
437   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
438 
439   if (BytesAdded)
440     *BytesAdded = 8;
441 
442   return 2;
443 }
444 
445 // Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
447   while (Register::isVirtualRegister(VReg)) {
448     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
449     if (!DefMI->isFullCopy())
450       return VReg;
451     VReg = DefMI->getOperand(1).getReg();
452   }
453   return VReg;
454 }
455 
456 // Determine if VReg is defined by an instruction that can be folded into a
457 // csel instruction. If so, return the folded opcode, and the replacement
458 // register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
460                                 unsigned *NewVReg = nullptr) {
461   VReg = removeCopies(MRI, VReg);
462   if (!Register::isVirtualRegister(VReg))
463     return 0;
464 
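  // Choose the 64-bit or 32-bit csel family based on VReg's register class.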
465   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
466   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
467   unsigned Opc = 0;
468   unsigned SrcOpNum = 0;
469   switch (DefMI->getOpcode()) {
470   case AArch64::ADDSXri:
471   case AArch64::ADDSWri:
472     // if NZCV is used, do not fold.
473     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
474       return 0;
475     // fall-through to ADDXri and ADDWri.
476     LLVM_FALLTHROUGH;
477   case AArch64::ADDXri:
478   case AArch64::ADDWri:
479     // add x, 1 -> csinc.
480     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
481         DefMI->getOperand(3).getImm() != 0)
482       return 0;
483     SrcOpNum = 1;
484     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
485     break;
486 
487   case AArch64::ORNXrr:
488   case AArch64::ORNWrr: {
489     // not x -> csinv, represented as orn dst, xzr, src.
490     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
491     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
492       return 0;
493     SrcOpNum = 2;
494     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
495     break;
496   }
497 
498   case AArch64::SUBSXrr:
499   case AArch64::SUBSWrr:
500     // if NZCV is used, do not fold.
501     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
502       return 0;
503     // fall-through to SUBXrr and SUBWrr.
504     LLVM_FALLTHROUGH;
505   case AArch64::SUBXrr:
506   case AArch64::SUBWrr: {
507     // neg x -> csneg, represented as sub dst, xzr, src.
508     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
509     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
510       return 0;
511     SrcOpNum = 2;
512     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
513     break;
514   }
515   default:
516     return 0;
517   }
518   assert(Opc && SrcOpNum && "Missing parameters");
519 
520   if (NewVReg)
521     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
522   return Opc;
523 }
524 
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
526                                        ArrayRef<MachineOperand> Cond,
527                                        Register DstReg, Register TrueReg,
528                                        Register FalseReg, int &CondCycles,
529                                        int &TrueCycles,
530                                        int &FalseCycles) const {
531   // Check register classes.
532   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
533   const TargetRegisterClass *RC =
534       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
535   if (!RC)
536     return false;
537 
538   // Also need to check the dest regclass, in case we're trying to optimize
539   // something like:
540   // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
541   if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
542     return false;
543 
544   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
545   unsigned ExtraCondLat = Cond.size() != 1;
546 
547   // GPRs are handled by csel.
548   // FIXME: Fold in x+1, -x, and ~x when applicable.
549   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
550       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
551     // Single-cycle csel, csinc, csinv, and csneg.
552     CondCycles = 1 + ExtraCondLat;
553     TrueCycles = FalseCycles = 1;
554     if (canFoldIntoCSel(MRI, TrueReg))
555       TrueCycles = 0;
556     else if (canFoldIntoCSel(MRI, FalseReg))
557       FalseCycles = 0;
558     return true;
559   }
560 
561   // Scalar floating point is handled by fcsel.
562   // FIXME: Form fabs, fmin, and fmax when applicable.
563   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
564       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
565     CondCycles = 5 + ExtraCondLat;
566     TrueCycles = FalseCycles = 2;
567     return true;
568   }
569 
570   // Can't do vectors.
571   return false;
572 }
573 
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
575                                     MachineBasicBlock::iterator I,
576                                     const DebugLoc &DL, Register DstReg,
577                                     ArrayRef<MachineOperand> Cond,
578                                     Register TrueReg, Register FalseReg) const {
579   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
580 
581   // Parse the condition code, see parseCondBranch() above.
582   AArch64CC::CondCode CC;
583   switch (Cond.size()) {
584   default:
585     llvm_unreachable("Unknown condition opcode in Cond");
586   case 1: // b.cc
587     CC = AArch64CC::CondCode(Cond[0].getImm());
588     break;
589   case 3: { // cbz/cbnz
590     // We must insert a compare against 0.
591     bool Is64Bit;
592     switch (Cond[1].getImm()) {
593     default:
594       llvm_unreachable("Unknown branch opcode in Cond");
595     case AArch64::CBZW:
596       Is64Bit = false;
597       CC = AArch64CC::EQ;
598       break;
599     case AArch64::CBZX:
600       Is64Bit = true;
601       CC = AArch64CC::EQ;
602       break;
603     case AArch64::CBNZW:
604       Is64Bit = false;
605       CC = AArch64CC::NE;
606       break;
607     case AArch64::CBNZX:
608       Is64Bit = true;
609       CC = AArch64CC::NE;
610       break;
611     }
612     Register SrcReg = Cond[2].getReg();
613     if (Is64Bit) {
614       // cmp reg, #0 is actually subs xzr, reg, #0.
615       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
616       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
617           .addReg(SrcReg)
618           .addImm(0)
619           .addImm(0);
620     } else {
621       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
622       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
623           .addReg(SrcReg)
624           .addImm(0)
625           .addImm(0);
626     }
627     break;
628   }
629   case 4: { // tbz/tbnz
630     // We must insert a tst instruction.
631     switch (Cond[1].getImm()) {
632     default:
633       llvm_unreachable("Unknown branch opcode in Cond");
634     case AArch64::TBZW:
635     case AArch64::TBZX:
636       CC = AArch64CC::EQ;
637       break;
638     case AArch64::TBNZW:
639     case AArch64::TBNZX:
640       CC = AArch64CC::NE;
641       break;
642     }
    // tst reg, #(1 << foo) is actually ands xzr, reg, #(1 << foo).
644     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
645       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
646           .addReg(Cond[2].getReg())
647           .addImm(
648               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
649     else
650       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
651           .addReg(Cond[2].getReg())
652           .addImm(
653               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
654     break;
655   }
656   }
657 
658   unsigned Opc = 0;
659   const TargetRegisterClass *RC = nullptr;
660   bool TryFold = false;
661   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
662     RC = &AArch64::GPR64RegClass;
663     Opc = AArch64::CSELXr;
664     TryFold = true;
665   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
666     RC = &AArch64::GPR32RegClass;
667     Opc = AArch64::CSELWr;
668     TryFold = true;
669   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
670     RC = &AArch64::FPR64RegClass;
671     Opc = AArch64::FCSELDrrr;
672   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
673     RC = &AArch64::FPR32RegClass;
674     Opc = AArch64::FCSELSrrr;
675   }
676   assert(RC && "Unsupported regclass");
677 
678   // Try folding simple instructions into the csel.
679   if (TryFold) {
680     unsigned NewVReg = 0;
681     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
682     if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
685       CC = AArch64CC::getInvertedCondCode(CC);
686       TrueReg = FalseReg;
687     } else
688       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
689 
690     // Fold the operation. Leave any dead instructions for DCE to clean up.
691     if (FoldedOpc) {
692       FalseReg = NewVReg;
693       Opc = FoldedOpc;
      // This extends the live range of NewVReg.
695       MRI.clearKillFlags(NewVReg);
696     }
697   }
698 
  // Pull all virtual registers into the appropriate class.
700   MRI.constrainRegClass(TrueReg, RC);
701   MRI.constrainRegClass(FalseReg, RC);
702 
703   // Insert the csel.
704   BuildMI(MBB, I, DL, get(Opc), DstReg)
705       .addReg(TrueReg)
706       .addReg(FalseReg)
707       .addImm(CC);
708 }
709 
/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
712   uint64_t Imm = MI.getOperand(1).getImm();
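  // Mask Imm down to the low BitSize bits before checking it as a logical
  // immediate.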
713   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
714   uint64_t Encoding;
715   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
716 }
717 
718 // FIXME: this implementation should be micro-architecture dependent, so a
719 // micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
721   if (!Subtarget.hasCustomCheapAsMoveHandling())
722     return MI.isAsCheapAsAMove();
723 
724   const unsigned Opcode = MI.getOpcode();
725 
726   // Firstly, check cases gated by features.
727 
728   if (Subtarget.hasZeroCycleZeroingFP()) {
729     if (Opcode == AArch64::FMOVH0 ||
730         Opcode == AArch64::FMOVS0 ||
731         Opcode == AArch64::FMOVD0)
732       return true;
733   }
734 
735   if (Subtarget.hasZeroCycleZeroingGP()) {
736     if (Opcode == TargetOpcode::COPY &&
737         (MI.getOperand(1).getReg() == AArch64::WZR ||
738          MI.getOperand(1).getReg() == AArch64::XZR))
739       return true;
740   }
741 
742   // Secondly, check cases specific to sub-targets.
743 
744   if (Subtarget.hasExynosCheapAsMoveHandling()) {
745     if (isExynosCheapAsMove(MI))
746       return true;
747 
748     return MI.isAsCheapAsAMove();
749   }
750 
751   // Finally, check generic cases.
752 
753   switch (Opcode) {
754   default:
755     return false;
756 
757   // add/sub on register without shift
758   case AArch64::ADDWri:
759   case AArch64::ADDXri:
760   case AArch64::SUBWri:
761   case AArch64::SUBXri:
762     return (MI.getOperand(3).getImm() == 0);
763 
764   // logical ops on immediate
765   case AArch64::ANDWri:
766   case AArch64::ANDXri:
767   case AArch64::EORWri:
768   case AArch64::EORXri:
769   case AArch64::ORRWri:
770   case AArch64::ORRXri:
771     return true;
772 
773   // logical ops on register without shift
774   case AArch64::ANDWrr:
775   case AArch64::ANDXrr:
776   case AArch64::BICWrr:
777   case AArch64::BICXrr:
778   case AArch64::EONWrr:
779   case AArch64::EONXrr:
780   case AArch64::EORWrr:
781   case AArch64::EORXrr:
782   case AArch64::ORNWrr:
783   case AArch64::ORNXrr:
784   case AArch64::ORRWrr:
785   case AArch64::ORRXrr:
786     return true;
787 
788   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
789   // ORRXri, it is as cheap as MOV
790   case AArch64::MOVi32imm:
791     return canBeExpandedToORR(MI, 32);
792   case AArch64::MOVi64imm:
793     return canBeExpandedToORR(MI, 64);
794   }
795 
796   llvm_unreachable("Unknown opcode to check as cheap as a move!");
797 }
798 
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
800   switch (MI.getOpcode()) {
801   default:
802     return false;
803 
804   case AArch64::ADDWrs:
805   case AArch64::ADDXrs:
806   case AArch64::ADDSWrs:
807   case AArch64::ADDSXrs: {
808     unsigned Imm = MI.getOperand(3).getImm();
809     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
810     if (ShiftVal == 0)
811       return true;
812     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
813   }
814 
815   case AArch64::ADDWrx:
816   case AArch64::ADDXrx:
817   case AArch64::ADDXrx64:
818   case AArch64::ADDSWrx:
819   case AArch64::ADDSXrx:
820   case AArch64::ADDSXrx64: {
821     unsigned Imm = MI.getOperand(3).getImm();
822     switch (AArch64_AM::getArithExtendType(Imm)) {
823     default:
824       return false;
825     case AArch64_AM::UXTB:
826     case AArch64_AM::UXTH:
827     case AArch64_AM::UXTW:
828     case AArch64_AM::UXTX:
829       return AArch64_AM::getArithShiftValue(Imm) <= 4;
830     }
831   }
832 
833   case AArch64::SUBWrs:
834   case AArch64::SUBSWrs: {
835     unsigned Imm = MI.getOperand(3).getImm();
836     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
837     return ShiftVal == 0 ||
838            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
839   }
840 
841   case AArch64::SUBXrs:
842   case AArch64::SUBSXrs: {
843     unsigned Imm = MI.getOperand(3).getImm();
844     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
845     return ShiftVal == 0 ||
846            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
847   }
848 
849   case AArch64::SUBWrx:
850   case AArch64::SUBXrx:
851   case AArch64::SUBXrx64:
852   case AArch64::SUBSWrx:
853   case AArch64::SUBSXrx:
854   case AArch64::SUBSXrx64: {
855     unsigned Imm = MI.getOperand(3).getImm();
856     switch (AArch64_AM::getArithExtendType(Imm)) {
857     default:
858       return false;
859     case AArch64_AM::UXTB:
860     case AArch64_AM::UXTH:
861     case AArch64_AM::UXTW:
862     case AArch64_AM::UXTX:
863       return AArch64_AM::getArithShiftValue(Imm) == 0;
864     }
865   }
866 
867   case AArch64::LDRBBroW:
868   case AArch64::LDRBBroX:
869   case AArch64::LDRBroW:
870   case AArch64::LDRBroX:
871   case AArch64::LDRDroW:
872   case AArch64::LDRDroX:
873   case AArch64::LDRHHroW:
874   case AArch64::LDRHHroX:
875   case AArch64::LDRHroW:
876   case AArch64::LDRHroX:
877   case AArch64::LDRQroW:
878   case AArch64::LDRQroX:
879   case AArch64::LDRSBWroW:
880   case AArch64::LDRSBWroX:
881   case AArch64::LDRSBXroW:
882   case AArch64::LDRSBXroX:
883   case AArch64::LDRSHWroW:
884   case AArch64::LDRSHWroX:
885   case AArch64::LDRSHXroW:
886   case AArch64::LDRSHXroX:
887   case AArch64::LDRSWroW:
888   case AArch64::LDRSWroX:
889   case AArch64::LDRSroW:
890   case AArch64::LDRSroX:
891   case AArch64::LDRWroW:
892   case AArch64::LDRWroX:
893   case AArch64::LDRXroW:
894   case AArch64::LDRXroX:
895   case AArch64::PRFMroW:
896   case AArch64::PRFMroX:
897   case AArch64::STRBBroW:
898   case AArch64::STRBBroX:
899   case AArch64::STRBroW:
900   case AArch64::STRBroX:
901   case AArch64::STRDroW:
902   case AArch64::STRDroX:
903   case AArch64::STRHHroW:
904   case AArch64::STRHHroX:
905   case AArch64::STRHroW:
906   case AArch64::STRHroX:
907   case AArch64::STRQroW:
908   case AArch64::STRQroX:
909   case AArch64::STRSroW:
910   case AArch64::STRSroX:
911   case AArch64::STRWroW:
912   case AArch64::STRWroX:
913   case AArch64::STRXroW:
914   case AArch64::STRXroX: {
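    // The register-offset forms are treated as fast only when the offset
    // register is not sign-extended.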
915     unsigned IsSigned = MI.getOperand(3).getImm();
916     return !IsSigned;
917   }
918   }
919 }
920 
bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
922   unsigned Opc = MI.getOpcode();
923   switch (Opc) {
924     default:
925       return false;
926     case AArch64::SEH_StackAlloc:
927     case AArch64::SEH_SaveFPLR:
928     case AArch64::SEH_SaveFPLR_X:
929     case AArch64::SEH_SaveReg:
930     case AArch64::SEH_SaveReg_X:
931     case AArch64::SEH_SaveRegP:
932     case AArch64::SEH_SaveRegP_X:
933     case AArch64::SEH_SaveFReg:
934     case AArch64::SEH_SaveFReg_X:
935     case AArch64::SEH_SaveFRegP:
936     case AArch64::SEH_SaveFRegP_X:
937     case AArch64::SEH_SetFP:
938     case AArch64::SEH_AddFP:
939     case AArch64::SEH_Nop:
940     case AArch64::SEH_PrologEnd:
941     case AArch64::SEH_EpilogStart:
942     case AArch64::SEH_EpilogEnd:
943       return true;
944   }
945 }
946 
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
948                                              Register &SrcReg, Register &DstReg,
949                                              unsigned &SubIdx) const {
950   switch (MI.getOpcode()) {
951   default:
952     return false;
953   case AArch64::SBFMXri: // aka sxtw
954   case AArch64::UBFMXri: // aka uxtw
955     // Check for the 32 -> 64 bit extension case, these instructions can do
956     // much more.
957     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
958       return false;
959     // This is a signed or unsigned 32 -> 64 bit extension.
960     SrcReg = MI.getOperand(1).getReg();
961     DstReg = MI.getOperand(0).getReg();
962     SubIdx = AArch64::sub_32;
963     return true;
964   }
965 }
966 
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
968     const MachineInstr &MIa, const MachineInstr &MIb) const {
969   const TargetRegisterInfo *TRI = &getRegisterInfo();
970   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
971   int64_t OffsetA = 0, OffsetB = 0;
972   unsigned WidthA = 0, WidthB = 0;
973   bool OffsetAIsScalable = false, OffsetBIsScalable = false;
974 
975   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
976   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
977 
978   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
979       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
980     return false;
981 
  // Retrieve the base, the offset from the base, and the width. The width is
  // the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If the
  // bases are identical, and the end of the lower memory access (its offset
  // plus its width) does not extend past the offset of the higher memory
  // access, then the memory accesses are disjoint.
987   // If OffsetAIsScalable and OffsetBIsScalable are both true, they
988   // are assumed to have the same scale (vscale).
989   if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
990                                    WidthA, TRI) &&
991       getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
992                                    WidthB, TRI)) {
993     if (BaseOpA->isIdenticalTo(*BaseOpB) &&
994         OffsetAIsScalable == OffsetBIsScalable) {
995       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
996       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
997       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
998       if (LowOffset + LowWidth <= HighOffset)
999         return true;
1000     }
1001   }
1002   return false;
1003 }
1004 
bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1006                                             const MachineBasicBlock *MBB,
1007                                             const MachineFunction &MF) const {
1008   if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1009     return true;
1010   switch (MI.getOpcode()) {
1011   case AArch64::HINT:
1012     // CSDB hints are scheduling barriers.
1013     if (MI.getOperand(0).getImm() == 0x14)
1014       return true;
1015     break;
1016   case AArch64::DSB:
1017   case AArch64::ISB:
1018     // DSB and ISB also are scheduling barriers.
1019     return true;
1020   default:;
1021   }
1022   return isSEHInstruction(MI);
1023 }
1024 
1025 /// analyzeCompare - For a comparison instruction, return the source registers
1026 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1027 /// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1029                                       Register &SrcReg2, int &CmpMask,
1030                                       int &CmpValue) const {
1031   // The first operand can be a frame index where we'd normally expect a
1032   // register.
1033   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1034   if (!MI.getOperand(1).isReg())
1035     return false;
1036 
1037   switch (MI.getOpcode()) {
1038   default:
1039     break;
1040   case AArch64::SUBSWrr:
1041   case AArch64::SUBSWrs:
1042   case AArch64::SUBSWrx:
1043   case AArch64::SUBSXrr:
1044   case AArch64::SUBSXrs:
1045   case AArch64::SUBSXrx:
1046   case AArch64::ADDSWrr:
1047   case AArch64::ADDSWrs:
1048   case AArch64::ADDSWrx:
1049   case AArch64::ADDSXrr:
1050   case AArch64::ADDSXrs:
1051   case AArch64::ADDSXrx:
1052     // Replace SUBSWrr with SUBWrr if NZCV is not used.
1053     SrcReg = MI.getOperand(1).getReg();
1054     SrcReg2 = MI.getOperand(2).getReg();
1055     CmpMask = ~0;
1056     CmpValue = 0;
1057     return true;
1058   case AArch64::SUBSWri:
1059   case AArch64::ADDSWri:
1060   case AArch64::SUBSXri:
1061   case AArch64::ADDSXri:
1062     SrcReg = MI.getOperand(1).getReg();
1063     SrcReg2 = 0;
1064     CmpMask = ~0;
    // FIXME: CmpValue is reduced to 0 or 1 here, since optimizeCompareInstr
    // only needs to know whether the immediate is zero.
1066     CmpValue = MI.getOperand(2).getImm() != 0;
1067     return true;
1068   case AArch64::ANDSWri:
1069   case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
1072     SrcReg = MI.getOperand(1).getReg();
1073     SrcReg2 = 0;
1074     CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of the uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in optimizeCompareInstr.
1080     CmpValue = AArch64_AM::decodeLogicalImmediate(
1081                    MI.getOperand(2).getImm(),
1082                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1083     return true;
1084   }
1085 
1086   return false;
1087 }
1088 
static bool UpdateOperandRegClass(MachineInstr &Instr) {
1090   MachineBasicBlock *MBB = Instr.getParent();
1091   assert(MBB && "Can't get MachineBasicBlock here");
1092   MachineFunction *MF = MBB->getParent();
1093   assert(MF && "Can't get MachineFunction here");
1094   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1095   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1096   MachineRegisterInfo *MRI = &MF->getRegInfo();
1097 
1098   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1099        ++OpIdx) {
1100     MachineOperand &MO = Instr.getOperand(OpIdx);
1101     const TargetRegisterClass *OpRegCstraints =
1102         Instr.getRegClassConstraint(OpIdx, TII, TRI);
1103 
1104     // If there's no constraint, there's nothing to do.
1105     if (!OpRegCstraints)
1106       continue;
1107     // If the operand is a frame index, there's nothing to do here.
1108     // A frame index operand will resolve correctly during PEI.
1109     if (MO.isFI())
1110       continue;
1111 
1112     assert(MO.isReg() &&
1113            "Operand has register constraints without being a register!");
1114 
1115     Register Reg = MO.getReg();
1116     if (Register::isPhysicalRegister(Reg)) {
1117       if (!OpRegCstraints->contains(Reg))
1118         return false;
1119     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1120                !MRI->constrainRegClass(Reg, OpRegCstraints))
1121       return false;
1122   }
1123 
1124   return true;
1125 }
1126 
1127 /// Return the opcode that does not set flags when possible - otherwise
1128 /// return the original opcode. The caller is responsible to do the actual
1129 /// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
  // Don't convert all compare instructions: for some of them, dropping the
  // flag-setting form would turn the zero register encoding into the sp
  // register.
1133   bool MIDefinesZeroReg = false;
1134   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1135     MIDefinesZeroReg = true;
1136 
1137   switch (MI.getOpcode()) {
1138   default:
1139     return MI.getOpcode();
1140   case AArch64::ADDSWrr:
1141     return AArch64::ADDWrr;
1142   case AArch64::ADDSWri:
1143     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1144   case AArch64::ADDSWrs:
1145     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1146   case AArch64::ADDSWrx:
1147     return AArch64::ADDWrx;
1148   case AArch64::ADDSXrr:
1149     return AArch64::ADDXrr;
1150   case AArch64::ADDSXri:
1151     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1152   case AArch64::ADDSXrs:
1153     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1154   case AArch64::ADDSXrx:
1155     return AArch64::ADDXrx;
1156   case AArch64::SUBSWrr:
1157     return AArch64::SUBWrr;
1158   case AArch64::SUBSWri:
1159     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1160   case AArch64::SUBSWrs:
1161     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1162   case AArch64::SUBSWrx:
1163     return AArch64::SUBWrx;
1164   case AArch64::SUBSXrr:
1165     return AArch64::SUBXrr;
1166   case AArch64::SUBSXri:
1167     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1168   case AArch64::SUBSXrs:
1169     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1170   case AArch64::SUBSXrx:
1171     return AArch64::SUBXrx;
1172   }
1173 }
1174 
1175 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1176 
1177 /// True when condition flags are accessed (either by writing or reading)
1178 /// on the instruction trace starting at From and ending at To.
1179 ///
/// Note: If From and To are from different blocks, it is assumed the condition
///       flags are accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
1183     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1184     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1185   // Early exit if To is at the beginning of the BB.
1186   if (To == To->getParent()->begin())
1187     return true;
1188 
1189   // Check whether the instructions are in the same basic block
1190   // If not, assume the condition flags might get modified somewhere.
1191   if (To->getParent() != From->getParent())
1192     return true;
1193 
1194   // From must be above To.
1195   assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1196                       [From](MachineInstr &MI) {
1197                         return MI.getIterator() == From;
1198                       }) != To->getParent()->rend());
1199 
1200   // We iterate backward starting at \p To until we hit \p From.
1201   for (const MachineInstr &Instr :
1202        instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1203     if (((AccessToCheck & AK_Write) &&
1204          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1205         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1206       return true;
1207   }
1208   return false;
1209 }
1210 
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It is truly a compare
/// instruction when there are no uses of its destination register.
1215 ///
1216 /// The following steps are tried in order:
/// 1. Convert CmpInstr into a non-flag-setting version.
1218 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1219 ///    condition code or an instruction which can be converted into such an
1220 ///    instruction.
1221 ///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
1223     MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
1224     int CmpValue, const MachineRegisterInfo *MRI) const {
1225   assert(CmpInstr.getParent());
1226   assert(MRI);
1227 
1228   // Replace SUBSWrr with SUBWrr if NZCV is not used.
1229   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1230   if (DeadNZCVIdx != -1) {
1231     if (CmpInstr.definesRegister(AArch64::WZR) ||
1232         CmpInstr.definesRegister(AArch64::XZR)) {
1233       CmpInstr.eraseFromParent();
1234       return true;
1235     }
1236     unsigned Opc = CmpInstr.getOpcode();
1237     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1238     if (NewOpc == Opc)
1239       return false;
1240     const MCInstrDesc &MCID = get(NewOpc);
1241     CmpInstr.setDesc(MCID);
1242     CmpInstr.RemoveOperand(DeadNZCVIdx);
1243     bool succeeded = UpdateOperandRegClass(CmpInstr);
1244     (void)succeeded;
    assert(succeeded && "Some operands' register classes are incompatible!");
1246     return true;
1247   }
1248 
  // Continue only if we have an "ri" form where the immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in the
  // analyzeCompare function.
1252   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1253   if (CmpValue != 0 || SrcReg2 != 0)
1254     return false;
1255 
1256   // CmpInstr is a Compare instruction if destination register is not used.
1257   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1258     return false;
1259 
1260   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1261 }
1262 
/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr is already an S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
1268   switch (Instr.getOpcode()) {
1269   default:
1270     return AArch64::INSTRUCTION_LIST_END;
1271 
1272   case AArch64::ADDSWrr:
1273   case AArch64::ADDSWri:
1274   case AArch64::ADDSXrr:
1275   case AArch64::ADDSXri:
1276   case AArch64::SUBSWrr:
1277   case AArch64::SUBSWri:
1278   case AArch64::SUBSXrr:
1279   case AArch64::SUBSXri:
1280     return Instr.getOpcode();
1281 
1282   case AArch64::ADDWrr:
1283     return AArch64::ADDSWrr;
1284   case AArch64::ADDWri:
1285     return AArch64::ADDSWri;
1286   case AArch64::ADDXrr:
1287     return AArch64::ADDSXrr;
1288   case AArch64::ADDXri:
1289     return AArch64::ADDSXri;
1290   case AArch64::ADCWr:
1291     return AArch64::ADCSWr;
1292   case AArch64::ADCXr:
1293     return AArch64::ADCSXr;
1294   case AArch64::SUBWrr:
1295     return AArch64::SUBSWrr;
1296   case AArch64::SUBWri:
1297     return AArch64::SUBSWri;
1298   case AArch64::SUBXrr:
1299     return AArch64::SUBSXrr;
1300   case AArch64::SUBXri:
1301     return AArch64::SUBSXri;
1302   case AArch64::SBCWr:
1303     return AArch64::SBCSWr;
1304   case AArch64::SBCXr:
1305     return AArch64::SBCSXr;
1306   case AArch64::ANDWri:
1307     return AArch64::ANDSWri;
1308   case AArch64::ANDXri:
1309     return AArch64::ANDSXri;
1310   }
1311 }
1312 
1313 /// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1315   for (auto *BB : MBB->successors())
1316     if (BB->isLiveIn(AArch64::NZCV))
1317       return true;
1318   return false;
1319 }
1320 
1321 namespace {
1322 
1323 struct UsedNZCV {
1324   bool N = false;
1325   bool Z = false;
1326   bool C = false;
1327   bool V = false;
1328 
1329   UsedNZCV() = default;
1330 
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1332     this->N |= UsedFlags.N;
1333     this->Z |= UsedFlags.Z;
1334     this->C |= UsedFlags.C;
1335     this->V |= UsedFlags.V;
1336     return *this;
1337   }
1338 };
1339 
1340 } // end anonymous namespace
1341 
1342 /// Find a condition code used by the instruction.
1343 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1344 /// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1346   switch (Instr.getOpcode()) {
1347   default:
1348     return AArch64CC::Invalid;
1349 
1350   case AArch64::Bcc: {
1351     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1352     assert(Idx >= 2);
1353     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1354   }
1355 
1356   case AArch64::CSINVWr:
1357   case AArch64::CSINVXr:
1358   case AArch64::CSINCWr:
1359   case AArch64::CSINCXr:
1360   case AArch64::CSELWr:
1361   case AArch64::CSELXr:
1362   case AArch64::CSNEGWr:
1363   case AArch64::CSNEGXr:
1364   case AArch64::FCSELSrrr:
1365   case AArch64::FCSELDrrr: {
1366     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1367     assert(Idx >= 1);
1368     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1369   }
1370   }
1371 }
1372 
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1374   assert(CC != AArch64CC::Invalid);
1375   UsedNZCV UsedFlags;
1376   switch (CC) {
1377   default:
1378     break;
1379 
1380   case AArch64CC::EQ: // Z set
1381   case AArch64CC::NE: // Z clear
1382     UsedFlags.Z = true;
1383     break;
1384 
1385   case AArch64CC::HI: // Z clear and C set
1386   case AArch64CC::LS: // Z set   or  C clear
1387     UsedFlags.Z = true;
1388     LLVM_FALLTHROUGH;
1389   case AArch64CC::HS: // C set
1390   case AArch64CC::LO: // C clear
1391     UsedFlags.C = true;
1392     break;
1393 
1394   case AArch64CC::MI: // N set
1395   case AArch64CC::PL: // N clear
1396     UsedFlags.N = true;
1397     break;
1398 
1399   case AArch64CC::VS: // V set
1400   case AArch64CC::VC: // V clear
1401     UsedFlags.V = true;
1402     break;
1403 
1404   case AArch64CC::GT: // Z clear, N and V the same
1405   case AArch64CC::LE: // Z set,   N and V differ
1406     UsedFlags.Z = true;
1407     LLVM_FALLTHROUGH;
1408   case AArch64CC::GE: // N and V the same
1409   case AArch64CC::LT: // N and V differ
1410     UsedFlags.N = true;
1411     UsedFlags.V = true;
1412     break;
1413   }
1414   return UsedFlags;
1415 }
1416 
static bool isADDSRegImm(unsigned Opcode) {
1418   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1419 }
1420 
static bool isSUBSRegImm(unsigned Opcode) {
1422   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1423 }
1424 
1425 /// Check if CmpInstr can be substituted by MI.
1426 ///
/// CmpInstr can be substituted when:
/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
/// - and, MI and CmpInstr are in the same MachineBB
/// - and, condition flags are not alive in successors of the CmpInstr parent
/// - and, if the MI opcode is the S form, there are no defs of flags between
///        MI and CmpInstr;
///        if the MI opcode is not the S form, there are neither defs nor uses
///        of flags between MI and CmpInstr
/// - and, the C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1437                                        const TargetRegisterInfo *TRI) {
1438   assert(MI);
1439   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1440   assert(CmpInstr);
1441 
1442   const unsigned CmpOpcode = CmpInstr->getOpcode();
1443   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1444     return false;
1445 
1446   if (MI->getParent() != CmpInstr->getParent())
1447     return false;
1448 
1449   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1450     return false;
1451 
1452   AccessKind AccessToCheck = AK_Write;
1453   if (sForm(*MI) != MI->getOpcode())
1454     AccessToCheck = AK_All;
1455   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1456     return false;
1457 
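  // Collect the NZCV flags used by instructions after CmpInstr, up to and
  // including the next instruction that writes NZCV.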
1458   UsedNZCV NZCVUsedAfterCmp;
1459   for (const MachineInstr &Instr :
1460        instructionsWithoutDebug(std::next(CmpInstr->getIterator()),
1461                                 CmpInstr->getParent()->instr_end())) {
1462     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1463       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1464       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1465         return false;
1466       NZCVUsedAfterCmp |= getUsedNZCV(CC);
1467     }
1468 
1469     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1470       break;
1471   }
1472 
1473   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1474 }
1475 
1476 /// Substitute an instruction comparing to zero with another instruction
1477 /// which produces needed condition flags.
1478 ///
1479 /// Return true on success.
bool AArch64InstrInfo::substituteCmpToZero(
1481     MachineInstr &CmpInstr, unsigned SrcReg,
1482     const MachineRegisterInfo *MRI) const {
1483   assert(MRI);
1484   // Get the unique definition of SrcReg.
1485   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1486   if (!MI)
1487     return false;
1488 
1489   const TargetRegisterInfo *TRI = &getRegisterInfo();
1490 
1491   unsigned NewOpc = sForm(*MI);
1492   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1493     return false;
1494 
1495   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1496     return false;
1497 
1498   // Update the instruction to set NZCV.
1499   MI->setDesc(get(NewOpc));
1500   CmpInstr.eraseFromParent();
1501   bool succeeded = UpdateOperandRegClass(*MI);
1502   (void)succeeded;
1503   assert(succeeded && "Some operand register classes are incompatible!");
1504   MI->addRegisterDefined(AArch64::NZCV, TRI);
1505   return true;
1506 }
1507 
1508 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1509   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1510       MI.getOpcode() != AArch64::CATCHRET)
1511     return false;
1512 
1513   MachineBasicBlock &MBB = *MI.getParent();
1514   auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1515   auto TRI = Subtarget.getRegisterInfo();
1516   DebugLoc DL = MI.getDebugLoc();
1517 
1518   if (MI.getOpcode() == AArch64::CATCHRET) {
1519     // Skip to the first instruction before the epilog.
1520     const TargetInstrInfo *TII =
1521       MBB.getParent()->getSubtarget().getInstrInfo();
1522     MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1523     auto MBBI = MachineBasicBlock::iterator(MI);
1524     MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1525     while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1526            FirstEpilogSEH != MBB.begin())
1527       FirstEpilogSEH = std::prev(FirstEpilogSEH);
1528     if (FirstEpilogSEH != MBB.begin())
1529       FirstEpilogSEH = std::next(FirstEpilogSEH);
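    // Materialize the address of the catchret target block in X0 right before
    // the epilogue; the catch funclet hands this continuation address back to
    // its caller.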
1530     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1531         .addReg(AArch64::X0, RegState::Define)
1532         .addMBB(TargetMBB);
1533     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1534         .addReg(AArch64::X0, RegState::Define)
1535         .addReg(AArch64::X0)
1536         .addMBB(TargetMBB)
1537         .addImm(0);
1538     return true;
1539   }
1540 
1541   Register Reg = MI.getOperand(0).getReg();
1542   const GlobalValue *GV =
1543       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1544   const TargetMachine &TM = MBB.getParent()->getTarget();
1545   unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1546   const unsigned char MO_NC = AArch64II::MO_NC;
1547 
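  // Expand LOAD_STACK_GUARD into an address computation plus a load of the
  // guard value. For example, with the small code model and no GOT this is
  // roughly:
  //   adrp xN, __stack_chk_guard
  //   ldr  xN, [xN, :lo12:__stack_chk_guard]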
1548   if ((OpFlags & AArch64II::MO_GOT) != 0) {
1549     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1550         .addGlobalAddress(GV, 0, OpFlags);
1551     if (Subtarget.isTargetILP32()) {
1552       unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1553       BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1554           .addDef(Reg32, RegState::Dead)
1555           .addUse(Reg, RegState::Kill)
1556           .addImm(0)
1557           .addMemOperand(*MI.memoperands_begin())
1558           .addDef(Reg, RegState::Implicit);
1559     } else {
1560       BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1561           .addReg(Reg, RegState::Kill)
1562           .addImm(0)
1563           .addMemOperand(*MI.memoperands_begin());
1564     }
1565   } else if (TM.getCodeModel() == CodeModel::Large) {
1566     assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
1567     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1568         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1569         .addImm(0);
1570     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1571         .addReg(Reg, RegState::Kill)
1572         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1573         .addImm(16);
1574     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1575         .addReg(Reg, RegState::Kill)
1576         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1577         .addImm(32);
1578     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1579         .addReg(Reg, RegState::Kill)
1580         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1581         .addImm(48);
1582     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1583         .addReg(Reg, RegState::Kill)
1584         .addImm(0)
1585         .addMemOperand(*MI.memoperands_begin());
1586   } else if (TM.getCodeModel() == CodeModel::Tiny) {
1587     BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1588         .addGlobalAddress(GV, 0, OpFlags);
1589   } else {
1590     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1591         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1592     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1593     if (Subtarget.isTargetILP32()) {
1594       unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1595       BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1596           .addDef(Reg32, RegState::Dead)
1597           .addUse(Reg, RegState::Kill)
1598           .addGlobalAddress(GV, 0, LoFlags)
1599           .addMemOperand(*MI.memoperands_begin())
1600           .addDef(Reg, RegState::Implicit);
1601     } else {
1602       BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1603           .addReg(Reg, RegState::Kill)
1604           .addGlobalAddress(GV, 0, LoFlags)
1605           .addMemOperand(*MI.memoperands_begin());
1606     }
1607   }
1608 
1609   MBB.erase(MI);
1610 
1611   return true;
1612 }
1613 
1614 // Return true if this instruction simply sets its single destination register
1615 // to zero. This is equivalent to a register rename of the zero-register.
1616 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1617   switch (MI.getOpcode()) {
1618   default:
1619     break;
1620   case AArch64::MOVZWi:
1621   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1622     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1623       assert(MI.getDesc().getNumOperands() == 3 &&
1624              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1625       return true;
1626     }
1627     break;
1628   case AArch64::ANDWri: // and Rd, Rzr, #imm
1629     return MI.getOperand(1).getReg() == AArch64::WZR;
1630   case AArch64::ANDXri:
1631     return MI.getOperand(1).getReg() == AArch64::XZR;
1632   case TargetOpcode::COPY:
1633     return MI.getOperand(1).getReg() == AArch64::WZR;
1634   }
1635   return false;
1636 }
1637 
1638 // Return true if this instruction simply renames a general register without
1639 // modifying bits.
1640 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1641   switch (MI.getOpcode()) {
1642   default:
1643     break;
1644   case TargetOpcode::COPY: {
1645     // GPR32 copies will be lowered to ORRXrs
1646     Register DstReg = MI.getOperand(0).getReg();
1647     return (AArch64::GPR32RegClass.contains(DstReg) ||
1648             AArch64::GPR64RegClass.contains(DstReg));
1649   }
1650   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1651     if (MI.getOperand(1).getReg() == AArch64::XZR) {
1652       assert(MI.getDesc().getNumOperands() == 4 &&
1653              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1654       return true;
1655     }
1656     break;
1657   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1658     if (MI.getOperand(2).getImm() == 0) {
1659       assert(MI.getDesc().getNumOperands() == 4 &&
1660              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1661       return true;
1662     }
1663     break;
1664   }
1665   return false;
1666 }
1667 
1668 // Return true if this instruction simply renames a general register without
1669 // modifying bits.
1670 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1671   switch (MI.getOpcode()) {
1672   default:
1673     break;
1674   case TargetOpcode::COPY: {
1675     // FPR64 copies will be lowered to ORR.16b
1676     Register DstReg = MI.getOperand(0).getReg();
1677     return (AArch64::FPR64RegClass.contains(DstReg) ||
1678             AArch64::FPR128RegClass.contains(DstReg));
1679   }
1680   case AArch64::ORRv16i8:
1681     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1682       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1683              "invalid ORRv16i8 operands");
1684       return true;
1685     }
1686     break;
1687   }
1688   return false;
1689 }
1690 
1691 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1692                                                int &FrameIndex) const {
1693   switch (MI.getOpcode()) {
1694   default:
1695     break;
1696   case AArch64::LDRWui:
1697   case AArch64::LDRXui:
1698   case AArch64::LDRBui:
1699   case AArch64::LDRHui:
1700   case AArch64::LDRSui:
1701   case AArch64::LDRDui:
1702   case AArch64::LDRQui:
1703     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1704         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1705       FrameIndex = MI.getOperand(1).getIndex();
1706       return MI.getOperand(0).getReg();
1707     }
1708     break;
1709   }
1710 
1711   return 0;
1712 }
1713 
1714 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1715                                               int &FrameIndex) const {
1716   switch (MI.getOpcode()) {
1717   default:
1718     break;
1719   case AArch64::STRWui:
1720   case AArch64::STRXui:
1721   case AArch64::STRBui:
1722   case AArch64::STRHui:
1723   case AArch64::STRSui:
1724   case AArch64::STRDui:
1725   case AArch64::STRQui:
1726   case AArch64::LDR_PXI:
1727   case AArch64::STR_PXI:
1728     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1729         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1730       FrameIndex = MI.getOperand(1).getIndex();
1731       return MI.getOperand(0).getReg();
1732     }
1733     break;
1734   }
1735   return 0;
1736 }
1737 
1738 /// Check all MachineMemOperands for a hint to suppress pairing.
1739 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1740   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1741     return MMO->getFlags() & MOSuppressPair;
1742   });
1743 }
1744 
1745 /// Set a flag on the first MachineMemOperand to suppress pairing.
1746 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1747   if (MI.memoperands_empty())
1748     return;
1749   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1750 }
1751 
1752 /// Check all MachineMemOperands for a hint that the load/store is strided.
1753 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1754   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1755     return MMO->getFlags() & MOStridedAccess;
1756   });
1757 }
1758 
1759 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1760   switch (Opc) {
1761   default:
1762     return false;
1763   case AArch64::STURSi:
1764   case AArch64::STURDi:
1765   case AArch64::STURQi:
1766   case AArch64::STURBBi:
1767   case AArch64::STURHHi:
1768   case AArch64::STURWi:
1769   case AArch64::STURXi:
1770   case AArch64::LDURSi:
1771   case AArch64::LDURDi:
1772   case AArch64::LDURQi:
1773   case AArch64::LDURWi:
1774   case AArch64::LDURXi:
1775   case AArch64::LDURSWi:
1776   case AArch64::LDURHHi:
1777   case AArch64::LDURBBi:
1778   case AArch64::LDURSBWi:
1779   case AArch64::LDURSHWi:
1780     return true;
1781   }
1782 }
1783 
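// Map a scaled load/store opcode to its unscaled (LDUR/STUR-style)
// counterpart, if one exists; e.g. LDRXui -> LDURXi.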
1784 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
1785   switch (Opc) {
1786   default: return {};
1787   case AArch64::PRFMui: return AArch64::PRFUMi;
1788   case AArch64::LDRXui: return AArch64::LDURXi;
1789   case AArch64::LDRWui: return AArch64::LDURWi;
1790   case AArch64::LDRBui: return AArch64::LDURBi;
1791   case AArch64::LDRHui: return AArch64::LDURHi;
1792   case AArch64::LDRSui: return AArch64::LDURSi;
1793   case AArch64::LDRDui: return AArch64::LDURDi;
1794   case AArch64::LDRQui: return AArch64::LDURQi;
1795   case AArch64::LDRBBui: return AArch64::LDURBBi;
1796   case AArch64::LDRHHui: return AArch64::LDURHHi;
1797   case AArch64::LDRSBXui: return AArch64::LDURSBXi;
1798   case AArch64::LDRSBWui: return AArch64::LDURSBWi;
1799   case AArch64::LDRSHXui: return AArch64::LDURSHXi;
1800   case AArch64::LDRSHWui: return AArch64::LDURSHWi;
1801   case AArch64::LDRSWui: return AArch64::LDURSWi;
1802   case AArch64::STRXui: return AArch64::STURXi;
1803   case AArch64::STRWui: return AArch64::STURWi;
1804   case AArch64::STRBui: return AArch64::STURBi;
1805   case AArch64::STRHui: return AArch64::STURHi;
1806   case AArch64::STRSui: return AArch64::STURSi;
1807   case AArch64::STRDui: return AArch64::STURDi;
1808   case AArch64::STRQui: return AArch64::STURQi;
1809   case AArch64::STRBBui: return AArch64::STURBBi;
1810   case AArch64::STRHHui: return AArch64::STURHHi;
1811   }
1812 }
1813 
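// Return the operand index of the immediate offset of a load/store opcode:
// 3 for the paired and SVE ld1/st1 forms listed below, 2 otherwise.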
1814 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
1815   switch (Opc) {
1816   default:
1817     return 2;
1818   case AArch64::LDPXi:
1819   case AArch64::LDPDi:
1820   case AArch64::STPXi:
1821   case AArch64::STPDi:
1822   case AArch64::LDNPXi:
1823   case AArch64::LDNPDi:
1824   case AArch64::STNPXi:
1825   case AArch64::STNPDi:
1826   case AArch64::LDPQi:
1827   case AArch64::STPQi:
1828   case AArch64::LDNPQi:
1829   case AArch64::STNPQi:
1830   case AArch64::LDPWi:
1831   case AArch64::LDPSi:
1832   case AArch64::STPWi:
1833   case AArch64::STPSi:
1834   case AArch64::LDNPWi:
1835   case AArch64::LDNPSi:
1836   case AArch64::STNPWi:
1837   case AArch64::STNPSi:
1838   case AArch64::LDG:
1839   case AArch64::STGPi:
1840   case AArch64::LD1B_IMM:
1841   case AArch64::LD1H_IMM:
1842   case AArch64::LD1W_IMM:
1843   case AArch64::LD1D_IMM:
1844   case AArch64::ST1B_IMM:
1845   case AArch64::ST1H_IMM:
1846   case AArch64::ST1W_IMM:
1847   case AArch64::ST1D_IMM:
1848   case AArch64::LD1B_H_IMM:
1849   case AArch64::LD1SB_H_IMM:
1850   case AArch64::LD1H_S_IMM:
1851   case AArch64::LD1SH_S_IMM:
1852   case AArch64::LD1W_D_IMM:
1853   case AArch64::LD1SW_D_IMM:
1854   case AArch64::ST1B_H_IMM:
1855   case AArch64::ST1H_S_IMM:
1856   case AArch64::ST1W_D_IMM:
1857   case AArch64::LD1B_S_IMM:
1858   case AArch64::LD1SB_S_IMM:
1859   case AArch64::LD1H_D_IMM:
1860   case AArch64::LD1SH_D_IMM:
1861   case AArch64::ST1B_S_IMM:
1862   case AArch64::ST1H_D_IMM:
1863   case AArch64::LD1B_D_IMM:
1864   case AArch64::LD1SB_D_IMM:
1865   case AArch64::ST1B_D_IMM:
1866     return 3;
1867   case AArch64::ADDG:
1868   case AArch64::STGOffset:
1869   case AArch64::LDR_PXI:
1870   case AArch64::STR_PXI:
1871     return 2;
1872   }
1873 }
1874 
1875 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1876   switch (MI.getOpcode()) {
1877   default:
1878     return false;
1879   // Scaled instructions.
1880   case AArch64::STRSui:
1881   case AArch64::STRDui:
1882   case AArch64::STRQui:
1883   case AArch64::STRXui:
1884   case AArch64::STRWui:
1885   case AArch64::LDRSui:
1886   case AArch64::LDRDui:
1887   case AArch64::LDRQui:
1888   case AArch64::LDRXui:
1889   case AArch64::LDRWui:
1890   case AArch64::LDRSWui:
1891   // Unscaled instructions.
1892   case AArch64::STURSi:
1893   case AArch64::STURDi:
1894   case AArch64::STURQi:
1895   case AArch64::STURWi:
1896   case AArch64::STURXi:
1897   case AArch64::LDURSi:
1898   case AArch64::LDURDi:
1899   case AArch64::LDURQi:
1900   case AArch64::LDURWi:
1901   case AArch64::LDURXi:
1902   case AArch64::LDURSWi:
1903     return true;
1904   }
1905 }
1906 
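// Return the flag-setting (S-form) equivalent of \p Opc and report in
// \p Is64Bit whether it is a 64-bit operation, e.g. ADDWri -> ADDSWri.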
1907 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1908                                                    bool &Is64Bit) {
1909   switch (Opc) {
1910   default:
1911     llvm_unreachable("Opcode has no flag setting equivalent!");
1912   // 32-bit cases:
1913   case AArch64::ADDWri:
1914     Is64Bit = false;
1915     return AArch64::ADDSWri;
1916   case AArch64::ADDWrr:
1917     Is64Bit = false;
1918     return AArch64::ADDSWrr;
1919   case AArch64::ADDWrs:
1920     Is64Bit = false;
1921     return AArch64::ADDSWrs;
1922   case AArch64::ADDWrx:
1923     Is64Bit = false;
1924     return AArch64::ADDSWrx;
1925   case AArch64::ANDWri:
1926     Is64Bit = false;
1927     return AArch64::ANDSWri;
1928   case AArch64::ANDWrr:
1929     Is64Bit = false;
1930     return AArch64::ANDSWrr;
1931   case AArch64::ANDWrs:
1932     Is64Bit = false;
1933     return AArch64::ANDSWrs;
1934   case AArch64::BICWrr:
1935     Is64Bit = false;
1936     return AArch64::BICSWrr;
1937   case AArch64::BICWrs:
1938     Is64Bit = false;
1939     return AArch64::BICSWrs;
1940   case AArch64::SUBWri:
1941     Is64Bit = false;
1942     return AArch64::SUBSWri;
1943   case AArch64::SUBWrr:
1944     Is64Bit = false;
1945     return AArch64::SUBSWrr;
1946   case AArch64::SUBWrs:
1947     Is64Bit = false;
1948     return AArch64::SUBSWrs;
1949   case AArch64::SUBWrx:
1950     Is64Bit = false;
1951     return AArch64::SUBSWrx;
1952   // 64-bit cases:
1953   case AArch64::ADDXri:
1954     Is64Bit = true;
1955     return AArch64::ADDSXri;
1956   case AArch64::ADDXrr:
1957     Is64Bit = true;
1958     return AArch64::ADDSXrr;
1959   case AArch64::ADDXrs:
1960     Is64Bit = true;
1961     return AArch64::ADDSXrs;
1962   case AArch64::ADDXrx:
1963     Is64Bit = true;
1964     return AArch64::ADDSXrx;
1965   case AArch64::ANDXri:
1966     Is64Bit = true;
1967     return AArch64::ANDSXri;
1968   case AArch64::ANDXrr:
1969     Is64Bit = true;
1970     return AArch64::ANDSXrr;
1971   case AArch64::ANDXrs:
1972     Is64Bit = true;
1973     return AArch64::ANDSXrs;
1974   case AArch64::BICXrr:
1975     Is64Bit = true;
1976     return AArch64::BICSXrr;
1977   case AArch64::BICXrs:
1978     Is64Bit = true;
1979     return AArch64::BICSXrs;
1980   case AArch64::SUBXri:
1981     Is64Bit = true;
1982     return AArch64::SUBSXri;
1983   case AArch64::SUBXrr:
1984     Is64Bit = true;
1985     return AArch64::SUBSXrr;
1986   case AArch64::SUBXrs:
1987     Is64Bit = true;
1988     return AArch64::SUBSXrs;
1989   case AArch64::SUBXrx:
1990     Is64Bit = true;
1991     return AArch64::SUBSXrx;
1992   }
1993 }
1994 
1995 // Is this a candidate for ld/st merging or pairing?  For example, we don't
1996 // touch volatiles or load/stores that have a hint to avoid pair formation.
1997 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
1998   // If this is a volatile load/store, don't mess with it.
1999   if (MI.hasOrderedMemoryRef())
2000     return false;
2001 
2002   // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2003   assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
2004          "Expected a reg or frame index operand.");
2005   if (!MI.getOperand(2).isImm())
2006     return false;
2007 
2008   // Can't merge/pair if the instruction modifies the base register.
2009   // e.g., ldr x0, [x0]
2010   // This case will never occur with an FI base.
2011   if (MI.getOperand(1).isReg()) {
2012     Register BaseReg = MI.getOperand(1).getReg();
2013     const TargetRegisterInfo *TRI = &getRegisterInfo();
2014     if (MI.modifiesRegister(BaseReg, TRI))
2015       return false;
2016   }
2017 
2018   // Check if this load/store has a hint to avoid pair formation.
2019   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2020   if (isLdStPairSuppressed(MI))
2021     return false;
2022 
2023   // Do not pair any callee-save store/reload instructions in the
2024   // prologue/epilogue if the CFI information encoded the operations as separate
2025   // instructions, as that would make the size of the actual prologue differ
2026   // from the prologue size recorded in the Windows CFI.
2027   const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2028   bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2029                      MI.getMF()->getFunction().needsUnwindTableEntry();
2030   if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2031                       MI.getFlag(MachineInstr::FrameDestroy)))
2032     return false;
2033 
2034   // On some CPUs quad load/store pairs are slower than two single load/stores.
2035   if (Subtarget.isPaired128Slow()) {
2036     switch (MI.getOpcode()) {
2037     default:
2038       break;
2039     case AArch64::LDURQi:
2040     case AArch64::STURQi:
2041     case AArch64::LDRQui:
2042     case AArch64::STRQui:
2043       return false;
2044     }
2045   }
2046 
2047   return true;
2048 }
2049 
2050 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2051     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2052     int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
2053     const TargetRegisterInfo *TRI) const {
2054   if (!LdSt.mayLoadOrStore())
2055     return false;
2056 
2057   const MachineOperand *BaseOp;
2058   if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2059                                     Width, TRI))
2060     return false;
2061   BaseOps.push_back(BaseOp);
2062   return true;
2063 }
2064 
2065 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
2066     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
2067     bool &OffsetIsScalable, unsigned &Width,
2068     const TargetRegisterInfo *TRI) const {
2069   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2070   // Handle only loads/stores with base register followed by immediate offset.
2071   if (LdSt.getNumExplicitOperands() == 3) {
2072     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2073     if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2074         !LdSt.getOperand(2).isImm())
2075       return false;
2076   } else if (LdSt.getNumExplicitOperands() == 4) {
2077     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2078     if (!LdSt.getOperand(1).isReg() ||
2079         (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
2080         !LdSt.getOperand(3).isImm())
2081       return false;
2082   } else
2083     return false;
2084 
2085   // Get the scaling factor for the instruction and set the width for the
2086   // instruction.
2087   TypeSize Scale(0U, false);
2088   int64_t Dummy1, Dummy2;
2089 
2090   // If this returns false, then it's an instruction we don't want to handle.
2091   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2092     return false;
2093 
2094   // Compute the offset. Offset is calculated as the immediate operand
2095   // multiplied by the scaling factor. Unscaled instructions have scaling factor
2096   // set to 1.
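  // For example, an LDRXui with immediate 2 (scale 8) yields a byte offset of
  // 16, while an LDURXi with immediate 16 (scale 1) yields 16 directly.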
2097   if (LdSt.getNumExplicitOperands() == 3) {
2098     BaseOp = &LdSt.getOperand(1);
2099     Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
2100   } else {
2101     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2102     BaseOp = &LdSt.getOperand(2);
2103     Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
2104   }
2105   OffsetIsScalable = Scale.isScalable();
2106 
2107   if (!BaseOp->isReg() && !BaseOp->isFI())
2108     return false;
2109 
2110   return true;
2111 }
2112 
2113 MachineOperand &
2114 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2115   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2116   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2117   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2118   return OfsOp;
2119 }
2120 
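// Describe the addressing properties of a load/store opcode: the scale applied
// to its immediate operand, the access width in bytes (an upper bound for
// scalable accesses), and the legal immediate range. Returns false for opcodes
// this function does not model.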
2121 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
2122                                     unsigned &Width, int64_t &MinOffset,
2123                                     int64_t &MaxOffset) {
2124   const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
2125   switch (Opcode) {
2126   // Not a memory operation or something we want to handle.
2127   default:
2128     Scale = TypeSize::Fixed(0);
2129     Width = 0;
2130     MinOffset = MaxOffset = 0;
2131     return false;
2132   case AArch64::STRWpost:
2133   case AArch64::LDRWpost:
2134     Width = 32;
2135     Scale = TypeSize::Fixed(4);
2136     MinOffset = -256;
2137     MaxOffset = 255;
2138     break;
2139   case AArch64::LDURQi:
2140   case AArch64::STURQi:
2141     Width = 16;
2142     Scale = TypeSize::Fixed(1);
2143     MinOffset = -256;
2144     MaxOffset = 255;
2145     break;
2146   case AArch64::PRFUMi:
2147   case AArch64::LDURXi:
2148   case AArch64::LDURDi:
2149   case AArch64::STURXi:
2150   case AArch64::STURDi:
2151     Width = 8;
2152     Scale = TypeSize::Fixed(1);
2153     MinOffset = -256;
2154     MaxOffset = 255;
2155     break;
2156   case AArch64::LDURWi:
2157   case AArch64::LDURSi:
2158   case AArch64::LDURSWi:
2159   case AArch64::STURWi:
2160   case AArch64::STURSi:
2161     Width = 4;
2162     Scale = TypeSize::Fixed(1);
2163     MinOffset = -256;
2164     MaxOffset = 255;
2165     break;
2166   case AArch64::LDURHi:
2167   case AArch64::LDURHHi:
2168   case AArch64::LDURSHXi:
2169   case AArch64::LDURSHWi:
2170   case AArch64::STURHi:
2171   case AArch64::STURHHi:
2172     Width = 2;
2173     Scale = TypeSize::Fixed(1);
2174     MinOffset = -256;
2175     MaxOffset = 255;
2176     break;
2177   case AArch64::LDURBi:
2178   case AArch64::LDURBBi:
2179   case AArch64::LDURSBXi:
2180   case AArch64::LDURSBWi:
2181   case AArch64::STURBi:
2182   case AArch64::STURBBi:
2183     Width = 1;
2184     Scale = TypeSize::Fixed(1);
2185     MinOffset = -256;
2186     MaxOffset = 255;
2187     break;
2188   case AArch64::LDPQi:
2189   case AArch64::LDNPQi:
2190   case AArch64::STPQi:
2191   case AArch64::STNPQi:
2192     Scale = TypeSize::Fixed(16);
2193     Width = 32;
2194     MinOffset = -64;
2195     MaxOffset = 63;
2196     break;
2197   case AArch64::LDRQui:
2198   case AArch64::STRQui:
2199     Scale = TypeSize::Fixed(16);
2200     Width = 16;
2201     MinOffset = 0;
2202     MaxOffset = 4095;
2203     break;
2204   case AArch64::LDPXi:
2205   case AArch64::LDPDi:
2206   case AArch64::LDNPXi:
2207   case AArch64::LDNPDi:
2208   case AArch64::STPXi:
2209   case AArch64::STPDi:
2210   case AArch64::STNPXi:
2211   case AArch64::STNPDi:
2212     Scale = TypeSize::Fixed(8);
2213     Width = 16;
2214     MinOffset = -64;
2215     MaxOffset = 63;
2216     break;
2217   case AArch64::PRFMui:
2218   case AArch64::LDRXui:
2219   case AArch64::LDRDui:
2220   case AArch64::STRXui:
2221   case AArch64::STRDui:
2222     Scale = TypeSize::Fixed(8);
2223     Width = 8;
2224     MinOffset = 0;
2225     MaxOffset = 4095;
2226     break;
2227   case AArch64::LDPWi:
2228   case AArch64::LDPSi:
2229   case AArch64::LDNPWi:
2230   case AArch64::LDNPSi:
2231   case AArch64::STPWi:
2232   case AArch64::STPSi:
2233   case AArch64::STNPWi:
2234   case AArch64::STNPSi:
2235     Scale = TypeSize::Fixed(4);
2236     Width = 8;
2237     MinOffset = -64;
2238     MaxOffset = 63;
2239     break;
2240   case AArch64::LDRWui:
2241   case AArch64::LDRSui:
2242   case AArch64::LDRSWui:
2243   case AArch64::STRWui:
2244   case AArch64::STRSui:
2245     Scale = TypeSize::Fixed(4);
2246     Width = 4;
2247     MinOffset = 0;
2248     MaxOffset = 4095;
2249     break;
2250   case AArch64::LDRHui:
2251   case AArch64::LDRHHui:
2252   case AArch64::LDRSHWui:
2253   case AArch64::LDRSHXui:
2254   case AArch64::STRHui:
2255   case AArch64::STRHHui:
2256     Scale = TypeSize::Fixed(2);
2257     Width = 2;
2258     MinOffset = 0;
2259     MaxOffset = 4095;
2260     break;
2261   case AArch64::LDRBui:
2262   case AArch64::LDRBBui:
2263   case AArch64::LDRSBWui:
2264   case AArch64::LDRSBXui:
2265   case AArch64::STRBui:
2266   case AArch64::STRBBui:
2267     Scale = TypeSize::Fixed(1);
2268     Width = 1;
2269     MinOffset = 0;
2270     MaxOffset = 4095;
2271     break;
2272   case AArch64::ADDG:
2273     Scale = TypeSize::Fixed(16);
2274     Width = 0;
2275     MinOffset = 0;
2276     MaxOffset = 63;
2277     break;
2278   case AArch64::TAGPstack:
2279     Scale = TypeSize::Fixed(16);
2280     Width = 0;
2281     // TAGP with a negative offset turns into SUBP, which has a maximum offset
2282     // of 63 (not 64!).
2283     MinOffset = -63;
2284     MaxOffset = 63;
2285     break;
2286   case AArch64::LDG:
2287   case AArch64::STGOffset:
2288   case AArch64::STZGOffset:
2289     Scale = TypeSize::Fixed(16);
2290     Width = 16;
2291     MinOffset = -256;
2292     MaxOffset = 255;
2293     break;
2294   case AArch64::STR_ZZZZXI:
2295   case AArch64::LDR_ZZZZXI:
2296     Scale = TypeSize::Scalable(16);
2297     Width = SVEMaxBytesPerVector * 4;
2298     MinOffset = -256;
2299     MaxOffset = 252;
2300     break;
2301   case AArch64::STR_ZZZXI:
2302   case AArch64::LDR_ZZZXI:
2303     Scale = TypeSize::Scalable(16);
2304     Width = SVEMaxBytesPerVector * 3;
2305     MinOffset = -256;
2306     MaxOffset = 253;
2307     break;
2308   case AArch64::STR_ZZXI:
2309   case AArch64::LDR_ZZXI:
2310     Scale = TypeSize::Scalable(16);
2311     Width = SVEMaxBytesPerVector * 2;
2312     MinOffset = -256;
2313     MaxOffset = 254;
2314     break;
2315   case AArch64::LDR_PXI:
2316   case AArch64::STR_PXI:
2317     Scale = TypeSize::Scalable(2);
2318     Width = SVEMaxBytesPerVector / 8;
2319     MinOffset = -256;
2320     MaxOffset = 255;
2321     break;
2322   case AArch64::LDR_ZXI:
2323   case AArch64::STR_ZXI:
2324     Scale = TypeSize::Scalable(16);
2325     Width = SVEMaxBytesPerVector;
2326     MinOffset = -256;
2327     MaxOffset = 255;
2328     break;
2329   case AArch64::LD1B_IMM:
2330   case AArch64::LD1H_IMM:
2331   case AArch64::LD1W_IMM:
2332   case AArch64::LD1D_IMM:
2333   case AArch64::ST1B_IMM:
2334   case AArch64::ST1H_IMM:
2335   case AArch64::ST1W_IMM:
2336   case AArch64::ST1D_IMM:
2337     // A full vector's worth of data
2338     // Width = mbytes * elements
2339     Scale = TypeSize::Scalable(16);
2340     Width = SVEMaxBytesPerVector;
2341     MinOffset = -8;
2342     MaxOffset = 7;
2343     break;
2344   case AArch64::LD1B_H_IMM:
2345   case AArch64::LD1SB_H_IMM:
2346   case AArch64::LD1H_S_IMM:
2347   case AArch64::LD1SH_S_IMM:
2348   case AArch64::LD1W_D_IMM:
2349   case AArch64::LD1SW_D_IMM:
2350   case AArch64::ST1B_H_IMM:
2351   case AArch64::ST1H_S_IMM:
2352   case AArch64::ST1W_D_IMM:
2353     // A half vector's worth of data
2354     // Width = mbytes * elements
2355     Scale = TypeSize::Scalable(8);
2356     Width = SVEMaxBytesPerVector / 2;
2357     MinOffset = -8;
2358     MaxOffset = 7;
2359     break;
2360   case AArch64::LD1B_S_IMM:
2361   case AArch64::LD1SB_S_IMM:
2362   case AArch64::LD1H_D_IMM:
2363   case AArch64::LD1SH_D_IMM:
2364   case AArch64::ST1B_S_IMM:
2365   case AArch64::ST1H_D_IMM:
2366     // A quarter vector's worth of data
2367     // Width = mbytes * elements
2368     Scale = TypeSize::Scalable(4);
2369     Width = SVEMaxBytesPerVector / 4;
2370     MinOffset = -8;
2371     MaxOffset = 7;
2372     break;
2373   case AArch64::LD1B_D_IMM:
2374   case AArch64::LD1SB_D_IMM:
2375   case AArch64::ST1B_D_IMM:
2376     // An eighth of a vector's worth of data
2377     // Width = mbytes * elements
2378     Scale = TypeSize::Scalable(2);
2379     Width = SVEMaxBytesPerVector / 8;
2380     MinOffset = -8;
2381     MaxOffset = 7;
2382     break;
2383   case AArch64::ST2GOffset:
2384   case AArch64::STZ2GOffset:
2385     Scale = TypeSize::Fixed(16);
2386     Width = 32;
2387     MinOffset = -256;
2388     MaxOffset = 255;
2389     break;
2390   case AArch64::STGPi:
2391     Scale = TypeSize::Fixed(16);
2392     Width = 16;
2393     MinOffset = -64;
2394     MaxOffset = 63;
2395     break;
2396   }
2397 
2398   return true;
2399 }
2400 
2401 // Scaling factor for unscaled load or store.
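// For example, LDRXui and LDURXi both access 8 bytes, so both return 8.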
2402 int AArch64InstrInfo::getMemScale(unsigned Opc) {
2403   switch (Opc) {
2404   default:
2405     llvm_unreachable("Opcode has unknown scale!");
2406   case AArch64::LDRBBui:
2407   case AArch64::LDURBBi:
2408   case AArch64::LDRSBWui:
2409   case AArch64::LDURSBWi:
2410   case AArch64::STRBBui:
2411   case AArch64::STURBBi:
2412     return 1;
2413   case AArch64::LDRHHui:
2414   case AArch64::LDURHHi:
2415   case AArch64::LDRSHWui:
2416   case AArch64::LDURSHWi:
2417   case AArch64::STRHHui:
2418   case AArch64::STURHHi:
2419     return 2;
2420   case AArch64::LDRSui:
2421   case AArch64::LDURSi:
2422   case AArch64::LDRSWui:
2423   case AArch64::LDURSWi:
2424   case AArch64::LDRWui:
2425   case AArch64::LDURWi:
2426   case AArch64::STRSui:
2427   case AArch64::STURSi:
2428   case AArch64::STRWui:
2429   case AArch64::STURWi:
2430   case AArch64::LDPSi:
2431   case AArch64::LDPSWi:
2432   case AArch64::LDPWi:
2433   case AArch64::STPSi:
2434   case AArch64::STPWi:
2435     return 4;
2436   case AArch64::LDRDui:
2437   case AArch64::LDURDi:
2438   case AArch64::LDRXui:
2439   case AArch64::LDURXi:
2440   case AArch64::STRDui:
2441   case AArch64::STURDi:
2442   case AArch64::STRXui:
2443   case AArch64::STURXi:
2444   case AArch64::LDPDi:
2445   case AArch64::LDPXi:
2446   case AArch64::STPDi:
2447   case AArch64::STPXi:
2448     return 8;
2449   case AArch64::LDRQui:
2450   case AArch64::LDURQi:
2451   case AArch64::STRQui:
2452   case AArch64::STURQi:
2453   case AArch64::LDPQi:
2454   case AArch64::STPQi:
2455   case AArch64::STGOffset:
2456   case AArch64::STZGOffset:
2457   case AArch64::ST2GOffset:
2458   case AArch64::STZ2GOffset:
2459   case AArch64::STGPi:
2460     return 16;
2461   }
2462 }
2463 
2464 // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
2465 // scaled.
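// For example, with LDURXi (stride 8) a byte offset of 24 becomes element
// offset 3, whereas a byte offset of 20 is not a multiple of 8 and fails.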
2466 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2467   int Scale = AArch64InstrInfo::getMemScale(Opc);
2468 
2469   // If the byte-offset isn't a multiple of the stride, we can't scale this
2470   // offset.
2471   if (Offset % Scale != 0)
2472     return false;
2473 
2474   // Convert the byte-offset used by unscaled into an "element" offset used
2475   // by the scaled pair load/store instructions.
2476   Offset /= Scale;
2477   return true;
2478 }
2479 
2480 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2481   if (FirstOpc == SecondOpc)
2482     return true;
2483   // We can also pair sign-ext and zero-ext instructions.
2484   switch (FirstOpc) {
2485   default:
2486     return false;
2487   case AArch64::LDRWui:
2488   case AArch64::LDURWi:
2489     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2490   case AArch64::LDRSWui:
2491   case AArch64::LDURSWi:
2492     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2493   }
2494   // These instructions can't be paired based on their opcodes.
2495   return false;
2496 }
2497 
2498 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2499                             int64_t Offset1, unsigned Opcode1, int FI2,
2500                             int64_t Offset2, unsigned Opcode2) {
2501   // Accesses through fixed stack object frame indices may access a different
2502   // fixed stack slot. Check that the scaled object offsets plus the immediate offsets are adjacent.
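  // For example, two 8-byte loads from fixed objects at object offsets -16 and
  // -8, each with immediate 0, access adjacent slots and may be clustered.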
2503   if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2504     int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2505     int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2506     assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2507     // Convert to scaled object offsets.
2508     int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
2509     if (ObjectOffset1 % Scale1 != 0)
2510       return false;
2511     ObjectOffset1 /= Scale1;
2512     int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
2513     if (ObjectOffset2 % Scale2 != 0)
2514       return false;
2515     ObjectOffset2 /= Scale2;
2516     ObjectOffset1 += Offset1;
2517     ObjectOffset2 += Offset2;
2518     return ObjectOffset1 + 1 == ObjectOffset2;
2519   }
2520 
2521   return FI1 == FI2;
2522 }
2523 
2524 /// Detect opportunities for ldp/stp formation.
2525 ///
2526 /// Only called for LdSt for which getMemOperandWithOffset returns true.
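///
/// For example, "ldr x1, [x0, #8]" followed by "ldr x2, [x0, #16]" may later
/// be combined into "ldp x1, x2, [x0, #8]".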
2527 bool AArch64InstrInfo::shouldClusterMemOps(
2528     ArrayRef<const MachineOperand *> BaseOps1,
2529     ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
2530     unsigned NumBytes) const {
2531   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
2532   const MachineOperand &BaseOp1 = *BaseOps1.front();
2533   const MachineOperand &BaseOp2 = *BaseOps2.front();
2534   const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2535   const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2536   if (BaseOp1.getType() != BaseOp2.getType())
2537     return false;
2538 
2539   assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2540          "Only base registers and frame indices are supported.");
2541 
2542   // Check for both base regs and base FI.
2543   if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2544     return false;
2545 
2546   // Only cluster up to a single pair.
2547   if (NumLoads > 2)
2548     return false;
2549 
2550   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2551     return false;
2552 
2553   // Can we pair these instructions based on their opcodes?
2554   unsigned FirstOpc = FirstLdSt.getOpcode();
2555   unsigned SecondOpc = SecondLdSt.getOpcode();
2556   if (!canPairLdStOpc(FirstOpc, SecondOpc))
2557     return false;
2558 
2559   // Can't merge volatiles or load/stores that have a hint to avoid pair
2560   // formation, for example.
2561   if (!isCandidateToMergeOrPair(FirstLdSt) ||
2562       !isCandidateToMergeOrPair(SecondLdSt))
2563     return false;
2564 
2565   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2566   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2567   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2568     return false;
2569 
2570   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2571   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2572     return false;
2573 
2574   // Pairwise instructions have a 7-bit signed offset field.
2575   if (Offset1 > 63 || Offset1 < -64)
2576     return false;
2577 
2578   // The caller should already have ordered First/SecondLdSt by offset.
2579   // Note: except for non-equal frame index bases
2580   if (BaseOp1.isFI()) {
2581     assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2582            "Caller should have ordered offsets.");
2583 
2584     const MachineFrameInfo &MFI =
2585         FirstLdSt.getParent()->getParent()->getFrameInfo();
2586     return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2587                            BaseOp2.getIndex(), Offset2, SecondOpc);
2588   }
2589 
2590   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2591 
2592   return Offset1 + 1 == Offset2;
2593 }
2594 
2595 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2596                                             unsigned Reg, unsigned SubIdx,
2597                                             unsigned State,
2598                                             const TargetRegisterInfo *TRI) {
2599   if (!SubIdx)
2600     return MIB.addReg(Reg, State);
2601 
2602   if (Register::isPhysicalRegister(Reg))
2603     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2604   return MIB.addReg(Reg, State, SubIdx);
2605 }
2606 
2607 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2608                                         unsigned NumRegs) {
2609   // We really want the positive remainder mod 32 here; that happens to be
2610   // easily obtainable with a mask.
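  // For example, copying the tuple D1_D2_D3 into D2_D3_D4 in the forward
  // direction would overwrite D2 and D3 before they are read, so the caller
  // walks the sub-registers in reverse order in that case.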
2611   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2612 }
2613 
2614 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2615                                         MachineBasicBlock::iterator I,
2616                                         const DebugLoc &DL, MCRegister DestReg,
2617                                         MCRegister SrcReg, bool KillSrc,
2618                                         unsigned Opcode,
2619                                         ArrayRef<unsigned> Indices) const {
2620   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2621   const TargetRegisterInfo *TRI = &getRegisterInfo();
2622   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2623   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2624   unsigned NumRegs = Indices.size();
2625 
2626   int SubReg = 0, End = NumRegs, Incr = 1;
2627   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2628     SubReg = NumRegs - 1;
2629     End = -1;
2630     Incr = -1;
2631   }
2632 
2633   for (; SubReg != End; SubReg += Incr) {
2634     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2635     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2636     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2637     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2638   }
2639 }
2640 
2641 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2642                                        MachineBasicBlock::iterator I,
2643                                        DebugLoc DL, unsigned DestReg,
2644                                        unsigned SrcReg, bool KillSrc,
2645                                        unsigned Opcode, unsigned ZeroReg,
2646                                        llvm::ArrayRef<unsigned> Indices) const {
2647   const TargetRegisterInfo *TRI = &getRegisterInfo();
2648   unsigned NumRegs = Indices.size();
2649 
2650 #ifndef NDEBUG
2651   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2652   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2653   assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2654          "GPR reg sequences should not be able to overlap");
2655 #endif
2656 
2657   for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2658     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2659     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2660     MIB.addReg(ZeroReg);
2661     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2662     MIB.addImm(0);
2663   }
2664 }
2665 
2666 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2667                                    MachineBasicBlock::iterator I,
2668                                    const DebugLoc &DL, MCRegister DestReg,
2669                                    MCRegister SrcReg, bool KillSrc) const {
2670   if (AArch64::GPR32spRegClass.contains(DestReg) &&
2671       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2672     const TargetRegisterInfo *TRI = &getRegisterInfo();
2673 
2674     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2675       // If either operand is WSP, expand to ADD #0.
2676       if (Subtarget.hasZeroCycleRegMove()) {
2677         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2678         MCRegister DestRegX = TRI->getMatchingSuperReg(
2679             DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2680         MCRegister SrcRegX = TRI->getMatchingSuperReg(
2681             SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2682         // This instruction is reading and writing X registers.  This may upset
2683         // the register scavenger and machine verifier, so we need to indicate
2684         // that we are reading an undefined value from SrcRegX, but a proper
2685         // value from SrcReg.
2686         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2687             .addReg(SrcRegX, RegState::Undef)
2688             .addImm(0)
2689             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2690             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2691       } else {
2692         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2693             .addReg(SrcReg, getKillRegState(KillSrc))
2694             .addImm(0)
2695             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2696       }
2697     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2698       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2699           .addImm(0)
2700           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2701     } else {
2702       if (Subtarget.hasZeroCycleRegMove()) {
2703         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2704         MCRegister DestRegX = TRI->getMatchingSuperReg(
2705             DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2706         MCRegister SrcRegX = TRI->getMatchingSuperReg(
2707             SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
2708         // This instruction is reading and writing X registers.  This may upset
2709         // the register scavenger and machine verifier, so we need to indicate
2710         // that we are reading an undefined value from SrcRegX, but a proper
2711         // value from SrcReg.
2712         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2713             .addReg(AArch64::XZR)
2714             .addReg(SrcRegX, RegState::Undef)
2715             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2716       } else {
2717         // Otherwise, expand to ORR WZR.
2718         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2719             .addReg(AArch64::WZR)
2720             .addReg(SrcReg, getKillRegState(KillSrc));
2721       }
2722     }
2723     return;
2724   }
2725 
2726   // Copy a Predicate register by ORRing with itself.
2727   if (AArch64::PPRRegClass.contains(DestReg) &&
2728       AArch64::PPRRegClass.contains(SrcReg)) {
2729     assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2730     BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
2731       .addReg(SrcReg) // Pg
2732       .addReg(SrcReg)
2733       .addReg(SrcReg, getKillRegState(KillSrc));
2734     return;
2735   }
2736 
2737   // Copy a Z register by ORRing with itself.
2738   if (AArch64::ZPRRegClass.contains(DestReg) &&
2739       AArch64::ZPRRegClass.contains(SrcReg)) {
2740     assert(Subtarget.hasSVE() && "Unexpected SVE register.");
2741     BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
2742       .addReg(SrcReg)
2743       .addReg(SrcReg, getKillRegState(KillSrc));
2744     return;
2745   }
2746 
2747   if (AArch64::GPR64spRegClass.contains(DestReg) &&
2748       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2749     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2750       // If either operand is SP, expand to ADD #0.
2751       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2752           .addReg(SrcReg, getKillRegState(KillSrc))
2753           .addImm(0)
2754           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2755     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2756       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2757           .addImm(0)
2758           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2759     } else {
2760       // Otherwise, expand to ORR XZR.
2761       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2762           .addReg(AArch64::XZR)
2763           .addReg(SrcReg, getKillRegState(KillSrc));
2764     }
2765     return;
2766   }
2767 
2768   // Copy a DDDD register quad by copying the individual sub-registers.
2769   if (AArch64::DDDDRegClass.contains(DestReg) &&
2770       AArch64::DDDDRegClass.contains(SrcReg)) {
2771     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2772                                        AArch64::dsub2, AArch64::dsub3};
2773     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2774                      Indices);
2775     return;
2776   }
2777 
2778   // Copy a DDD register triple by copying the individual sub-registers.
2779   if (AArch64::DDDRegClass.contains(DestReg) &&
2780       AArch64::DDDRegClass.contains(SrcReg)) {
2781     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2782                                        AArch64::dsub2};
2783     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2784                      Indices);
2785     return;
2786   }
2787 
2788   // Copy a DD register pair by copying the individual sub-registers.
2789   if (AArch64::DDRegClass.contains(DestReg) &&
2790       AArch64::DDRegClass.contains(SrcReg)) {
2791     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2792     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2793                      Indices);
2794     return;
2795   }
2796 
2797   // Copy a QQQQ register quad by copying the individual sub-registers.
2798   if (AArch64::QQQQRegClass.contains(DestReg) &&
2799       AArch64::QQQQRegClass.contains(SrcReg)) {
2800     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2801                                        AArch64::qsub2, AArch64::qsub3};
2802     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2803                      Indices);
2804     return;
2805   }
2806 
2807   // Copy a QQQ register triple by copying the individual sub-registers.
2808   if (AArch64::QQQRegClass.contains(DestReg) &&
2809       AArch64::QQQRegClass.contains(SrcReg)) {
2810     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2811                                        AArch64::qsub2};
2812     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2813                      Indices);
2814     return;
2815   }
2816 
2817   // Copy a QQ register pair by copying the individual sub-registers.
2818   if (AArch64::QQRegClass.contains(DestReg) &&
2819       AArch64::QQRegClass.contains(SrcReg)) {
2820     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2821     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2822                      Indices);
2823     return;
2824   }
2825 
2826   if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2827       AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2828     static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2829     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2830                     AArch64::XZR, Indices);
2831     return;
2832   }
2833 
2834   if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2835       AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2836     static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2837     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2838                     AArch64::WZR, Indices);
2839     return;
2840   }
2841 
2842   if (AArch64::FPR128RegClass.contains(DestReg) &&
2843       AArch64::FPR128RegClass.contains(SrcReg)) {
2844     if (Subtarget.hasNEON()) {
2845       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2846           .addReg(SrcReg)
2847           .addReg(SrcReg, getKillRegState(KillSrc));
2848     } else {
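      // Without NEON there is no vector ORR, so bounce the 128-bit value
      // through the stack: a pre-indexed store below SP followed by a matching
      // pre-indexed reload that restores SP.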
2849       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2850           .addReg(AArch64::SP, RegState::Define)
2851           .addReg(SrcReg, getKillRegState(KillSrc))
2852           .addReg(AArch64::SP)
2853           .addImm(-16);
2854       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2855           .addReg(AArch64::SP, RegState::Define)
2856           .addReg(DestReg, RegState::Define)
2857           .addReg(AArch64::SP)
2858           .addImm(16);
2859     }
2860     return;
2861   }
2862 
2863   if (AArch64::FPR64RegClass.contains(DestReg) &&
2864       AArch64::FPR64RegClass.contains(SrcReg)) {
2865     if (Subtarget.hasNEON()) {
2866       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2867                                        &AArch64::FPR128RegClass);
2868       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2869                                       &AArch64::FPR128RegClass);
2870       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2871           .addReg(SrcReg)
2872           .addReg(SrcReg, getKillRegState(KillSrc));
2873     } else {
2874       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2875           .addReg(SrcReg, getKillRegState(KillSrc));
2876     }
2877     return;
2878   }
2879 
2880   if (AArch64::FPR32RegClass.contains(DestReg) &&
2881       AArch64::FPR32RegClass.contains(SrcReg)) {
2882     if (Subtarget.hasNEON()) {
2883       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2884                                        &AArch64::FPR128RegClass);
2885       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2886                                       &AArch64::FPR128RegClass);
2887       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2888           .addReg(SrcReg)
2889           .addReg(SrcReg, getKillRegState(KillSrc));
2890     } else {
2891       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2892           .addReg(SrcReg, getKillRegState(KillSrc));
2893     }
2894     return;
2895   }
2896 
2897   if (AArch64::FPR16RegClass.contains(DestReg) &&
2898       AArch64::FPR16RegClass.contains(SrcReg)) {
2899     if (Subtarget.hasNEON()) {
2900       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2901                                        &AArch64::FPR128RegClass);
2902       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2903                                       &AArch64::FPR128RegClass);
2904       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2905           .addReg(SrcReg)
2906           .addReg(SrcReg, getKillRegState(KillSrc));
2907     } else {
2908       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2909                                        &AArch64::FPR32RegClass);
2910       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2911                                       &AArch64::FPR32RegClass);
2912       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2913           .addReg(SrcReg, getKillRegState(KillSrc));
2914     }
2915     return;
2916   }
2917 
2918   if (AArch64::FPR8RegClass.contains(DestReg) &&
2919       AArch64::FPR8RegClass.contains(SrcReg)) {
2920     if (Subtarget.hasNEON()) {
2921       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2922                                        &AArch64::FPR128RegClass);
2923       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2924                                       &AArch64::FPR128RegClass);
2925       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2926           .addReg(SrcReg)
2927           .addReg(SrcReg, getKillRegState(KillSrc));
2928     } else {
2929       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2930                                        &AArch64::FPR32RegClass);
2931       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2932                                       &AArch64::FPR32RegClass);
2933       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2934           .addReg(SrcReg, getKillRegState(KillSrc));
2935     }
2936     return;
2937   }
2938 
2939   // Copies between GPR64 and FPR64.
2940   if (AArch64::FPR64RegClass.contains(DestReg) &&
2941       AArch64::GPR64RegClass.contains(SrcReg)) {
2942     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2943         .addReg(SrcReg, getKillRegState(KillSrc));
2944     return;
2945   }
2946   if (AArch64::GPR64RegClass.contains(DestReg) &&
2947       AArch64::FPR64RegClass.contains(SrcReg)) {
2948     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2949         .addReg(SrcReg, getKillRegState(KillSrc));
2950     return;
2951   }
2952   // Copies between GPR32 and FPR32.
2953   if (AArch64::FPR32RegClass.contains(DestReg) &&
2954       AArch64::GPR32RegClass.contains(SrcReg)) {
2955     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2956         .addReg(SrcReg, getKillRegState(KillSrc));
2957     return;
2958   }
2959   if (AArch64::GPR32RegClass.contains(DestReg) &&
2960       AArch64::FPR32RegClass.contains(SrcReg)) {
2961     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2962         .addReg(SrcReg, getKillRegState(KillSrc));
2963     return;
2964   }
2965 
2966   if (DestReg == AArch64::NZCV) {
2967     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2968     BuildMI(MBB, I, DL, get(AArch64::MSR))
2969         .addImm(AArch64SysReg::NZCV)
2970         .addReg(SrcReg, getKillRegState(KillSrc))
2971         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2972     return;
2973   }
2974 
2975   if (SrcReg == AArch64::NZCV) {
2976     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2977     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2978         .addImm(AArch64SysReg::NZCV)
2979         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2980     return;
2981   }
2982 
2983   llvm_unreachable("unimplemented reg-to-reg copy");
2984 }
2985 
2986 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2987                                     MachineBasicBlock &MBB,
2988                                     MachineBasicBlock::iterator InsertBefore,
2989                                     const MCInstrDesc &MCID,
2990                                     Register SrcReg, bool IsKill,
2991                                     unsigned SubIdx0, unsigned SubIdx1, int FI,
2992                                     MachineMemOperand *MMO) {
2993   Register SrcReg0 = SrcReg;
2994   Register SrcReg1 = SrcReg;
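  // For a physical register the pair is split into its two sub-registers;
  // a virtual register is referenced twice, with the sub-register indices
  // left on the operands for the register allocator to resolve.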
2995   if (Register::isPhysicalRegister(SrcReg)) {
2996     SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2997     SubIdx0 = 0;
2998     SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2999     SubIdx1 = 0;
3000   }
3001   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3002       .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
3003       .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
3004       .addFrameIndex(FI)
3005       .addImm(0)
3006       .addMemOperand(MMO);
3007 }
3008 
3009 void AArch64InstrInfo::storeRegToStackSlot(
3010     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
3011     bool isKill, int FI, const TargetRegisterClass *RC,
3012     const TargetRegisterInfo *TRI) const {
3013   MachineFunction &MF = *MBB.getParent();
3014   MachineFrameInfo &MFI = MF.getFrameInfo();
3015 
3016   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3017   MachineMemOperand *MMO =
3018       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
3019                               MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3020   unsigned Opc = 0;
3021   bool Offset = true;
3022   unsigned StackID = TargetStackID::Default;
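  // Select the store opcode from the spill size of the register class.
  // SVE classes also move the frame object onto the scalable (SVE) stack,
  // and the structured NEON stores (ST1*) take no immediate offset.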
3023   switch (TRI->getSpillSize(*RC)) {
3024   case 1:
3025     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3026       Opc = AArch64::STRBui;
3027     break;
3028   case 2:
3029     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3030       Opc = AArch64::STRHui;
3031     else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3032       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3033       Opc = AArch64::STR_PXI;
3034       StackID = TargetStackID::SVEVector;
3035     }
3036     break;
3037   case 4:
3038     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3039       Opc = AArch64::STRWui;
3040       if (Register::isVirtualRegister(SrcReg))
3041         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
3042       else
3043         assert(SrcReg != AArch64::WSP);
3044     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3045       Opc = AArch64::STRSui;
3046     break;
3047   case 8:
3048     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3049       Opc = AArch64::STRXui;
3050       if (Register::isVirtualRegister(SrcReg))
3051         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3052       else
3053         assert(SrcReg != AArch64::SP);
3054     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3055       Opc = AArch64::STRDui;
3056     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3057       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3058                               get(AArch64::STPWi), SrcReg, isKill,
3059                               AArch64::sube32, AArch64::subo32, FI, MMO);
3060       return;
3061     }
3062     break;
3063   case 16:
3064     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3065       Opc = AArch64::STRQui;
3066     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3067       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3068       Opc = AArch64::ST1Twov1d;
3069       Offset = false;
3070     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3071       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3072                               get(AArch64::STPXi), SrcReg, isKill,
3073                               AArch64::sube64, AArch64::subo64, FI, MMO);
3074       return;
3075     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3076       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3077       Opc = AArch64::STR_ZXI;
3078       StackID = TargetStackID::SVEVector;
3079     }
3080     break;
3081   case 24:
3082     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3083       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3084       Opc = AArch64::ST1Threev1d;
3085       Offset = false;
3086     }
3087     break;
3088   case 32:
3089     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3090       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3091       Opc = AArch64::ST1Fourv1d;
3092       Offset = false;
3093     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3094       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3095       Opc = AArch64::ST1Twov2d;
3096       Offset = false;
3097     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
3098       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3099       Opc = AArch64::STR_ZZXI;
3100       StackID = TargetStackID::SVEVector;
3101     }
3102     break;
3103   case 48:
3104     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3105       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3106       Opc = AArch64::ST1Threev2d;
3107       Offset = false;
3108     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
3109       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3110       Opc = AArch64::STR_ZZZXI;
3111       StackID = TargetStackID::SVEVector;
3112     }
3113     break;
3114   case 64:
3115     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3116       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3117       Opc = AArch64::ST1Fourv2d;
3118       Offset = false;
3119     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
3120       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3121       Opc = AArch64::STR_ZZZZXI;
3122       StackID = TargetStackID::SVEVector;
3123     }
3124     break;
3125   }
3126   assert(Opc && "Unknown register class");
3127   MFI.setStackID(FI, StackID);
3128 
3129   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3130                                      .addReg(SrcReg, getKillRegState(isKill))
3131                                      .addFrameIndex(FI);
3132 
3133   if (Offset)
3134     MI.addImm(0);
3135   MI.addMemOperand(MMO);
3136 }
3137 
3138 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
3139                                      MachineBasicBlock &MBB,
3140                                      MachineBasicBlock::iterator InsertBefore,
3141                                      const MCInstrDesc &MCID,
3142                                      Register DestReg, unsigned SubIdx0,
3143                                      unsigned SubIdx1, int FI,
3144                                      MachineMemOperand *MMO) {
3145   Register DestReg0 = DestReg;
3146   Register DestReg1 = DestReg;
3147   bool IsUndef = true;
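  // When DestReg is still virtual, the pair load defines it through
  // sub-register indices, so those defs are marked undef (no prior value of
  // the super-register is live). Physical registers are split instead.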
3148   if (Register::isPhysicalRegister(DestReg)) {
3149     DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
3150     SubIdx0 = 0;
3151     DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
3152     SubIdx1 = 0;
3153     IsUndef = false;
3154   }
3155   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3156       .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
3157       .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
3158       .addFrameIndex(FI)
3159       .addImm(0)
3160       .addMemOperand(MMO);
3161 }
3162 
3163 void AArch64InstrInfo::loadRegFromStackSlot(
3164     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
3165     int FI, const TargetRegisterClass *RC,
3166     const TargetRegisterInfo *TRI) const {
3167   MachineFunction &MF = *MBB.getParent();
3168   MachineFrameInfo &MFI = MF.getFrameInfo();
3169   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3170   MachineMemOperand *MMO =
3171       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
3172                               MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3173 
3174   unsigned Opc = 0;
3175   bool Offset = true;
3176   unsigned StackID = TargetStackID::Default;
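  // Mirror of storeRegToStackSlot: pick the load opcode from the spill size,
  // use the SVE stack for scalable classes, and omit the immediate offset for
  // structured NEON loads (LD1*).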
3177   switch (TRI->getSpillSize(*RC)) {
3178   case 1:
3179     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3180       Opc = AArch64::LDRBui;
3181     break;
3182   case 2:
3183     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3184       Opc = AArch64::LDRHui;
3185     else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3186       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3187       Opc = AArch64::LDR_PXI;
3188       StackID = TargetStackID::SVEVector;
3189     }
3190     break;
3191   case 4:
3192     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3193       Opc = AArch64::LDRWui;
3194       if (Register::isVirtualRegister(DestReg))
3195         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
3196       else
3197         assert(DestReg != AArch64::WSP);
3198     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3199       Opc = AArch64::LDRSui;
3200     break;
3201   case 8:
3202     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3203       Opc = AArch64::LDRXui;
3204       if (Register::isVirtualRegister(DestReg))
3205         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
3206       else
3207         assert(DestReg != AArch64::SP);
3208     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3209       Opc = AArch64::LDRDui;
3210     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3211       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3212                                get(AArch64::LDPWi), DestReg, AArch64::sube32,
3213                                AArch64::subo32, FI, MMO);
3214       return;
3215     }
3216     break;
3217   case 16:
3218     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3219       Opc = AArch64::LDRQui;
3220     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3221       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3222       Opc = AArch64::LD1Twov1d;
3223       Offset = false;
3224     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3225       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3226                                get(AArch64::LDPXi), DestReg, AArch64::sube64,
3227                                AArch64::subo64, FI, MMO);
3228       return;
3229     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3230       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3231       Opc = AArch64::LDR_ZXI;
3232       StackID = TargetStackID::SVEVector;
3233     }
3234     break;
3235   case 24:
3236     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3237       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3238       Opc = AArch64::LD1Threev1d;
3239       Offset = false;
3240     }
3241     break;
3242   case 32:
3243     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3244       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3245       Opc = AArch64::LD1Fourv1d;
3246       Offset = false;
3247     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3248       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3249       Opc = AArch64::LD1Twov2d;
3250       Offset = false;
3251     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
3252       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3253       Opc = AArch64::LDR_ZZXI;
3254       StackID = TargetStackID::SVEVector;
3255     }
3256     break;
3257   case 48:
3258     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3259       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3260       Opc = AArch64::LD1Threev2d;
3261       Offset = false;
3262     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
3263       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3264       Opc = AArch64::LDR_ZZZXI;
3265       StackID = TargetStackID::SVEVector;
3266     }
3267     break;
3268   case 64:
3269     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3270       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3271       Opc = AArch64::LD1Fourv2d;
3272       Offset = false;
3273     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
3274       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3275       Opc = AArch64::LDR_ZZZZXI;
3276       StackID = TargetStackID::SVEVector;
3277     }
3278     break;
3279   }
3280 
3281   assert(Opc && "Unknown register class");
3282   MFI.setStackID(FI, StackID);
3283 
3284   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3285                                      .addReg(DestReg, getDefRegState(true))
3286                                      .addFrameIndex(FI);
3287   if (Offset)
3288     MI.addImm(0);
3289   MI.addMemOperand(MMO);
3290 }
3291 
3292 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
3293                                            const MachineInstr &UseMI,
3294                                            const TargetRegisterInfo *TRI) {
3295   return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
3296                                          UseMI.getIterator()),
3297                 [TRI](const MachineInstr &I) {
3298                   return I.modifiesRegister(AArch64::NZCV, TRI) ||
3299                          I.readsRegister(AArch64::NZCV, TRI);
3300                 });
3301 }
3302 
3303 // Helper function to emit a frame offset adjustment from a given
3304 // pointer (SrcReg) into DestReg. This function is explicit in that the
3305 // caller must supply the add/sub opcode to use.
3306 static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
3307                                MachineBasicBlock::iterator MBBI,
3308                                const DebugLoc &DL, unsigned DestReg,
3309                                unsigned SrcReg, int64_t Offset, unsigned Opc,
3310                                const TargetInstrInfo *TII,
3311                                MachineInstr::MIFlag Flag, bool NeedsWinCFI,
3312                                bool *HasWinCFI) {
3313   int Sign = 1;
3314   unsigned MaxEncoding, ShiftSize;
3315   switch (Opc) {
3316   case AArch64::ADDXri:
3317   case AArch64::ADDSXri:
3318   case AArch64::SUBXri:
3319   case AArch64::SUBSXri:
3320     MaxEncoding = 0xfff;
3321     ShiftSize = 12;
3322     break;
3323   case AArch64::ADDVL_XXI:
3324   case AArch64::ADDPL_XXI:
3325     MaxEncoding = 31;
3326     ShiftSize = 0;
3327     if (Offset < 0) {
3328       MaxEncoding = 32;
3329       Sign = -1;
3330       Offset = -Offset;
3331     }
3332     break;
3333   default:
3334     llvm_unreachable("Unsupported opcode");
3335   }
3336 
3337   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
3338   // scratch register.  If DestReg is a virtual register, use it as the
3339   // scratch register; otherwise, create a new virtual register (to be
3340   // replaced by the scavenger at the end of PEI).  That case can be optimized
3341   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
3342   // register can be loaded with offset%8 and the add/sub can use an extending
3343   // instruction with LSL#3.
3344   // Currently the function handles any offset but may generate a poor
3345   // sequence of code.
3346   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
3347 
3348   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
3349   Register TmpReg = DestReg;
3350   if (TmpReg == AArch64::XZR)
3351     TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
3352         &AArch64::GPR64RegClass);
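  // Materialize the offset as a chain of add/sub instructions, consuming at
  // most MaxEncodableValue per step (large chunks use LSL #ShiftSize). The
  // chain accumulates through TmpReg (a scratch register when DestReg is XZR)
  // and the final instruction always writes DestReg.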
3353   do {
3354     uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
3355     unsigned LocalShiftSize = 0;
3356     if (ThisVal > MaxEncoding) {
3357       ThisVal = ThisVal >> ShiftSize;
3358       LocalShiftSize = ShiftSize;
3359     }
3360     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3361            "Encoding cannot handle value that big");
3362 
3363     Offset -= ThisVal << LocalShiftSize;
3364     if (Offset == 0)
3365       TmpReg = DestReg;
3366     auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
3367                    .addReg(SrcReg)
3368                    .addImm(Sign * (int)ThisVal);
3369     if (ShiftSize)
3370       MBI = MBI.addImm(
3371           AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
3372     MBI = MBI.setMIFlag(Flag);
3373 
3374     if (NeedsWinCFI) {
3375       assert(Sign == 1 && "SEH directives should always have a positive sign");
3376       int Imm = (int)(ThisVal << LocalShiftSize);
3377       if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
3378           (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
3379         if (HasWinCFI)
3380           *HasWinCFI = true;
3381         if (Imm == 0)
3382           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
3383         else
3384           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
3385               .addImm(Imm)
3386               .setMIFlag(Flag);
3387         assert(Offset == 0 && "Expected remaining offset to be zero to "
3388                               "emit a single SEH directive");
3389       } else if (DestReg == AArch64::SP) {
3390         if (HasWinCFI)
3391           *HasWinCFI = true;
3392         assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
3393         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
3394             .addImm(Imm)
3395             .setMIFlag(Flag);
3396       }
3397       if (HasWinCFI)
3398         *HasWinCFI = true;
3399     }
3400 
3401     SrcReg = TmpReg;
3402   } while (Offset);
3403 }
3404 
3405 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
3406                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
3407                            unsigned DestReg, unsigned SrcReg,
3408                            StackOffset Offset, const TargetInstrInfo *TII,
3409                            MachineInstr::MIFlag Flag, bool SetNZCV,
3410                            bool NeedsWinCFI, bool *HasWinCFI) {
3411   int64_t Bytes, NumPredicateVectors, NumDataVectors;
3412   Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
3413 
3414   // First emit non-scalable frame offsets, or a simple 'mov'.
3415   if (Bytes || (!Offset && SrcReg != DestReg)) {
3416     assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
3417            "SP increment/decrement not 16-byte aligned");
3418     unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
3419     if (Bytes < 0) {
3420       Bytes = -Bytes;
3421       Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
3422     }
3423     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
3424                        NeedsWinCFI, HasWinCFI);
3425     SrcReg = DestReg;
3426   }
3427 
3428   assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
3429          "SetNZCV not supported with SVE vectors");
3430   assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
3431          "WinCFI not supported with SVE vectors");
3432 
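  // The scalable components are emitted with ADDVL (multiples of the SVE
  // vector length) and ADDPL (multiples of the predicate length), so the
  // actual byte adjustment is determined by the runtime vector length.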
3433   if (NumDataVectors) {
3434     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
3435                        AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3436     SrcReg = DestReg;
3437   }
3438 
3439   if (NumPredicateVectors) {
3440     assert(DestReg != AArch64::SP && "Unaligned access to SP");
3441     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
3442                        AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
3443   }
3444 }
3445 
3446 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3447     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3448     MachineBasicBlock::iterator InsertPt, int FrameIndex,
3449     LiveIntervals *LIS, VirtRegMap *VRM) const {
3450   // This is a bit of a hack. Consider this instruction:
3451   //
3452   //   %0 = COPY %sp; GPR64all:%0
3453   //
3454   // We explicitly chose GPR64all for the virtual register so such a copy might
3455   // be eliminated by RegisterCoalescer. However, that may not be possible, and
3456   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3457   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3458   //
3459   // To prevent that, we are going to constrain the %0 register class here.
3460   //
3461   // <rdar://problem/11522048>
3462   //
3463   if (MI.isFullCopy()) {
3464     Register DstReg = MI.getOperand(0).getReg();
3465     Register SrcReg = MI.getOperand(1).getReg();
3466     if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
3467       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3468       return nullptr;
3469     }
3470     if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
3471       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3472       return nullptr;
3473     }
3474   }
3475 
3476   // Handle the case where a copy is being spilled or filled but the source
3477   // and destination register class don't match.  For example:
3478   //
3479   //   %0 = COPY %xzr; GPR64common:%0
3480   //
3481   // In this case we can still safely fold away the COPY and generate the
3482   // following spill code:
3483   //
3484   //   STRXui %xzr, %stack.0
3485   //
3486   // This also eliminates spilled cross register class COPYs (e.g. between x and
3487   // d regs) of the same size.  For example:
3488   //
3489   //   %0 = COPY %1; GPR64:%0, FPR64:%1
3490   //
3491   // will be filled as
3492   //
3493   //   LDRDui %0, fi<#0>
3494   //
3495   // instead of
3496   //
3497   //   LDRXui %Temp, fi<#0>
3498   //   %0 = FMOV %Temp
3499   //
3500   if (MI.isCopy() && Ops.size() == 1 &&
3501       // Make sure we're only folding the explicit COPY defs/uses.
3502       (Ops[0] == 0 || Ops[0] == 1)) {
3503     bool IsSpill = Ops[0] == 0;
3504     bool IsFill = !IsSpill;
3505     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3506     const MachineRegisterInfo &MRI = MF.getRegInfo();
3507     MachineBasicBlock &MBB = *MI.getParent();
3508     const MachineOperand &DstMO = MI.getOperand(0);
3509     const MachineOperand &SrcMO = MI.getOperand(1);
3510     Register DstReg = DstMO.getReg();
3511     Register SrcReg = SrcMO.getReg();
3512     // This is slightly expensive to compute for physical regs since
3513     // getMinimalPhysRegClass is slow.
3514     auto getRegClass = [&](unsigned Reg) {
3515       return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
3516                                               : TRI.getMinimalPhysRegClass(Reg);
3517     };
3518 
3519     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3520       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3521                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3522              "Mismatched register size in non subreg COPY");
3523       if (IsSpill)
3524         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3525                             getRegClass(SrcReg), &TRI);
3526       else
3527         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3528                              getRegClass(DstReg), &TRI);
3529       return &*--InsertPt;
3530     }
3531 
3532     // Handle cases like spilling def of:
3533     //
3534     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3535     //
3536     // where the physical register source can be widened and stored to the full
3537     // virtual reg destination stack slot, in this case producing:
3538     //
3539     //   STRXui %xzr, %stack.0
3540     //
3541     if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
3542       assert(SrcMO.getSubReg() == 0 &&
3543              "Unexpected subreg on physical register");
3544       const TargetRegisterClass *SpillRC;
3545       unsigned SpillSubreg;
3546       switch (DstMO.getSubReg()) {
3547       default:
3548         SpillRC = nullptr;
3549         break;
3550       case AArch64::sub_32:
3551       case AArch64::ssub:
3552         if (AArch64::GPR32RegClass.contains(SrcReg)) {
3553           SpillRC = &AArch64::GPR64RegClass;
3554           SpillSubreg = AArch64::sub_32;
3555         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3556           SpillRC = &AArch64::FPR64RegClass;
3557           SpillSubreg = AArch64::ssub;
3558         } else
3559           SpillRC = nullptr;
3560         break;
3561       case AArch64::dsub:
3562         if (AArch64::FPR64RegClass.contains(SrcReg)) {
3563           SpillRC = &AArch64::FPR128RegClass;
3564           SpillSubreg = AArch64::dsub;
3565         } else
3566           SpillRC = nullptr;
3567         break;
3568       }
3569 
3570       if (SpillRC)
3571         if (unsigned WidenedSrcReg =
3572                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3573           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3574                               FrameIndex, SpillRC, &TRI);
3575           return &*--InsertPt;
3576         }
3577     }
3578 
3579     // Handle cases like filling use of:
3580     //
3581     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3582     //
3583     // where we can load the full virtual reg source stack slot, into the subreg
3584     // destination, in this case producing:
3585     //
3586     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
3587     //
3588     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3589       const TargetRegisterClass *FillRC;
3590       switch (DstMO.getSubReg()) {
3591       default:
3592         FillRC = nullptr;
3593         break;
3594       case AArch64::sub_32:
3595         FillRC = &AArch64::GPR32RegClass;
3596         break;
3597       case AArch64::ssub:
3598         FillRC = &AArch64::FPR32RegClass;
3599         break;
3600       case AArch64::dsub:
3601         FillRC = &AArch64::FPR64RegClass;
3602         break;
3603       }
3604 
3605       if (FillRC) {
3606         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3607                    TRI.getRegSizeInBits(*FillRC) &&
3608                "Mismatched regclass size on folded subreg COPY");
3609         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3610         MachineInstr &LoadMI = *--InsertPt;
3611         MachineOperand &LoadDst = LoadMI.getOperand(0);
3612         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3613         LoadDst.setSubReg(DstMO.getSubReg());
3614         LoadDst.setIsUndef();
3615         return &LoadMI;
3616       }
3617     }
3618   }
3619 
3620   // Cannot fold.
3621   return nullptr;
3622 }
3623 
3624 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
3625                                     StackOffset &SOffset,
3626                                     bool *OutUseUnscaledOp,
3627                                     unsigned *OutUnscaledOp,
3628                                     int64_t *EmittableOffset) {
3629   // Set output values in case of early exit.
3630   if (EmittableOffset)
3631     *EmittableOffset = 0;
3632   if (OutUseUnscaledOp)
3633     *OutUseUnscaledOp = false;
3634   if (OutUnscaledOp)
3635     *OutUnscaledOp = 0;
3636 
3637   // Exit early for structured vector spills/fills as they can't take an
3638   // immediate offset.
3639   switch (MI.getOpcode()) {
3640   default:
3641     break;
3642   case AArch64::LD1Twov2d:
3643   case AArch64::LD1Threev2d:
3644   case AArch64::LD1Fourv2d:
3645   case AArch64::LD1Twov1d:
3646   case AArch64::LD1Threev1d:
3647   case AArch64::LD1Fourv1d:
3648   case AArch64::ST1Twov2d:
3649   case AArch64::ST1Threev2d:
3650   case AArch64::ST1Fourv2d:
3651   case AArch64::ST1Twov1d:
3652   case AArch64::ST1Threev1d:
3653   case AArch64::ST1Fourv1d:
3654   case AArch64::IRG:
3655   case AArch64::IRGstack:
3656   case AArch64::STGloop:
3657   case AArch64::STZGloop:
3658     return AArch64FrameOffsetCannotUpdate;
3659   }
3660 
3661   // Get the min/max offset and the scale.
3662   TypeSize ScaleValue(0U, false);
3663   unsigned Width;
3664   int64_t MinOff, MaxOff;
3665   if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
3666                                       MaxOff))
3667     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3668 
3669   // Construct the complete offset.
3670   bool IsMulVL = ScaleValue.isScalable();
3671   unsigned Scale = ScaleValue.getKnownMinSize();
3672   int64_t Offset = IsMulVL ? SOffset.getScalableBytes() : SOffset.getBytes();
3673 
3674   const MachineOperand &ImmOpnd =
3675       MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
3676   Offset += ImmOpnd.getImm() * Scale;
3677 
3678   // If the offset is not a multiple of the scale, or if it is negative,
3679   // rewrite the instruction to use the unscaled variant when one is
3680   // available.
3681   Optional<unsigned> UnscaledOp =
3682       AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
3683   bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
3684   if (useUnscaledOp &&
3685       !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
3686                                       MaxOff))
3687     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3688 
3689   Scale = ScaleValue.getKnownMinSize();
3690   assert(IsMulVL == ScaleValue.isScalable() &&
3691          "Unscaled opcode has different value for scalable");
3692 
3693   int64_t Remainder = Offset % Scale;
3694   assert(!(Remainder && useUnscaledOp) &&
3695          "Cannot have remainder when using unscaled op");
3696 
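  // Fold as much of the offset as the immediate field allows. Whatever does
  // not fit (plus any scaling remainder) is left in SOffset for the caller to
  // materialize separately.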
3697   assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
3698   int64_t NewOffset = Offset / Scale;
3699   if (MinOff <= NewOffset && NewOffset <= MaxOff)
3700     Offset = Remainder;
3701   else {
3702     NewOffset = NewOffset < 0 ? MinOff : MaxOff;
3703     Offset = Offset - NewOffset * Scale + Remainder;
3704   }
3705 
3706   if (EmittableOffset)
3707     *EmittableOffset = NewOffset;
3708   if (OutUseUnscaledOp)
3709     *OutUseUnscaledOp = useUnscaledOp;
3710   if (OutUnscaledOp && UnscaledOp)
3711     *OutUnscaledOp = *UnscaledOp;
3712 
3713   if (IsMulVL)
3714     SOffset = StackOffset(Offset, MVT::nxv1i8) +
3715               StackOffset(SOffset.getBytes(), MVT::i8);
3716   else
3717     SOffset = StackOffset(Offset, MVT::i8) +
3718               StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
3719   return AArch64FrameOffsetCanUpdate |
3720          (SOffset ? 0 : AArch64FrameOffsetIsLegal);
3721 }
3722 
3723 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3724                                     unsigned FrameReg, StackOffset &Offset,
3725                                     const AArch64InstrInfo *TII) {
3726   unsigned Opcode = MI.getOpcode();
3727   unsigned ImmIdx = FrameRegIdx + 1;
3728 
3729   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3730     Offset += StackOffset(MI.getOperand(ImmIdx).getImm(), MVT::i8);
3731     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3732                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3733                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3734     MI.eraseFromParent();
3735     Offset = StackOffset();
3736     return true;
3737   }
3738 
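  // For other memory instructions, ask isAArch64FrameOffsetLegal how much of
  // the offset can be encoded; the instruction is rewritten in place and any
  // leftover offset is passed back to the caller through Offset.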
3739   int64_t NewOffset;
3740   unsigned UnscaledOp;
3741   bool UseUnscaledOp;
3742   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3743                                          &UnscaledOp, &NewOffset);
3744   if (Status & AArch64FrameOffsetCanUpdate) {
3745     if (Status & AArch64FrameOffsetIsLegal)
3746       // Replace the FrameIndex with FrameReg.
3747       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3748     if (UseUnscaledOp)
3749       MI.setDesc(TII->get(UnscaledOp));
3750 
3751     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3752     return !Offset;
3753   }
3754 
3755   return false;
3756 }
3757 
3758 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3759   NopInst.setOpcode(AArch64::HINT);
3760   NopInst.addOperand(MCOperand::createImm(0));
3761 }
3762 
3763 // AArch64 supports MachineCombiner.
3764 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3765 
3766 // True when Opc sets the NZCV flags.
3767 static bool isCombineInstrSettingFlag(unsigned Opc) {
3768   switch (Opc) {
3769   case AArch64::ADDSWrr:
3770   case AArch64::ADDSWri:
3771   case AArch64::ADDSXrr:
3772   case AArch64::ADDSXri:
3773   case AArch64::SUBSWrr:
3774   case AArch64::SUBSXrr:
3775   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3776   case AArch64::SUBSWri:
3777   case AArch64::SUBSXri:
3778     return true;
3779   default:
3780     break;
3781   }
3782   return false;
3783 }
3784 
3785 // 32b Opcodes that can be combined with a MUL
3786 static bool isCombineInstrCandidate32(unsigned Opc) {
3787   switch (Opc) {
3788   case AArch64::ADDWrr:
3789   case AArch64::ADDWri:
3790   case AArch64::SUBWrr:
3791   case AArch64::ADDSWrr:
3792   case AArch64::ADDSWri:
3793   case AArch64::SUBSWrr:
3794   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3795   case AArch64::SUBWri:
3796   case AArch64::SUBSWri:
3797     return true;
3798   default:
3799     break;
3800   }
3801   return false;
3802 }
3803 
3804 // 64b Opcodes that can be combined with a MUL
3805 static bool isCombineInstrCandidate64(unsigned Opc) {
3806   switch (Opc) {
3807   case AArch64::ADDXrr:
3808   case AArch64::ADDXri:
3809   case AArch64::SUBXrr:
3810   case AArch64::ADDSXrr:
3811   case AArch64::ADDSXri:
3812   case AArch64::SUBSXrr:
3813   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3814   case AArch64::SUBXri:
3815   case AArch64::SUBSXri:
3816   case AArch64::ADDv8i8:
3817   case AArch64::ADDv16i8:
3818   case AArch64::ADDv4i16:
3819   case AArch64::ADDv8i16:
3820   case AArch64::ADDv2i32:
3821   case AArch64::ADDv4i32:
3822   case AArch64::SUBv8i8:
3823   case AArch64::SUBv16i8:
3824   case AArch64::SUBv4i16:
3825   case AArch64::SUBv8i16:
3826   case AArch64::SUBv2i32:
3827   case AArch64::SUBv4i32:
3828     return true;
3829   default:
3830     break;
3831   }
3832   return false;
3833 }
3834 
3835 // FP Opcodes that can be combined with a FMUL
3836 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3837   switch (Inst.getOpcode()) {
3838   default:
3839     break;
3840   case AArch64::FADDHrr:
3841   case AArch64::FADDSrr:
3842   case AArch64::FADDDrr:
3843   case AArch64::FADDv4f16:
3844   case AArch64::FADDv8f16:
3845   case AArch64::FADDv2f32:
3846   case AArch64::FADDv2f64:
3847   case AArch64::FADDv4f32:
3848   case AArch64::FSUBHrr:
3849   case AArch64::FSUBSrr:
3850   case AArch64::FSUBDrr:
3851   case AArch64::FSUBv4f16:
3852   case AArch64::FSUBv8f16:
3853   case AArch64::FSUBv2f32:
3854   case AArch64::FSUBv2f64:
3855   case AArch64::FSUBv4f32:
3856     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3857     return (Options.UnsafeFPMath ||
3858             Options.AllowFPOpFusion == FPOpFusion::Fast);
3859   }
3860   return false;
3861 }
3862 
3863 // Opcodes that can be combined with a MUL
3864 static bool isCombineInstrCandidate(unsigned Opc) {
3865   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3866 }
3867 
3868 //
3869 // Utility routine that checks whether \param MO is defined by a
3870 // \param CombineOpc instruction in the basic block \param MBB.
3871 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3872                        unsigned CombineOpc, unsigned ZeroReg = 0,
3873                        bool CheckZeroReg = false) {
3874   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3875   MachineInstr *MI = nullptr;
3876 
3877   if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
3878     MI = MRI.getUniqueVRegDef(MO.getReg());
3879   // And it needs to be in the trace (otherwise, it won't have a depth).
3880   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3881     return false;
3882   // Must only be used by the user we combine with.
3883   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3884     return false;
3885 
3886   if (CheckZeroReg) {
3887     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3888            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3889            MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3890     // The third input reg must be zero.
3891     if (MI->getOperand(3).getReg() != ZeroReg)
3892       return false;
3893   }
3894 
3895   return true;
3896 }
3897 
3898 //
3899 // Is \param MO defined by an integer multiply that can be combined?
3900 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3901                               unsigned MulOpc, unsigned ZeroReg) {
3902   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3903 }
3904 
3905 //
3906 // Is \param MO defined by a floating-point multiply that can be combined?
3907 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3908                                unsigned MulOpc) {
3909   return canCombine(MBB, MO, MulOpc);
3910 }
3911 
3912 // TODO: There are many more machine instruction opcodes to match:
3913 //       1. Other data types (integer, vectors)
3914 //       2. Other math / logic operations (xor, or)
3915 //       3. Other forms of the same operation (intrinsics and other variants)
3916 bool AArch64InstrInfo::isAssociativeAndCommutative(
3917     const MachineInstr &Inst) const {
3918   switch (Inst.getOpcode()) {
3919   case AArch64::FADDDrr:
3920   case AArch64::FADDSrr:
3921   case AArch64::FADDv2f32:
3922   case AArch64::FADDv2f64:
3923   case AArch64::FADDv4f32:
3924   case AArch64::FMULDrr:
3925   case AArch64::FMULSrr:
3926   case AArch64::FMULX32:
3927   case AArch64::FMULX64:
3928   case AArch64::FMULXv2f32:
3929   case AArch64::FMULXv2f64:
3930   case AArch64::FMULXv4f32:
3931   case AArch64::FMULv2f32:
3932   case AArch64::FMULv2f64:
3933   case AArch64::FMULv4f32:
3934     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3935   default:
3936     return false;
3937   }
3938 }
3939 
3940 /// Find instructions that can be turned into madd.
3941 static bool getMaddPatterns(MachineInstr &Root,
3942                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3943   unsigned Opc = Root.getOpcode();
3944   MachineBasicBlock &MBB = *Root.getParent();
3945   bool Found = false;
3946 
3947   if (!isCombineInstrCandidate(Opc))
3948     return false;
3949   if (isCombineInstrSettingFlag(Opc)) {
3950     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3951     // When NZCV is live, bail out.
3952     if (Cmp_NZCV == -1)
3953       return false;
3954     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3955     // When the opcode can't change, bail out.
3956     // CHECKME: do we miss any cases for opcode conversion?
3957     if (NewOpc == Opc)
3958       return false;
3959     Opc = NewOpc;
3960   }
3961 
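  // Record a pattern when the given operand is produced by a single-use MUL
  // in this block. The scalar helper additionally requires the multiply's
  // accumulator to be the zero register, i.e. the MADD is a pure multiply.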
3962   auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
3963                       MachineCombinerPattern Pattern) {
3964     if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
3965       Patterns.push_back(Pattern);
3966       Found = true;
3967     }
3968   };
3969 
3970   auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
3971     if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
3972       Patterns.push_back(Pattern);
3973       Found = true;
3974     }
3975   };
3976 
3977   typedef MachineCombinerPattern MCP;
3978 
3979   switch (Opc) {
3980   default:
3981     break;
3982   case AArch64::ADDWrr:
3983     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3984            "ADDWrr does not have register operands");
3985     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
3986     setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
3987     break;
3988   case AArch64::ADDXrr:
3989     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
3990     setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
3991     break;
3992   case AArch64::SUBWrr:
3993     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
3994     setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
3995     break;
3996   case AArch64::SUBXrr:
3997     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
3998     setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
3999     break;
4000   case AArch64::ADDWri:
4001     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
4002     break;
4003   case AArch64::ADDXri:
4004     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
4005     break;
4006   case AArch64::SUBWri:
4007     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
4008     break;
4009   case AArch64::SUBXri:
4010     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
4011     break;
4012   case AArch64::ADDv8i8:
4013     setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
4014     setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
4015     break;
4016   case AArch64::ADDv16i8:
4017     setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
4018     setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
4019     break;
4020   case AArch64::ADDv4i16:
4021     setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
4022     setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
4023     setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
4024     setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
4025     break;
4026   case AArch64::ADDv8i16:
4027     setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
4028     setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
4029     setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
4030     setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
4031     break;
4032   case AArch64::ADDv2i32:
4033     setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
4034     setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
4035     setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
4036     setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
4037     break;
4038   case AArch64::ADDv4i32:
4039     setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
4040     setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
4041     setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
4042     setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
4043     break;
4044   case AArch64::SUBv8i8:
4045     setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
4046     setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
4047     break;
4048   case AArch64::SUBv16i8:
4049     setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
4050     setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
4051     break;
4052   case AArch64::SUBv4i16:
4053     setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
4054     setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
4055     setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
4056     setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
4057     break;
4058   case AArch64::SUBv8i16:
4059     setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
4060     setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
4061     setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
4062     setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
4063     break;
4064   case AArch64::SUBv2i32:
4065     setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
4066     setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
4067     setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
4068     setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
4069     break;
4070   case AArch64::SUBv4i32:
4071     setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
4072     setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
4073     setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
4074     setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
4075     break;
4076   }
4077   return Found;
4078 }
4079 /// Floating-Point Support
4080 
4081 /// Find instructions that can be turned into a fused multiply-add/sub.
4082 static bool getFMAPatterns(MachineInstr &Root,
4083                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
4084 
4085   if (!isCombineInstrCandidateFP(Root))
4086     return false;
4087 
4088   MachineBasicBlock &MBB = *Root.getParent();
4089   bool Found = false;
4090 
4091   auto Match = [&](int Opcode, int Operand,
4092                    MachineCombinerPattern Pattern) -> bool {
4093     if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
4094       Patterns.push_back(Pattern);
4095       return true;
4096     }
4097     return false;
4098   };
4099 
4100   typedef MachineCombinerPattern MCP;
4101 
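  // Each FADD/FSUB form below is matched against the FMUL variants (plain
  // and, where applicable, lane-indexed) feeding either operand, producing
  // the corresponding fused multiply-add/subtract combiner patterns.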
4102   switch (Root.getOpcode()) {
4103   default:
4104     assert(false && "Unsupported FP instruction in combiner\n");
4105     break;
4106   case AArch64::FADDHrr:
4107     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4108            "FADDHrr does not have register operands");
4109 
4110     Found  = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
4111     Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
4112     break;
4113   case AArch64::FADDSrr:
4114     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4115            "FADDSrr does not have register operands");
4116 
4117     Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
4118              Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
4119 
4120     Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
4121              Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
4122     break;
4123   case AArch64::FADDDrr:
4124     Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
4125              Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
4126 
4127     Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
4128              Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
4129     break;
4130   case AArch64::FADDv4f16:
4131     Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
4132              Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
4133 
4134     Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
4135              Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
4136     break;
4137   case AArch64::FADDv8f16:
4138     Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
4139              Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
4140 
4141     Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
4142              Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
4143     break;
4144   case AArch64::FADDv2f32:
4145     Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
4146              Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
4147 
4148     Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
4149              Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
4150     break;
4151   case AArch64::FADDv2f64:
4152     Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
4153              Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
4154 
4155     Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
4156              Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
4157     break;
4158   case AArch64::FADDv4f32:
4159     Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
4160              Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
4161 
4162     Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
4163              Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
4164     break;
4165   case AArch64::FSUBHrr:
4166     Found  = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
4167     Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
4168     Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
4169     break;
4170   case AArch64::FSUBSrr:
4171     Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
4172 
4173     Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
4174              Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
4175 
4176     Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
4177     break;
4178   case AArch64::FSUBDrr:
4179     Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
4180 
4181     Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
4182              Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
4183 
4184     Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
4185     break;
4186   case AArch64::FSUBv4f16:
4187     Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
4188              Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
4189 
4190     Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
4191              Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
4192     break;
4193   case AArch64::FSUBv8f16:
4194     Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
4195              Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
4196 
4197     Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
4198              Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
4199     break;
4200   case AArch64::FSUBv2f32:
4201     Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
4202              Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
4203 
4204     Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
4205              Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
4206     break;
4207   case AArch64::FSUBv2f64:
4208     Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
4209              Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
4210 
4211     Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
4212              Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
4213     break;
4214   case AArch64::FSUBv4f32:
4215     Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
4216              Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
4217 
4218     Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
4219              Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
4220     break;
4221   }
4222   return Found;
4223 }
4224 
4225 /// Return true when a code sequence can improve throughput. It
4226 /// should be called only for instructions in loops.
4227 /// \param Pattern - combiner pattern
4228 bool AArch64InstrInfo::isThroughputPattern(
4229     MachineCombinerPattern Pattern) const {
4230   switch (Pattern) {
4231   default:
4232     break;
4233   case MachineCombinerPattern::FMULADDH_OP1:
4234   case MachineCombinerPattern::FMULADDH_OP2:
4235   case MachineCombinerPattern::FMULSUBH_OP1:
4236   case MachineCombinerPattern::FMULSUBH_OP2:
4237   case MachineCombinerPattern::FMULADDS_OP1:
4238   case MachineCombinerPattern::FMULADDS_OP2:
4239   case MachineCombinerPattern::FMULSUBS_OP1:
4240   case MachineCombinerPattern::FMULSUBS_OP2:
4241   case MachineCombinerPattern::FMULADDD_OP1:
4242   case MachineCombinerPattern::FMULADDD_OP2:
4243   case MachineCombinerPattern::FMULSUBD_OP1:
4244   case MachineCombinerPattern::FMULSUBD_OP2:
4245   case MachineCombinerPattern::FNMULSUBH_OP1:
4246   case MachineCombinerPattern::FNMULSUBS_OP1:
4247   case MachineCombinerPattern::FNMULSUBD_OP1:
4248   case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
4249   case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
4250   case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
4251   case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
4252   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4253   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4254   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4255   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4256   case MachineCombinerPattern::FMLAv4f16_OP2:
4257   case MachineCombinerPattern::FMLAv4f16_OP1:
4258   case MachineCombinerPattern::FMLAv8f16_OP1:
4259   case MachineCombinerPattern::FMLAv8f16_OP2:
4260   case MachineCombinerPattern::FMLAv2f32_OP2:
4261   case MachineCombinerPattern::FMLAv2f32_OP1:
4262   case MachineCombinerPattern::FMLAv2f64_OP1:
4263   case MachineCombinerPattern::FMLAv2f64_OP2:
4264   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4265   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4266   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4267   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4268   case MachineCombinerPattern::FMLAv4f32_OP1:
4269   case MachineCombinerPattern::FMLAv4f32_OP2:
4270   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4271   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4272   case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
4273   case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
4274   case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
4275   case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
4276   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4277   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4278   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4279   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4280   case MachineCombinerPattern::FMLSv4f16_OP1:
4281   case MachineCombinerPattern::FMLSv4f16_OP2:
4282   case MachineCombinerPattern::FMLSv8f16_OP1:
4283   case MachineCombinerPattern::FMLSv8f16_OP2:
4284   case MachineCombinerPattern::FMLSv2f32_OP2:
4285   case MachineCombinerPattern::FMLSv2f64_OP2:
4286   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4287   case MachineCombinerPattern::FMLSv4f32_OP2:
4288   case MachineCombinerPattern::MULADDv8i8_OP1:
4289   case MachineCombinerPattern::MULADDv8i8_OP2:
4290   case MachineCombinerPattern::MULADDv16i8_OP1:
4291   case MachineCombinerPattern::MULADDv16i8_OP2:
4292   case MachineCombinerPattern::MULADDv4i16_OP1:
4293   case MachineCombinerPattern::MULADDv4i16_OP2:
4294   case MachineCombinerPattern::MULADDv8i16_OP1:
4295   case MachineCombinerPattern::MULADDv8i16_OP2:
4296   case MachineCombinerPattern::MULADDv2i32_OP1:
4297   case MachineCombinerPattern::MULADDv2i32_OP2:
4298   case MachineCombinerPattern::MULADDv4i32_OP1:
4299   case MachineCombinerPattern::MULADDv4i32_OP2:
4300   case MachineCombinerPattern::MULSUBv8i8_OP1:
4301   case MachineCombinerPattern::MULSUBv8i8_OP2:
4302   case MachineCombinerPattern::MULSUBv16i8_OP1:
4303   case MachineCombinerPattern::MULSUBv16i8_OP2:
4304   case MachineCombinerPattern::MULSUBv4i16_OP1:
4305   case MachineCombinerPattern::MULSUBv4i16_OP2:
4306   case MachineCombinerPattern::MULSUBv8i16_OP1:
4307   case MachineCombinerPattern::MULSUBv8i16_OP2:
4308   case MachineCombinerPattern::MULSUBv2i32_OP1:
4309   case MachineCombinerPattern::MULSUBv2i32_OP2:
4310   case MachineCombinerPattern::MULSUBv4i32_OP1:
4311   case MachineCombinerPattern::MULSUBv4i32_OP2:
4312   case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4313   case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4314   case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4315   case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4316   case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4317   case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4318   case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4319   case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4320   case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4321   case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4322   case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4323   case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4324   case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4325   case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4326   case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4327   case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4328     return true;
4329   } // end switch (Pattern)
4330   return false;
4331 }
4332 /// Return true when there is potentially a faster code sequence for an
4333 /// instruction chain ending in \p Root. All potential patterns are listed in
4334 /// the \p Patterns vector. Patterns should be sorted in priority order since
4335 /// the pattern evaluator stops checking as soon as it finds a faster sequence.
4336 
4337 bool AArch64InstrInfo::getMachineCombinerPatterns(
4338     MachineInstr &Root,
4339     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
4340   // Integer patterns
4341   if (getMaddPatterns(Root, Patterns))
4342     return true;
4343   // Floating point patterns
4344   if (getFMAPatterns(Root, Patterns))
4345     return true;
4346 
4347   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
4348 }
4349 
4350 enum class FMAInstKind { Default, Indexed, Accumulator };
4351 /// genFusedMultiply - Generate fused multiply instructions.
4352 /// This function supports both integer and floating point instructions.
4353 /// A typical example:
4354 ///  F|MUL I=A,B,0
4355 ///  F|ADD R,I,C
4356 ///  ==> F|MADD R,A,B,C
4357 /// \param MF Containing MachineFunction
4358 /// \param MRI Register information
4359 /// \param TII Target information
4360 /// \param Root is the F|ADD instruction
4361 /// \param [out] InsInstrs is a vector of machine instructions and will
4362 /// contain the generated madd instruction
4363 /// \param IdxMulOpd is index of operand in Root that is the result of
4364 /// the F|MUL. In the example above IdxMulOpd is 1.
4365 /// \param MaddOpc the opcode of the f|madd instruction
4366 /// \param RC Register class of operands
4367 /// \param kind The kind of FMA instruction (addressing mode) to generate
4368 /// \param ReplacedAddend is the result register from the instruction
4369 /// replacing the non-combined operand, if any.
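/// Note on operand order: for FMAInstKind::Default the new instruction's
/// source operands are emitted as (A, B, C); for the Indexed and Accumulator
/// kinds the addend/accumulator C is emitted first, i.e. (C, A, B), with the
/// Indexed kind also copying the lane immediate from the F|MUL.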
4370 static MachineInstr *
4371 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4372                  const TargetInstrInfo *TII, MachineInstr &Root,
4373                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4374                  unsigned MaddOpc, const TargetRegisterClass *RC,
4375                  FMAInstKind kind = FMAInstKind::Default,
4376                  const Register *ReplacedAddend = nullptr) {
4377   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4378 
4379   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4380   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4381   Register ResultReg = Root.getOperand(0).getReg();
4382   Register SrcReg0 = MUL->getOperand(1).getReg();
4383   bool Src0IsKill = MUL->getOperand(1).isKill();
4384   Register SrcReg1 = MUL->getOperand(2).getReg();
4385   bool Src1IsKill = MUL->getOperand(2).isKill();
4386 
4387   unsigned SrcReg2;
4388   bool Src2IsKill;
4389   if (ReplacedAddend) {
4390     // If we just generated a new addend, we must be its only use.
4391     SrcReg2 = *ReplacedAddend;
4392     Src2IsKill = true;
4393   } else {
4394     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4395     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4396   }
4397 
4398   if (Register::isVirtualRegister(ResultReg))
4399     MRI.constrainRegClass(ResultReg, RC);
4400   if (Register::isVirtualRegister(SrcReg0))
4401     MRI.constrainRegClass(SrcReg0, RC);
4402   if (Register::isVirtualRegister(SrcReg1))
4403     MRI.constrainRegClass(SrcReg1, RC);
4404   if (Register::isVirtualRegister(SrcReg2))
4405     MRI.constrainRegClass(SrcReg2, RC);
4406 
4407   MachineInstrBuilder MIB;
4408   if (kind == FMAInstKind::Default)
4409     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4410               .addReg(SrcReg0, getKillRegState(Src0IsKill))
4411               .addReg(SrcReg1, getKillRegState(Src1IsKill))
4412               .addReg(SrcReg2, getKillRegState(Src2IsKill));
4413   else if (kind == FMAInstKind::Indexed)
4414     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4415               .addReg(SrcReg2, getKillRegState(Src2IsKill))
4416               .addReg(SrcReg0, getKillRegState(Src0IsKill))
4417               .addReg(SrcReg1, getKillRegState(Src1IsKill))
4418               .addImm(MUL->getOperand(3).getImm());
4419   else if (kind == FMAInstKind::Accumulator)
4420     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4421               .addReg(SrcReg2, getKillRegState(Src2IsKill))
4422               .addReg(SrcReg0, getKillRegState(Src0IsKill))
4423               .addReg(SrcReg1, getKillRegState(Src1IsKill));
4424   else
4425     assert(false && "Invalid FMA instruction kind \n");
4426   // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
4427   InsInstrs.push_back(MIB);
4428   return MUL;
4429 }
4430 
4431 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
4432 /// instructions.
4433 ///
4434 /// \see genFusedMultiply
4435 static MachineInstr *genFusedMultiplyAcc(
4436     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4437     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4438     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4439   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4440                           FMAInstKind::Accumulator);
4441 }
4442 
4443 /// genNeg - Helper to generate an intermediate negation of the second operand
4444 /// of Root
4445 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
4446                        const TargetInstrInfo *TII, MachineInstr &Root,
4447                        SmallVectorImpl<MachineInstr *> &InsInstrs,
4448                        DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
4449                        unsigned MnegOpc, const TargetRegisterClass *RC) {
4450   Register NewVR = MRI.createVirtualRegister(RC);
4451   MachineInstrBuilder MIB =
4452       BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
4453           .add(Root.getOperand(2));
4454   InsInstrs.push_back(MIB);
4455 
4456   assert(InstrIdxForVirtReg.empty());
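  // Record that NewVR is defined by the instruction at index 0 of InsInstrs,
  // so the combiner can wire the new virtual register up to its definition
  // when the replacement sequence is committed.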
4457   InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4458 
4459   return NewVR;
4460 }
4461 
4462 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
4463 /// instructions with an additional negation of the accumulator.
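/// An illustrative use, for the vector MULSUB_OP1 patterns:
///   MUL I=A,B
///   SUB R,I,C
///   ==> NEG  V,C
///   ==> MLA  R,A,B,V // = -C + A*B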
4464 static MachineInstr *genFusedMultiplyAccNeg(
4465     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4466     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4467     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4468     unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4469   assert(IdxMulOpd == 1);
4470 
4471   Register NewVR =
4472       genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4473   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4474                           FMAInstKind::Accumulator, &NewVR);
4475 }
4476 
4477 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
4478 /// (by-element/indexed) instructions.
4479 ///
4480 /// \see genFusedMultiply
4481 static MachineInstr *genFusedMultiplyIdx(
4482     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4483     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4484     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
4485   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4486                           FMAInstKind::Indexed);
4487 }
4488 
4489 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
4490 /// (by-element/indexed) instructions with an additional negation of the accumulator
4491 static MachineInstr *genFusedMultiplyIdxNeg(
4492     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
4493     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
4494     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
4495     unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
4496   assert(IdxMulOpd == 1);
4497 
4498   Register NewVR =
4499       genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
4500 
4501   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
4502                           FMAInstKind::Indexed, &NewVR);
4503 }
4504 
4505 /// genMaddR - Generate madd instruction and combine mul and add using
4506 /// an extra virtual register
4507 /// Example - an ADD intermediate needs to be stored in a register:
4508 ///   MUL I=A,B,0
4509 ///   ADD R,I,Imm
4510 ///   ==> ORR  V, ZR, Imm
4511 ///   ==> MADD R,A,B,V
4512 /// \param MF Containing MachineFunction
4513 /// \param MRI Register information
4514 /// \param TII Target information
4515 /// \param Root is the ADD instruction
4516 /// \param [out] InsInstrs is a vector of machine instructions and will
4517 /// contain the generated madd instruction
4518 /// \param IdxMulOpd is index of operand in Root that is the result of
4519 /// the MUL. In the example above IdxMulOpd is 1.
4520 /// \param MaddOpc the opcode of the madd instruction
4521 /// \param VR is a virtual register that holds the value of an ADD operand
4522 /// (V in the example above).
4523 /// \param RC Register class of operands
4524 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4525                               const TargetInstrInfo *TII, MachineInstr &Root,
4526                               SmallVectorImpl<MachineInstr *> &InsInstrs,
4527                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4528                               const TargetRegisterClass *RC) {
4529   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4530 
4531   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4532   Register ResultReg = Root.getOperand(0).getReg();
4533   Register SrcReg0 = MUL->getOperand(1).getReg();
4534   bool Src0IsKill = MUL->getOperand(1).isKill();
4535   Register SrcReg1 = MUL->getOperand(2).getReg();
4536   bool Src1IsKill = MUL->getOperand(2).isKill();
4537 
4538   if (Register::isVirtualRegister(ResultReg))
4539     MRI.constrainRegClass(ResultReg, RC);
4540   if (Register::isVirtualRegister(SrcReg0))
4541     MRI.constrainRegClass(SrcReg0, RC);
4542   if (Register::isVirtualRegister(SrcReg1))
4543     MRI.constrainRegClass(SrcReg1, RC);
4544   if (Register::isVirtualRegister(VR))
4545     MRI.constrainRegClass(VR, RC);
4546 
4547   MachineInstrBuilder MIB =
4548       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4549           .addReg(SrcReg0, getKillRegState(Src0IsKill))
4550           .addReg(SrcReg1, getKillRegState(Src1IsKill))
4551           .addReg(VR);
4552   // Insert the MADD
4553   InsInstrs.push_back(MIB);
4554   return MUL;
4555 }
4556 
4557 /// When getMachineCombinerPatterns() finds potential patterns,
4558 /// this function generates the instructions that could replace the
4559 /// original code sequence.
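/// The new instructions are appended to \p InsInstrs and the instructions to
/// be removed are recorded in \p DelInstrs; the MachineCombiner then decides,
/// based on its cost model, whether the replacement is actually profitable.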
4560 void AArch64InstrInfo::genAlternativeCodeSequence(
4561     MachineInstr &Root, MachineCombinerPattern Pattern,
4562     SmallVectorImpl<MachineInstr *> &InsInstrs,
4563     SmallVectorImpl<MachineInstr *> &DelInstrs,
4564     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4565   MachineBasicBlock &MBB = *Root.getParent();
4566   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4567   MachineFunction &MF = *MBB.getParent();
4568   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4569 
4570   MachineInstr *MUL;
4571   const TargetRegisterClass *RC;
4572   unsigned Opc;
4573   switch (Pattern) {
4574   default:
4575     // Reassociate instructions.
4576     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4577                                                 DelInstrs, InstrIdxForVirtReg);
4578     return;
4579   case MachineCombinerPattern::MULADDW_OP1:
4580   case MachineCombinerPattern::MULADDX_OP1:
4581     // MUL I=A,B,0
4582     // ADD R,I,C
4583     // ==> MADD R,A,B,C
4584     // --- Create(MADD);
4585     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4586       Opc = AArch64::MADDWrrr;
4587       RC = &AArch64::GPR32RegClass;
4588     } else {
4589       Opc = AArch64::MADDXrrr;
4590       RC = &AArch64::GPR64RegClass;
4591     }
4592     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4593     break;
4594   case MachineCombinerPattern::MULADDW_OP2:
4595   case MachineCombinerPattern::MULADDX_OP2:
4596     // MUL I=A,B,0
4597     // ADD R,C,I
4598     // ==> MADD R,A,B,C
4599     // --- Create(MADD);
4600     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4601       Opc = AArch64::MADDWrrr;
4602       RC = &AArch64::GPR32RegClass;
4603     } else {
4604       Opc = AArch64::MADDXrrr;
4605       RC = &AArch64::GPR64RegClass;
4606     }
4607     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4608     break;
4609   case MachineCombinerPattern::MULADDWI_OP1:
4610   case MachineCombinerPattern::MULADDXI_OP1: {
4611     // MUL I=A,B,0
4612     // ADD R,I,Imm
4613     // ==> ORR  V, ZR, Imm
4614     // ==> MADD R,A,B,V
4615     // --- Create(MADD);
4616     const TargetRegisterClass *OrrRC;
4617     unsigned BitSize, OrrOpc, ZeroReg;
4618     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4619       OrrOpc = AArch64::ORRWri;
4620       OrrRC = &AArch64::GPR32spRegClass;
4621       BitSize = 32;
4622       ZeroReg = AArch64::WZR;
4623       Opc = AArch64::MADDWrrr;
4624       RC = &AArch64::GPR32RegClass;
4625     } else {
4626       OrrOpc = AArch64::ORRXri;
4627       OrrRC = &AArch64::GPR64spRegClass;
4628       BitSize = 64;
4629       ZeroReg = AArch64::XZR;
4630       Opc = AArch64::MADDXrrr;
4631       RC = &AArch64::GPR64RegClass;
4632     }
4633     Register NewVR = MRI.createVirtualRegister(OrrRC);
4634     uint64_t Imm = Root.getOperand(2).getImm();
4635 
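    // If the ADD carried a shifted immediate (operand 3 holds the shift
    // amount), fold the shift into the value before materializing it with ORR.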
4636     if (Root.getOperand(3).isImm()) {
4637       unsigned Val = Root.getOperand(3).getImm();
4638       Imm = Imm << Val;
4639     }
4640     uint64_t UImm = SignExtend64(Imm, BitSize);
4641     uint64_t Encoding;
4642     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4643       MachineInstrBuilder MIB1 =
4644           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4645               .addReg(ZeroReg)
4646               .addImm(Encoding);
4647       InsInstrs.push_back(MIB1);
4648       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4649       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4650     }
4651     break;
4652   }
4653   case MachineCombinerPattern::MULSUBW_OP1:
4654   case MachineCombinerPattern::MULSUBX_OP1: {
4655     // MUL I=A,B,0
4656     // SUB R,I, C
4657     // ==> SUB  V, 0, C
4658     // ==> MADD R,A,B,V // = -C + A*B
4659     // --- Create(MADD);
4660     const TargetRegisterClass *SubRC;
4661     unsigned SubOpc, ZeroReg;
4662     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4663       SubOpc = AArch64::SUBWrr;
4664       SubRC = &AArch64::GPR32spRegClass;
4665       ZeroReg = AArch64::WZR;
4666       Opc = AArch64::MADDWrrr;
4667       RC = &AArch64::GPR32RegClass;
4668     } else {
4669       SubOpc = AArch64::SUBXrr;
4670       SubRC = &AArch64::GPR64spRegClass;
4671       ZeroReg = AArch64::XZR;
4672       Opc = AArch64::MADDXrrr;
4673       RC = &AArch64::GPR64RegClass;
4674     }
4675     Register NewVR = MRI.createVirtualRegister(SubRC);
4676     // SUB NewVR, 0, C
4677     MachineInstrBuilder MIB1 =
4678         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4679             .addReg(ZeroReg)
4680             .add(Root.getOperand(2));
4681     InsInstrs.push_back(MIB1);
4682     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4683     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4684     break;
4685   }
4686   case MachineCombinerPattern::MULSUBW_OP2:
4687   case MachineCombinerPattern::MULSUBX_OP2:
4688     // MUL I=A,B,0
4689     // SUB R,C,I
4690     // ==> MSUB R,A,B,C (computes C - A*B)
4691     // --- Create(MSUB);
4692     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4693       Opc = AArch64::MSUBWrrr;
4694       RC = &AArch64::GPR32RegClass;
4695     } else {
4696       Opc = AArch64::MSUBXrrr;
4697       RC = &AArch64::GPR64RegClass;
4698     }
4699     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4700     break;
4701   case MachineCombinerPattern::MULSUBWI_OP1:
4702   case MachineCombinerPattern::MULSUBXI_OP1: {
4703     // MUL I=A,B,0
4704     // SUB R,I, Imm
4705     // ==> ORR  V, ZR, -Imm
4706     // ==> MADD R,A,B,V // = -Imm + A*B
4707     // --- Create(MADD);
4708     const TargetRegisterClass *OrrRC;
4709     unsigned BitSize, OrrOpc, ZeroReg;
4710     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4711       OrrOpc = AArch64::ORRWri;
4712       OrrRC = &AArch64::GPR32spRegClass;
4713       BitSize = 32;
4714       ZeroReg = AArch64::WZR;
4715       Opc = AArch64::MADDWrrr;
4716       RC = &AArch64::GPR32RegClass;
4717     } else {
4718       OrrOpc = AArch64::ORRXri;
4719       OrrRC = &AArch64::GPR64spRegClass;
4720       BitSize = 64;
4721       ZeroReg = AArch64::XZR;
4722       Opc = AArch64::MADDXrrr;
4723       RC = &AArch64::GPR64RegClass;
4724     }
4725     Register NewVR = MRI.createVirtualRegister(OrrRC);
4726     uint64_t Imm = Root.getOperand(2).getImm();
4727     if (Root.getOperand(3).isImm()) {
4728       unsigned Val = Root.getOperand(3).getImm();
4729       Imm = Imm << Val;
4730     }
4731     uint64_t UImm = SignExtend64(-Imm, BitSize);
4732     uint64_t Encoding;
4733     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4734       MachineInstrBuilder MIB1 =
4735           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4736               .addReg(ZeroReg)
4737               .addImm(Encoding);
4738       InsInstrs.push_back(MIB1);
4739       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4740       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4741     }
4742     break;
4743   }
4744 
4745   case MachineCombinerPattern::MULADDv8i8_OP1:
4746     Opc = AArch64::MLAv8i8;
4747     RC = &AArch64::FPR64RegClass;
4748     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4749     break;
4750   case MachineCombinerPattern::MULADDv8i8_OP2:
4751     Opc = AArch64::MLAv8i8;
4752     RC = &AArch64::FPR64RegClass;
4753     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4754     break;
4755   case MachineCombinerPattern::MULADDv16i8_OP1:
4756     Opc = AArch64::MLAv16i8;
4757     RC = &AArch64::FPR128RegClass;
4758     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4759     break;
4760   case MachineCombinerPattern::MULADDv16i8_OP2:
4761     Opc = AArch64::MLAv16i8;
4762     RC = &AArch64::FPR128RegClass;
4763     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4764     break;
4765   case MachineCombinerPattern::MULADDv4i16_OP1:
4766     Opc = AArch64::MLAv4i16;
4767     RC = &AArch64::FPR64RegClass;
4768     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4769     break;
4770   case MachineCombinerPattern::MULADDv4i16_OP2:
4771     Opc = AArch64::MLAv4i16;
4772     RC = &AArch64::FPR64RegClass;
4773     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4774     break;
4775   case MachineCombinerPattern::MULADDv8i16_OP1:
4776     Opc = AArch64::MLAv8i16;
4777     RC = &AArch64::FPR128RegClass;
4778     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4779     break;
4780   case MachineCombinerPattern::MULADDv8i16_OP2:
4781     Opc = AArch64::MLAv8i16;
4782     RC = &AArch64::FPR128RegClass;
4783     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4784     break;
4785   case MachineCombinerPattern::MULADDv2i32_OP1:
4786     Opc = AArch64::MLAv2i32;
4787     RC = &AArch64::FPR64RegClass;
4788     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4789     break;
4790   case MachineCombinerPattern::MULADDv2i32_OP2:
4791     Opc = AArch64::MLAv2i32;
4792     RC = &AArch64::FPR64RegClass;
4793     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4794     break;
4795   case MachineCombinerPattern::MULADDv4i32_OP1:
4796     Opc = AArch64::MLAv4i32;
4797     RC = &AArch64::FPR128RegClass;
4798     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4799     break;
4800   case MachineCombinerPattern::MULADDv4i32_OP2:
4801     Opc = AArch64::MLAv4i32;
4802     RC = &AArch64::FPR128RegClass;
4803     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4804     break;
4805 
4806   case MachineCombinerPattern::MULSUBv8i8_OP1:
4807     Opc = AArch64::MLAv8i8;
4808     RC = &AArch64::FPR64RegClass;
4809     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4810                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
4811                                  RC);
4812     break;
4813   case MachineCombinerPattern::MULSUBv8i8_OP2:
4814     Opc = AArch64::MLSv8i8;
4815     RC = &AArch64::FPR64RegClass;
4816     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4817     break;
4818   case MachineCombinerPattern::MULSUBv16i8_OP1:
4819     Opc = AArch64::MLAv16i8;
4820     RC = &AArch64::FPR128RegClass;
4821     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4822                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
4823                                  RC);
4824     break;
4825   case MachineCombinerPattern::MULSUBv16i8_OP2:
4826     Opc = AArch64::MLSv16i8;
4827     RC = &AArch64::FPR128RegClass;
4828     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4829     break;
4830   case MachineCombinerPattern::MULSUBv4i16_OP1:
4831     Opc = AArch64::MLAv4i16;
4832     RC = &AArch64::FPR64RegClass;
4833     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4834                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4835                                  RC);
4836     break;
4837   case MachineCombinerPattern::MULSUBv4i16_OP2:
4838     Opc = AArch64::MLSv4i16;
4839     RC = &AArch64::FPR64RegClass;
4840     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4841     break;
4842   case MachineCombinerPattern::MULSUBv8i16_OP1:
4843     Opc = AArch64::MLAv8i16;
4844     RC = &AArch64::FPR128RegClass;
4845     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4846                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4847                                  RC);
4848     break;
4849   case MachineCombinerPattern::MULSUBv8i16_OP2:
4850     Opc = AArch64::MLSv8i16;
4851     RC = &AArch64::FPR128RegClass;
4852     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4853     break;
4854   case MachineCombinerPattern::MULSUBv2i32_OP1:
4855     Opc = AArch64::MLAv2i32;
4856     RC = &AArch64::FPR64RegClass;
4857     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4858                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4859                                  RC);
4860     break;
4861   case MachineCombinerPattern::MULSUBv2i32_OP2:
4862     Opc = AArch64::MLSv2i32;
4863     RC = &AArch64::FPR64RegClass;
4864     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4865     break;
4866   case MachineCombinerPattern::MULSUBv4i32_OP1:
4867     Opc = AArch64::MLAv4i32;
4868     RC = &AArch64::FPR128RegClass;
4869     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
4870                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4871                                  RC);
4872     break;
4873   case MachineCombinerPattern::MULSUBv4i32_OP2:
4874     Opc = AArch64::MLSv4i32;
4875     RC = &AArch64::FPR128RegClass;
4876     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4877     break;
4878 
4879   case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
4880     Opc = AArch64::MLAv4i16_indexed;
4881     RC = &AArch64::FPR64RegClass;
4882     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4883     break;
4884   case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
4885     Opc = AArch64::MLAv4i16_indexed;
4886     RC = &AArch64::FPR64RegClass;
4887     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4888     break;
4889   case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
4890     Opc = AArch64::MLAv8i16_indexed;
4891     RC = &AArch64::FPR128RegClass;
4892     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4893     break;
4894   case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
4895     Opc = AArch64::MLAv8i16_indexed;
4896     RC = &AArch64::FPR128RegClass;
4897     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4898     break;
4899   case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
4900     Opc = AArch64::MLAv2i32_indexed;
4901     RC = &AArch64::FPR64RegClass;
4902     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4903     break;
4904   case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
4905     Opc = AArch64::MLAv2i32_indexed;
4906     RC = &AArch64::FPR64RegClass;
4907     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4908     break;
4909   case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
4910     Opc = AArch64::MLAv4i32_indexed;
4911     RC = &AArch64::FPR128RegClass;
4912     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4913     break;
4914   case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
4915     Opc = AArch64::MLAv4i32_indexed;
4916     RC = &AArch64::FPR128RegClass;
4917     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4918     break;
4919 
4920   case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
4921     Opc = AArch64::MLAv4i16_indexed;
4922     RC = &AArch64::FPR64RegClass;
4923     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4924                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
4925                                  RC);
4926     break;
4927   case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
4928     Opc = AArch64::MLSv4i16_indexed;
4929     RC = &AArch64::FPR64RegClass;
4930     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4931     break;
4932   case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
4933     Opc = AArch64::MLAv8i16_indexed;
4934     RC = &AArch64::FPR128RegClass;
4935     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4936                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
4937                                  RC);
4938     break;
4939   case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
4940     Opc = AArch64::MLSv8i16_indexed;
4941     RC = &AArch64::FPR128RegClass;
4942     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4943     break;
4944   case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
4945     Opc = AArch64::MLAv2i32_indexed;
4946     RC = &AArch64::FPR64RegClass;
4947     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4948                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
4949                                  RC);
4950     break;
4951   case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
4952     Opc = AArch64::MLSv2i32_indexed;
4953     RC = &AArch64::FPR64RegClass;
4954     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4955     break;
4956   case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
4957     Opc = AArch64::MLAv4i32_indexed;
4958     RC = &AArch64::FPR128RegClass;
4959     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
4960                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
4961                                  RC);
4962     break;
4963   case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
4964     Opc = AArch64::MLSv4i32_indexed;
4965     RC = &AArch64::FPR128RegClass;
4966     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4967     break;
4968 
4969   // Floating Point Support
4970   case MachineCombinerPattern::FMULADDH_OP1:
4971     Opc = AArch64::FMADDHrrr;
4972     RC = &AArch64::FPR16RegClass;
4973     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4974     break;
4975   case MachineCombinerPattern::FMULADDS_OP1:
4976     Opc = AArch64::FMADDSrrr;
4977     RC = &AArch64::FPR32RegClass;
4978     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4979     break;
4980   case MachineCombinerPattern::FMULADDD_OP1:
4981     Opc = AArch64::FMADDDrrr;
4982     RC = &AArch64::FPR64RegClass;
4983     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4984     break;
4985 
4986   case MachineCombinerPattern::FMULADDH_OP2:
4987     Opc = AArch64::FMADDHrrr;
4988     RC = &AArch64::FPR16RegClass;
4989     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4990     break;
4991   case MachineCombinerPattern::FMULADDS_OP2:
4992     Opc = AArch64::FMADDSrrr;
4993     RC = &AArch64::FPR32RegClass;
4994     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4995     break;
4996   case MachineCombinerPattern::FMULADDD_OP2:
4997     Opc = AArch64::FMADDDrrr;
4998     RC = &AArch64::FPR64RegClass;
4999     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5000     break;
5001 
5002   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
5003     Opc = AArch64::FMLAv1i32_indexed;
5004     RC = &AArch64::FPR32RegClass;
5005     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5006                            FMAInstKind::Indexed);
5007     break;
5008   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
5009     Opc = AArch64::FMLAv1i32_indexed;
5010     RC = &AArch64::FPR32RegClass;
5011     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5012                            FMAInstKind::Indexed);
5013     break;
5014 
5015   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
5016     Opc = AArch64::FMLAv1i64_indexed;
5017     RC = &AArch64::FPR64RegClass;
5018     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5019                            FMAInstKind::Indexed);
5020     break;
5021   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
5022     Opc = AArch64::FMLAv1i64_indexed;
5023     RC = &AArch64::FPR64RegClass;
5024     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5025                            FMAInstKind::Indexed);
5026     break;
5027 
5028   case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
5029     RC = &AArch64::FPR64RegClass;
5030     Opc = AArch64::FMLAv4i16_indexed;
5031     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5032                            FMAInstKind::Indexed);
5033     break;
5034   case MachineCombinerPattern::FMLAv4f16_OP1:
5035     RC = &AArch64::FPR64RegClass;
5036     Opc = AArch64::FMLAv4f16;
5037     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5038                            FMAInstKind::Accumulator);
5039     break;
5040   case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
5041     RC = &AArch64::FPR64RegClass;
5042     Opc = AArch64::FMLAv4i16_indexed;
5043     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5044                            FMAInstKind::Indexed);
5045     break;
5046   case MachineCombinerPattern::FMLAv4f16_OP2:
5047     RC = &AArch64::FPR64RegClass;
5048     Opc = AArch64::FMLAv4f16;
5049     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5050                            FMAInstKind::Accumulator);
5051     break;
5052 
5053   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
5054   case MachineCombinerPattern::FMLAv2f32_OP1:
5055     RC = &AArch64::FPR64RegClass;
5056     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
5057       Opc = AArch64::FMLAv2i32_indexed;
5058       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5059                              FMAInstKind::Indexed);
5060     } else {
5061       Opc = AArch64::FMLAv2f32;
5062       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5063                              FMAInstKind::Accumulator);
5064     }
5065     break;
5066   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
5067   case MachineCombinerPattern::FMLAv2f32_OP2:
5068     RC = &AArch64::FPR64RegClass;
5069     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
5070       Opc = AArch64::FMLAv2i32_indexed;
5071       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5072                              FMAInstKind::Indexed);
5073     } else {
5074       Opc = AArch64::FMLAv2f32;
5075       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5076                              FMAInstKind::Accumulator);
5077     }
5078     break;
5079 
5080   case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
5081     RC = &AArch64::FPR128RegClass;
5082     Opc = AArch64::FMLAv8i16_indexed;
5083     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5084                            FMAInstKind::Indexed);
5085     break;
5086   case MachineCombinerPattern::FMLAv8f16_OP1:
5087     RC = &AArch64::FPR128RegClass;
5088     Opc = AArch64::FMLAv8f16;
5089     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5090                            FMAInstKind::Accumulator);
5091     break;
5092   case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
5093     RC = &AArch64::FPR128RegClass;
5094     Opc = AArch64::FMLAv8i16_indexed;
5095     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5096                            FMAInstKind::Indexed);
5097     break;
5098   case MachineCombinerPattern::FMLAv8f16_OP2:
5099     RC = &AArch64::FPR128RegClass;
5100     Opc = AArch64::FMLAv8f16;
5101     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5102                            FMAInstKind::Accumulator);
5103     break;
5104 
5105   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
5106   case MachineCombinerPattern::FMLAv2f64_OP1:
5107     RC = &AArch64::FPR128RegClass;
5108     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
5109       Opc = AArch64::FMLAv2i64_indexed;
5110       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5111                              FMAInstKind::Indexed);
5112     } else {
5113       Opc = AArch64::FMLAv2f64;
5114       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5115                              FMAInstKind::Accumulator);
5116     }
5117     break;
5118   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
5119   case MachineCombinerPattern::FMLAv2f64_OP2:
5120     RC = &AArch64::FPR128RegClass;
5121     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
5122       Opc = AArch64::FMLAv2i64_indexed;
5123       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5124                              FMAInstKind::Indexed);
5125     } else {
5126       Opc = AArch64::FMLAv2f64;
5127       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5128                              FMAInstKind::Accumulator);
5129     }
5130     break;
5131 
5132   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
5133   case MachineCombinerPattern::FMLAv4f32_OP1:
5134     RC = &AArch64::FPR128RegClass;
5135     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
5136       Opc = AArch64::FMLAv4i32_indexed;
5137       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5138                              FMAInstKind::Indexed);
5139     } else {
5140       Opc = AArch64::FMLAv4f32;
5141       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5142                              FMAInstKind::Accumulator);
5143     }
5144     break;
5145 
5146   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
5147   case MachineCombinerPattern::FMLAv4f32_OP2:
5148     RC = &AArch64::FPR128RegClass;
5149     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
5150       Opc = AArch64::FMLAv4i32_indexed;
5151       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5152                              FMAInstKind::Indexed);
5153     } else {
5154       Opc = AArch64::FMLAv4f32;
5155       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5156                              FMAInstKind::Accumulator);
5157     }
5158     break;
5159 
5160   case MachineCombinerPattern::FMULSUBH_OP1:
5161     Opc = AArch64::FNMSUBHrrr;
5162     RC = &AArch64::FPR16RegClass;
5163     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5164     break;
5165   case MachineCombinerPattern::FMULSUBS_OP1:
5166     Opc = AArch64::FNMSUBSrrr;
5167     RC = &AArch64::FPR32RegClass;
5168     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5169     break;
5170   case MachineCombinerPattern::FMULSUBD_OP1:
5171     Opc = AArch64::FNMSUBDrrr;
5172     RC = &AArch64::FPR64RegClass;
5173     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5174     break;
5175 
5176   case MachineCombinerPattern::FNMULSUBH_OP1:
5177     Opc = AArch64::FNMADDHrrr;
5178     RC = &AArch64::FPR16RegClass;
5179     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5180     break;
5181   case MachineCombinerPattern::FNMULSUBS_OP1:
5182     Opc = AArch64::FNMADDSrrr;
5183     RC = &AArch64::FPR32RegClass;
5184     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5185     break;
5186   case MachineCombinerPattern::FNMULSUBD_OP1:
5187     Opc = AArch64::FNMADDDrrr;
5188     RC = &AArch64::FPR64RegClass;
5189     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5190     break;
5191 
5192   case MachineCombinerPattern::FMULSUBH_OP2:
5193     Opc = AArch64::FMSUBHrrr;
5194     RC = &AArch64::FPR16RegClass;
5195     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5196     break;
5197   case MachineCombinerPattern::FMULSUBS_OP2:
5198     Opc = AArch64::FMSUBSrrr;
5199     RC = &AArch64::FPR32RegClass;
5200     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5201     break;
5202   case MachineCombinerPattern::FMULSUBD_OP2:
5203     Opc = AArch64::FMSUBDrrr;
5204     RC = &AArch64::FPR64RegClass;
5205     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5206     break;
5207 
5208   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
5209     Opc = AArch64::FMLSv1i32_indexed;
5210     RC = &AArch64::FPR32RegClass;
5211     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5212                            FMAInstKind::Indexed);
5213     break;
5214 
5215   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
5216     Opc = AArch64::FMLSv1i64_indexed;
5217     RC = &AArch64::FPR64RegClass;
5218     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5219                            FMAInstKind::Indexed);
5220     break;
5221 
5222   case MachineCombinerPattern::FMLSv4f16_OP1:
5223   case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
5224     RC = &AArch64::FPR64RegClass;
5225     Register NewVR = MRI.createVirtualRegister(RC);
5226     MachineInstrBuilder MIB1 =
5227         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
5228             .add(Root.getOperand(2));
5229     InsInstrs.push_back(MIB1);
5230     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5231     if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
5232       Opc = AArch64::FMLAv4f16;
5233       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5234                              FMAInstKind::Accumulator, &NewVR);
5235     } else {
5236       Opc = AArch64::FMLAv4i16_indexed;
5237       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5238                              FMAInstKind::Indexed, &NewVR);
5239     }
5240     break;
5241   }
5242   case MachineCombinerPattern::FMLSv4f16_OP2:
5243     RC = &AArch64::FPR64RegClass;
5244     Opc = AArch64::FMLSv4f16;
5245     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5246                            FMAInstKind::Accumulator);
5247     break;
5248   case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
5249     RC = &AArch64::FPR64RegClass;
5250     Opc = AArch64::FMLSv4i16_indexed;
5251     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5252                            FMAInstKind::Indexed);
5253     break;
5254 
5255   case MachineCombinerPattern::FMLSv2f32_OP2:
5256   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
5257     RC = &AArch64::FPR64RegClass;
5258     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
5259       Opc = AArch64::FMLSv2i32_indexed;
5260       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5261                              FMAInstKind::Indexed);
5262     } else {
5263       Opc = AArch64::FMLSv2f32;
5264       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5265                              FMAInstKind::Accumulator);
5266     }
5267     break;
5268 
5269   case MachineCombinerPattern::FMLSv8f16_OP1:
5270   case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
5271     RC = &AArch64::FPR128RegClass;
5272     Register NewVR = MRI.createVirtualRegister(RC);
5273     MachineInstrBuilder MIB1 =
5274         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
5275             .add(Root.getOperand(2));
5276     InsInstrs.push_back(MIB1);
5277     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5278     if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
5279       Opc = AArch64::FMLAv8f16;
5280       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5281                              FMAInstKind::Accumulator, &NewVR);
5282     } else {
5283       Opc = AArch64::FMLAv8i16_indexed;
5284       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5285                              FMAInstKind::Indexed, &NewVR);
5286     }
5287     break;
5288   }
5289   case MachineCombinerPattern::FMLSv8f16_OP2:
5290     RC = &AArch64::FPR128RegClass;
5291     Opc = AArch64::FMLSv8f16;
5292     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5293                            FMAInstKind::Accumulator);
5294     break;
5295   case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
5296     RC = &AArch64::FPR128RegClass;
5297     Opc = AArch64::FMLSv8i16_indexed;
5298     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5299                            FMAInstKind::Indexed);
5300     break;
5301 
5302   case MachineCombinerPattern::FMLSv2f64_OP2:
5303   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
5304     RC = &AArch64::FPR128RegClass;
5305     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
5306       Opc = AArch64::FMLSv2i64_indexed;
5307       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5308                              FMAInstKind::Indexed);
5309     } else {
5310       Opc = AArch64::FMLSv2f64;
5311       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5312                              FMAInstKind::Accumulator);
5313     }
5314     break;
5315 
5316   case MachineCombinerPattern::FMLSv4f32_OP2:
5317   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
5318     RC = &AArch64::FPR128RegClass;
5319     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
5320       Opc = AArch64::FMLSv4i32_indexed;
5321       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5322                              FMAInstKind::Indexed);
5323     } else {
5324       Opc = AArch64::FMLSv4f32;
5325       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5326                              FMAInstKind::Accumulator);
5327     }
5328     break;
5329   case MachineCombinerPattern::FMLSv2f32_OP1:
5330   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
5331     RC = &AArch64::FPR64RegClass;
5332     Register NewVR = MRI.createVirtualRegister(RC);
5333     MachineInstrBuilder MIB1 =
5334         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
5335             .add(Root.getOperand(2));
5336     InsInstrs.push_back(MIB1);
5337     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5338     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
5339       Opc = AArch64::FMLAv2i32_indexed;
5340       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5341                              FMAInstKind::Indexed, &NewVR);
5342     } else {
5343       Opc = AArch64::FMLAv2f32;
5344       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5345                              FMAInstKind::Accumulator, &NewVR);
5346     }
5347     break;
5348   }
5349   case MachineCombinerPattern::FMLSv4f32_OP1:
5350   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
5351     RC = &AArch64::FPR128RegClass;
5352     Register NewVR = MRI.createVirtualRegister(RC);
5353     MachineInstrBuilder MIB1 =
5354         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
5355             .add(Root.getOperand(2));
5356     InsInstrs.push_back(MIB1);
5357     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5358     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
5359       Opc = AArch64::FMLAv4i32_indexed;
5360       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5361                              FMAInstKind::Indexed, &NewVR);
5362     } else {
5363       Opc = AArch64::FMLAv4f32;
5364       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5365                              FMAInstKind::Accumulator, &NewVR);
5366     }
5367     break;
5368   }
5369   case MachineCombinerPattern::FMLSv2f64_OP1:
5370   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
5371     RC = &AArch64::FPR128RegClass;
5372     Register NewVR = MRI.createVirtualRegister(RC);
5373     MachineInstrBuilder MIB1 =
5374         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
5375             .add(Root.getOperand(2));
5376     InsInstrs.push_back(MIB1);
5377     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5378     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
5379       Opc = AArch64::FMLAv2i64_indexed;
5380       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5381                              FMAInstKind::Indexed, &NewVR);
5382     } else {
5383       Opc = AArch64::FMLAv2f64;
5384       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5385                              FMAInstKind::Accumulator, &NewVR);
5386     }
5387     break;
5388   }
5389   } // end switch (Pattern)
5390   // Record MUL and ADD/SUB for deletion
5391   DelInstrs.push_back(MUL);
5392   DelInstrs.push_back(&Root);
5393 }
5394 
5395 /// Replace csincr-branch sequence by simple conditional branch
5396 ///
5397 /// Examples:
5398 /// 1. \code
5399 ///   csinc  w9, wzr, wzr, <condition code>
5400 ///   tbnz   w9, #0, 0x44
5401 ///    \endcode
5402 /// to
5403 ///    \code
5404 ///   b.<inverted condition code>
5405 ///    \endcode
5406 ///
5407 /// 2. \code
5408 ///   csinc w9, wzr, wzr, <condition code>
5409 ///   tbz   w9, #0, 0x44
5410 ///    \endcode
5411 /// to
5412 ///    \code
5413 ///   b.<condition code>
5414 ///    \endcode
5415 ///
5416 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
5417 /// compare's constant operand is power of 2.
5418 ///
5419 /// Examples:
5420 ///    \code
5421 ///   and  w8, w8, #0x400
5422 ///   cbnz w8, L1
5423 ///    \endcode
5424 /// to
5425 ///    \code
5426 ///   tbnz w8, #10, L1
5427 ///    \endcode
5428 ///
5429 /// \param  MI Conditional Branch
5430 /// \return True when the simple conditional branch is generated
5431 ///
5432 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
5433   bool IsNegativeBranch = false;
5434   bool IsTestAndBranch = false;
5435   unsigned TargetBBInMI = 0;
5436   switch (MI.getOpcode()) {
5437   default:
5438     llvm_unreachable("Unknown branch instruction?");
5439   case AArch64::Bcc:
5440     return false;
5441   case AArch64::CBZW:
5442   case AArch64::CBZX:
5443     TargetBBInMI = 1;
5444     break;
5445   case AArch64::CBNZW:
5446   case AArch64::CBNZX:
5447     TargetBBInMI = 1;
5448     IsNegativeBranch = true;
5449     break;
5450   case AArch64::TBZW:
5451   case AArch64::TBZX:
5452     TargetBBInMI = 2;
5453     IsTestAndBranch = true;
5454     break;
5455   case AArch64::TBNZW:
5456   case AArch64::TBNZX:
5457     TargetBBInMI = 2;
5458     IsNegativeBranch = true;
5459     IsTestAndBranch = true;
5460     break;
5461   }
5462   // The CSINC-of-zero pattern only produces a value in bit 0, so a
5463   // test-and-branch on any other bit makes no sense. Conservatively bail
5464   // out in case the verifier missed this case.
5465   if (IsTestAndBranch && MI.getOperand(1).getImm())
5466     return false;
5467 
5468   // Find Definition.
5469   assert(MI.getParent() && "Incomplete machine instruction\n");
5470   MachineBasicBlock *MBB = MI.getParent();
5471   MachineFunction *MF = MBB->getParent();
5472   MachineRegisterInfo *MRI = &MF->getRegInfo();
5473   Register VReg = MI.getOperand(0).getReg();
5474   if (!Register::isVirtualRegister(VReg))
5475     return false;
5476 
5477   MachineInstr *DefMI = MRI->getVRegDef(VReg);
5478 
5479   // Look through COPY instructions to find definition.
5480   while (DefMI->isCopy()) {
5481     Register CopyVReg = DefMI->getOperand(1).getReg();
5482     if (!MRI->hasOneNonDBGUse(CopyVReg))
5483       return false;
5484     if (!MRI->hasOneDef(CopyVReg))
5485       return false;
5486     DefMI = MRI->getVRegDef(CopyVReg);
5487   }
5488 
5489   switch (DefMI->getOpcode()) {
5490   default:
5491     return false;
5492   // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
5493   case AArch64::ANDWri:
5494   case AArch64::ANDXri: {
5495     if (IsTestAndBranch)
5496       return false;
5497     if (DefMI->getParent() != MBB)
5498       return false;
5499     if (!MRI->hasOneNonDBGUse(VReg))
5500       return false;
5501 
5502     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
5503     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
5504         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
5505     if (!isPowerOf2_64(Mask))
5506       return false;
5507 
5508     MachineOperand &MO = DefMI->getOperand(1);
5509     Register NewReg = MO.getReg();
5510     if (!Register::isVirtualRegister(NewReg))
5511       return false;
5512 
5513     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
5514 
5515     MachineBasicBlock &RefToMBB = *MBB;
5516     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
5517     DebugLoc DL = MI.getDebugLoc();
5518     unsigned Imm = Log2_64(Mask);
5519     unsigned Opc = (Imm < 32)
5520                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
5521                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
5522     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
5523                               .addReg(NewReg)
5524                               .addImm(Imm)
5525                               .addMBB(TBB);
5526     // Register lives on to the TBZ/TBNZ now.
5527     MO.setIsKill(false);
5528 
5529     // For bit positions smaller than 32, we must use the 32-bit
5530     // variant (W) in all cases, since the 64-bit variant cannot
5531     // encode them.
5532     // Therefore, if the input register is 64-bit, we need to take its
5533     // 32-bit sub-register.
5534     if (!Is32Bit && Imm < 32)
5535       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
5536     MI.eraseFromParent();
5537     return true;
5538   }
5539   // Look for CSINC
5540   case AArch64::CSINCWr:
5541   case AArch64::CSINCXr: {
5542     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
5543           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
5544         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
5545           DefMI->getOperand(2).getReg() == AArch64::XZR))
5546       return false;
5547 
5548     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
5549       return false;
5550 
5551     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
5552     // Convert only when the condition code is not modified between
5553     // the CSINC and the branch. The CC may be used by other
5554     // instructions in between.
5555     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
5556       return false;
5557     MachineBasicBlock &RefToMBB = *MBB;
5558     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
5559     DebugLoc DL = MI.getDebugLoc();
5560     if (IsNegativeBranch)
5561       CC = AArch64CC::getInvertedCondCode(CC);
5562     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
5563     MI.eraseFromParent();
5564     return true;
5565   }
5566   }
5567 }
5568 
5569 std::pair<unsigned, unsigned>
5570 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
5571   const unsigned Mask = AArch64II::MO_FRAGMENT;
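  // For example (illustrative): a flag value of MO_PAGEOFF | MO_NC splits
  // into the direct part MO_PAGEOFF and the bitmask part MO_NC.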
5572   return std::make_pair(TF & Mask, TF & ~Mask);
5573 }
5574 
5575 ArrayRef<std::pair<unsigned, const char *>>
5576 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
5577   using namespace AArch64II;
5578 
5579   static const std::pair<unsigned, const char *> TargetFlags[] = {
5580       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
5581       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
5582       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
5583       {MO_HI12, "aarch64-hi12"}};
5584   return makeArrayRef(TargetFlags);
5585 }
5586 
5587 ArrayRef<std::pair<unsigned, const char *>>
5588 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
5589   using namespace AArch64II;
5590 
5591   static const std::pair<unsigned, const char *> TargetFlags[] = {
5592       {MO_COFFSTUB, "aarch64-coffstub"},
5593       {MO_GOT, "aarch64-got"},
5594       {MO_NC, "aarch64-nc"},
5595       {MO_S, "aarch64-s"},
5596       {MO_TLS, "aarch64-tls"},
5597       {MO_DLLIMPORT, "aarch64-dllimport"},
5598       {MO_PREL, "aarch64-prel"},
5599       {MO_TAGGED, "aarch64-tagged"}};
5600   return makeArrayRef(TargetFlags);
5601 }
5602 
5603 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
5604 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
5605   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
5606       {{MOSuppressPair, "aarch64-suppress-pair"},
5607        {MOStridedAccess, "aarch64-strided-access"}};
5608   return makeArrayRef(TargetFlags);
5609 }
5610 
5611 /// Constants defining how certain sequences should be outlined.
5612 /// This encompasses how an outlined function should be called, and what kind of
5613 /// frame should be emitted for that outlined function.
5614 ///
5615 /// \p MachineOutlinerDefault implies that the function should be called with
5616 /// a save and restore of LR to the stack.
5617 ///
5618 /// That is,
5619 ///
5620 /// I1     Save LR                    OUTLINED_FUNCTION:
5621 /// I2 --> BL OUTLINED_FUNCTION       I1
5622 /// I3     Restore LR                 I2
5623 ///                                   I3
5624 ///                                   RET
5625 ///
5626 /// * Call construction overhead: 3 (save + BL + restore)
5627 /// * Frame construction overhead: 1 (ret)
5628 /// * Requires stack fixups? Yes
5629 ///
5630 /// \p MachineOutlinerTailCall implies that the function is being created from
5631 /// a sequence of instructions ending in a return.
5632 ///
5633 /// That is,
5634 ///
5635 /// I1                             OUTLINED_FUNCTION:
5636 /// I2 --> B OUTLINED_FUNCTION     I1
5637 /// RET                            I2
5638 ///                                RET
5639 ///
5640 /// * Call construction overhead: 1 (B)
5641 /// * Frame construction overhead: 0 (Return included in sequence)
5642 /// * Requires stack fixups? No
5643 ///
5644 /// \p MachineOutlinerNoLRSave implies that the function should be called using
5645 /// a BL instruction, but doesn't require LR to be saved and restored. This
5646 /// happens when LR is known to be dead.
5647 ///
5648 /// That is,
5649 ///
5650 /// I1                                OUTLINED_FUNCTION:
5651 /// I2 --> BL OUTLINED_FUNCTION       I1
5652 /// I3                                I2
5653 ///                                   I3
5654 ///                                   RET
5655 ///
5656 /// * Call construction overhead: 1 (BL)
5657 /// * Frame construction overhead: 1 (RET)
5658 /// * Requires stack fixups? No
5659 ///
5660 /// \p MachineOutlinerThunk implies that the function is being created from
5661 /// a sequence of instructions ending in a call. The outlined function is
5662 /// called with a BL instruction, and the outlined function tail-calls the
5663 /// original call destination.
5664 ///
5665 /// That is,
5666 ///
5667 /// I1                                OUTLINED_FUNCTION:
5668 /// I2 --> BL OUTLINED_FUNCTION       I1
5669 /// BL f                              I2
5670 ///                                   B f
5671 /// * Call construction overhead: 1 (BL)
5672 /// * Frame construction overhead: 0
5673 /// * Requires stack fixups? No
5674 ///
5675 /// \p MachineOutlinerRegSave implies that the function should be called with a
5676 /// save and restore of LR to an available register. This allows us to avoid
5677 /// stack fixups. Note that this outlining variant is compatible with the
5678 /// NoLRSave case.
5679 ///
5680 /// That is,
5681 ///
5682 /// I1     Save LR                    OUTLINED_FUNCTION:
5683 /// I2 --> BL OUTLINED_FUNCTION       I1
5684 /// I3     Restore LR                 I2
5685 ///                                   I3
5686 ///                                   RET
5687 ///
5688 /// * Call construction overhead: 3 (save + BL + restore)
5689 /// * Frame construction overhead: 1 (ret)
5690 /// * Requires stack fixups? No
5691 enum MachineOutlinerClass {
5692   MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
5693   MachineOutlinerTailCall, /// Only emit a branch.
5694   MachineOutlinerNoLRSave, /// Emit a call and return.
5695   MachineOutlinerThunk,    /// Emit a call and tail-call.
5696   MachineOutlinerRegSave   /// Same as default, but save to a register.
5697 };
5698 
5699 enum MachineOutlinerMBBFlags {
5700   LRUnavailableSomewhere = 0x2,
5701   HasCalls = 0x4,
5702   UnsafeRegsDead = 0x8
5703 };
5704 
5705 unsigned
5706 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
5707   assert(C.LRUWasSet && "LRU wasn't set?");
5708   MachineFunction *MF = C.getMF();
5709   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5710       MF->getSubtarget().getRegisterInfo());
5711 
5712   // Check if there is an available register across the sequence that we can
5713   // use.
5714   for (unsigned Reg : AArch64::GPR64RegClass) {
5715     if (!ARI->isReservedReg(*MF, Reg) &&
5716         Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
5717         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
5718         Reg != AArch64::X17 && // Ditto for X17.
5719         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
5720       return Reg;
5721   }
5722 
5723   // No suitable register. Return 0.
5724   return 0u;
5725 }
5726 
5727 static bool
5728 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
5729                                          const outliner::Candidate &b) {
5730   const Function &Fa = a.getMF()->getFunction();
5731   const Function &Fb = b.getMF()->getFunction();
5732 
5733   // If neither function has the "sign-return-address" attribute, their
5734   // signing behaviour is equal.
5735   if (!Fa.hasFnAttribute("sign-return-address") &&
5736       !Fb.hasFnAttribute("sign-return-address")) {
5737     return true;
5738   }
5739 
5740   // If both functions have the "sign-return-address" attribute, their
5741   // signing behaviour is equal if the attribute values are equal.
5742   if (Fa.hasFnAttribute("sign-return-address") &&
5743       Fb.hasFnAttribute("sign-return-address")) {
5744     StringRef ScopeA =
5745         Fa.getFnAttribute("sign-return-address").getValueAsString();
5746     StringRef ScopeB =
5747         Fb.getFnAttribute("sign-return-address").getValueAsString();
5748     return ScopeA.equals(ScopeB);
5749   }
5750 
5751   // If function B doesn't have the "sign-return-address" attribute but A does,
5752   // the functions' signing behaviour is equal if A's value for
5753   // "sign-return-address" is "none" and vice versa.
5754   if (Fa.hasFnAttribute("sign-return-address")) {
5755     StringRef ScopeA =
5756         Fa.getFnAttribute("sign-return-address").getValueAsString();
5757     return ScopeA.equals("none");
5758   }
5759 
5760   if (Fb.hasFnAttribute("sign-return-address")) {
5761     StringRef ScopeB =
5762         Fb.getFnAttribute("sign-return-address").getValueAsString();
5763     return ScopeB.equals("none");
5764   }
5765 
5766   llvm_unreachable("Unkown combination of sign-return-address attributes");
5767 }
5768 
5769 static bool
5770 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
5771                                        const outliner::Candidate &b) {
5772   const Function &Fa = a.getMF()->getFunction();
5773   const Function &Fb = b.getMF()->getFunction();
5774 
5775   // If neither function has the "sign-return-address-key" attribute,
5776   // their keys are equal.
5777   if (!Fa.hasFnAttribute("sign-return-address-key") &&
5778       !Fb.hasFnAttribute("sign-return-address-key")) {
5779     return true;
5780   }
5781 
5782   // If both functions have the "sign-return-address-key" attribute, their
5783   // keys are equal if the attribute values are equal.
5784   if (Fa.hasFnAttribute("sign-return-address-key") &&
5785       Fb.hasFnAttribute("sign-return-address-key")) {
5786     StringRef KeyA =
5787         Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5788     StringRef KeyB =
5789         Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5790     return KeyA.equals(KeyB);
5791   }
5792 
5793   // If B doesn't have the "sign-return-address-key" attribute, the keys are
5794   // equal if function A uses the default key (a_key).
5795   if (Fa.hasFnAttribute("sign-return-address-key")) {
5796     StringRef KeyA =
5797         Fa.getFnAttribute("sign-return-address-key").getValueAsString();
5798     return KeyA.equals_lower("a_key");
5799   }
5800 
5801   if (Fb.hasFnAttribute("sign-return-address-key")) {
5802     StringRef KeyB =
5803         Fb.getFnAttribute("sign-return-address-key").getValueAsString();
5804     return KeyB.equals_lower("a_key");
5805   }
5806 
5807   llvm_unreachable("Unkown combination of sign-return-address-key attributes");
5808 }
5809 
5810 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
5811                                                 const outliner::Candidate &b) {
5812   const AArch64Subtarget &SubtargetA =
5813       a.getMF()->getSubtarget<AArch64Subtarget>();
5814   const AArch64Subtarget &SubtargetB =
5815       b.getMF()->getSubtarget<AArch64Subtarget>();
5816   return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
5817 }
5818 
5819 outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
5820     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5821   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
5822   unsigned SequenceSize =
5823       std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
5824                       [this](unsigned Sum, const MachineInstr &MI) {
5825                         return Sum + getInstSizeInBytes(MI);
5826                       });
5827   unsigned NumBytesToCreateFrame = 0;
5828 
5829   // We only allow outlining for functions having exactly matching return
5830   // address signing attributes, i.e., all share the same value for the
5831   // attribute "sign-return-address" and all share the same type of key they
5832   // are signed with.
5833   // Additionally, we require all functions to simultaneously either support
5834   // v8.3a features or not. Otherwise an outlined function could get signed
5835   // using dedicated v8.3 instructions and a call from a function that doesn't
5836   // support v8.3 instructions would therefore be invalid.
5837   if (std::adjacent_find(
5838           RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5839           [](const outliner::Candidate &a, const outliner::Candidate &b) {
5840             // Return true if a and b are non-equal w.r.t. return address
5841             // signing or support of v8.3a features
5842             if (outliningCandidatesSigningScopeConsensus(a, b) &&
5843                 outliningCandidatesSigningKeyConsensus(a, b) &&
5844                 outliningCandidatesV8_3OpsConsensus(a, b)) {
5845               return false;
5846             }
5847             return true;
5848           }) != RepeatedSequenceLocs.end()) {
5849     return outliner::OutlinedFunction();
5850   }
5851 
5852   // Since at this point all candidates agree on their return address signing,
5853   // picking just one is fine. If the candidate functions potentially sign their
5854   // return addresses, the outlined function should do the same. Note that in
5855   // the case of "sign-return-address"="non-leaf" this is an assumption: it is
5856   // not certain that the outlined function will have to sign its return
5857   // address, but that decision is made later, after the decision to outline
5858   // has already been made.
5859   // The same holds for the number of additional instructions we need: On
5860   // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
5861   // necessary. However, at this point we don't know if the outlined function
5862   // will have a RET instruction so we assume the worst.
5863   const Function &FCF = FirstCand.getMF()->getFunction();
5864   const TargetRegisterInfo &TRI = getRegisterInfo();
5865   if (FCF.hasFnAttribute("sign-return-address")) {
5866     // One PAC and one AUT instructions
5867     NumBytesToCreateFrame += 8;
5868 
5869     // We have to check whether any SP-modifying instructions would get
5870     // outlined. If so, we only allow outlining if SP is unchanged overall:
5871     // matching sub and add instructions are okay to outline; all other SP
5872     // modifications are not.
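    // For example (illustrative): a candidate containing both
    //   sub sp, sp, #16  ...  add sp, sp, #16
    // leaves SP unchanged overall and is acceptable, whereas an unmatched
    //   sub sp, sp, #16
    // changes SP across the outlined call and disqualifies the candidate.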
5873     auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
5874       int SPValue = 0;
5875       MachineBasicBlock::iterator MBBI = C.front();
5876       for (;;) {
5877         if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
5878           switch (MBBI->getOpcode()) {
5879           case AArch64::ADDXri:
5880           case AArch64::ADDWri:
5881             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5882             assert(MBBI->getOperand(2).isImm() &&
5883                    "Expected operand to be immediate");
5884             assert(MBBI->getOperand(1).isReg() &&
5885                    "Expected operand to be a register");
5886             // Check if the add just increments sp. If so, we search for
5887             // matching sub instructions that decrement sp. If not, the
5888             // modification is illegal
5889             if (MBBI->getOperand(1).getReg() == AArch64::SP)
5890               SPValue += MBBI->getOperand(2).getImm();
5891             else
5892               return true;
5893             break;
5894           case AArch64::SUBXri:
5895           case AArch64::SUBWri:
5896             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
5897             assert(MBBI->getOperand(2).isImm() &&
5898                    "Expected operand to be immediate");
5899             assert(MBBI->getOperand(1).isReg() &&
5900                    "Expected operand to be a register");
5901             // Check if the sub just decrements sp. If so, we search for
5902             // matching add instructions that increment sp. If not, the
5903             // modification is illegal
5904             if (MBBI->getOperand(1).getReg() == AArch64::SP)
5905               SPValue -= MBBI->getOperand(2).getImm();
5906             else
5907               return true;
5908             break;
5909           default:
5910             return true;
5911           }
5912         }
5913         if (MBBI == C.back())
5914           break;
5915         ++MBBI;
5916       }
5917       if (SPValue)
5918         return true;
5919       return false;
5920     };
5921     // Remove candidates with illegal stack modifying instructions
5922     RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5923                                               RepeatedSequenceLocs.end(),
5924                                               hasIllegalSPModification),
5925                                RepeatedSequenceLocs.end());
5926 
5927     // If the sequence doesn't have enough candidates left, then we're done.
5928     if (RepeatedSequenceLocs.size() < 2)
5929       return outliner::OutlinedFunction();
5930   }
5931 
5932   // Properties about candidate MBBs that hold for all of them.
5933   unsigned FlagsSetInAll = 0xF;
5934 
5935   // Compute liveness information for each candidate, and set FlagsSetInAll.
5936   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5937                 [&FlagsSetInAll](outliner::Candidate &C) {
5938                   FlagsSetInAll &= C.Flags;
5939                 });
5940 
5941   // According to the AArch64 Procedure Call Standard, the following are
5942   // undefined on entry/exit from a function call:
5943   //
5944   // * Registers x16, x17, (and thus w16, w17)
5945   // * Condition codes (and thus the NZCV register)
5946   //
5947   // Because of this, we can't outline any sequence of instructions where
5948   // one of these registers is live into/across it. Thus, we need to delete
5949   // those candidates.
5952   auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
5953     // If the unsafe registers in this block are all dead, then we don't need
5954     // to compute liveness here.
5955     if (C.Flags & UnsafeRegsDead)
5956       return false;
5957     C.initLRU(TRI);
5958     LiveRegUnits LRU = C.LRU;
5959     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
5960             !LRU.available(AArch64::NZCV));
5961   };
5962 
5963   // Are there any candidates where those registers are live?
5964   if (!(FlagsSetInAll & UnsafeRegsDead)) {
5965     // Erase every candidate that violates the restrictions above. (It could be
5966     // true that we have viable candidates, so it's not worth bailing out in
5967     // the case that, say, 1 out of 20 candidates violate the restrictions.)
5968     RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5969                                               RepeatedSequenceLocs.end(),
5970                                               CantGuaranteeValueAcrossCall),
5971                                RepeatedSequenceLocs.end());
5972 
5973     // If the sequence doesn't have enough candidates left, then we're done.
5974     if (RepeatedSequenceLocs.size() < 2)
5975       return outliner::OutlinedFunction();
5976   }
5977 
5978   // At this point, we have only "safe" candidates to outline. Figure out
5979   // frame + call instruction information.
5980 
5981   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
5982 
5983   // Helper lambda which sets call information for every candidate.
5984   auto SetCandidateCallInfo =
5985       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5986         for (outliner::Candidate &C : RepeatedSequenceLocs)
5987           C.setCallInfo(CallID, NumBytesForCall);
5988       };
5989 
5990   unsigned FrameID = MachineOutlinerDefault;
5991   NumBytesToCreateFrame += 4;
5992 
5993   bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
5994     return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
5995   });
5996 
5997   // We check whether CFI instructions are present, and if they are,
5998   // we count the number of CFI instructions in the candidate.
5999   unsigned CFICount = 0;
6000   MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
6001   for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
6002        Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
6003     const std::vector<MCCFIInstruction> &CFIInstructions =
6004         RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
6005     if (MBBI->isCFIInstruction()) {
6006       unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
6007       MCCFIInstruction CFI = CFIInstructions[CFIIndex];
6008       CFICount++;
6009     }
6010     MBBI++;
6011   }
6012 
6013   // We compare the number of found CFI instructions to the number of CFI
6014   // instructions in the parent function for each candidate. We must check this
6015   // since if we outline one of the CFI instructions in a function, we have to
6016   // outline them all for correctness. If we do not, the address offsets will be
6017   // incorrect between the two sections of the program.
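  // For example (illustrative): if a candidate covers 2 CFI instructions but
  // its parent function contains 3, outlining the candidate would leave the
  // remaining CFI instruction describing offsets that no longer line up, so
  // such candidates are rejected below.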
6018   for (outliner::Candidate &C : RepeatedSequenceLocs) {
6019     std::vector<MCCFIInstruction> CFIInstructions =
6020         C.getMF()->getFrameInstructions();
6021 
6022     if (CFICount > 0 && CFICount != CFIInstructions.size())
6023       return outliner::OutlinedFunction();
6024   }
6025 
6026   // Returns true if an instruction is safe to fix up, false otherwise.
6027   auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
6028     if (MI.isCall())
6029       return true;
6030 
6031     if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
6032         !MI.readsRegister(AArch64::SP, &TRI))
6033       return true;
6034 
6035     // Any modification of SP will break our code to save/restore LR.
6036     // FIXME: We could handle some instructions which add a constant
6037     // offset to SP, with a bit more work.
6038     if (MI.modifiesRegister(AArch64::SP, &TRI))
6039       return false;
6040 
6041     // At this point, we have a stack instruction that we might need to
6042     // fix up. We'll handle it if it's a load or store.
6043     if (MI.mayLoadOrStore()) {
6044       const MachineOperand *Base; // Filled with the base operand of MI.
6045       int64_t Offset;             // Filled with the offset of MI.
6046       bool OffsetIsScalable;
6047 
6048       // Does it allow us to offset the base operand and is the base the
6049       // register SP?
6050       if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
6051           !Base->isReg() || Base->getReg() != AArch64::SP)
6052         return false;
6053 
6054       // Fix-up code below assumes byte offsets.
6055       if (OffsetIsScalable)
6056         return false;
6057 
6058       // Find the minimum/maximum offset for this instruction and check
6059       // if fixing it up would be in range.
6060       int64_t MinOffset,
6061           MaxOffset;  // Unscaled offsets for the instruction.
6062       TypeSize Scale(0U, false); // The scale to multiply the offsets by.
6063       unsigned DummyWidth;
6064       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
6065 
6066       Offset += 16; // Update the offset to what it would be if we outlined.
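      // For example (illustrative): "ldr x0, [sp, #8]" becomes
      // "ldr x0, [sp, #24]" once LR has been spilled below it, and the
      // adjusted offset must still fit this opcode's immediate range.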
6067       if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
6068           Offset > MaxOffset * (int64_t)Scale.getFixedSize())
6069         return false;
6070 
6071       // It's in range, so we can outline it.
6072       return true;
6073     }
6074 
6075     // FIXME: Add handling for instructions like "add x0, sp, #8".
6076 
6077     // We can't fix it up, so don't outline it.
6078     return false;
6079   };
6080 
6081   // True if it's possible to fix up each stack instruction in this sequence.
6082   // Important for frames/call variants that modify the stack.
6083   bool AllStackInstrsSafe = std::all_of(
6084       FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
6085 
6086   // If the last instruction in any candidate is a terminator, then we should
6087   // tail call all of the candidates.
6088   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
6089     FrameID = MachineOutlinerTailCall;
6090     NumBytesToCreateFrame = 0;
6091     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
6092   }
6093 
6094   else if (LastInstrOpcode == AArch64::BL ||
6095            ((LastInstrOpcode == AArch64::BLR ||
6096              LastInstrOpcode == AArch64::BLRNoIP) &&
6097             !HasBTI)) {
6098     // FIXME: Do we need to check if the code after this uses the value of LR?
6099     FrameID = MachineOutlinerThunk;
6100     NumBytesToCreateFrame = 0;
6101     SetCandidateCallInfo(MachineOutlinerThunk, 4);
6102   }
6103 
6104   else {
6105     // We need to decide how to emit calls + frames. We can always emit the same
6106     // frame if we don't need to save to the stack. If we have to save to the
6107     // stack, then we need a different frame.
6108     unsigned NumBytesNoStackCalls = 0;
6109     std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
6110 
6111     // Check if we have to save LR.
6112     for (outliner::Candidate &C : RepeatedSequenceLocs) {
6113       C.initLRU(TRI);
6114 
6115       // If we have a noreturn caller, then we're going to be conservative and
6116       // say that we have to save LR. If we don't have a ret at the end of the
6117       // block, then we can't reason about liveness accurately.
6118       //
6119       // FIXME: We can probably do better than always disabling this in
6120       // noreturn functions by fixing up the liveness info.
6121       bool IsNoReturn =
6122           C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
6123 
6124       // Is LR available? If so, we don't need a save.
6125       if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
6126         NumBytesNoStackCalls += 4;
6127         C.setCallInfo(MachineOutlinerNoLRSave, 4);
6128         CandidatesWithoutStackFixups.push_back(C);
6129       }
6130 
6131       // Is an unused register available? If so, we won't modify the stack, so
6132       // we can outline with the same frame type as those that don't save LR.
6133       else if (findRegisterToSaveLRTo(C)) {
6134         NumBytesNoStackCalls += 12;
6135         C.setCallInfo(MachineOutlinerRegSave, 12);
6136         CandidatesWithoutStackFixups.push_back(C);
6137       }
6138 
6139       // Is SP used in the sequence at all? If not, we don't have to modify
6140       // the stack, so we are guaranteed to get the same frame.
6141       else if (C.UsedInSequence.available(AArch64::SP)) {
6142         NumBytesNoStackCalls += 12;
6143         C.setCallInfo(MachineOutlinerDefault, 12);
6144         CandidatesWithoutStackFixups.push_back(C);
6145       }
6146 
6147       // If we outline this, we need to modify the stack. Pretend we don't
6148       // outline this by saving all of its bytes.
6149       else {
6150         NumBytesNoStackCalls += SequenceSize;
6151       }
6152     }
6153 
6154     // If there are no places where we have to save LR, then note that we
6155     // don't have to update the stack. Otherwise, give every candidate the
6156     // default call type, as long as it's safe to do so.
6157     if (!AllStackInstrsSafe ||
6158         NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
6159       RepeatedSequenceLocs = CandidatesWithoutStackFixups;
6160       FrameID = MachineOutlinerNoLRSave;
6161     } else {
6162       SetCandidateCallInfo(MachineOutlinerDefault, 12);
6163     }
6164 
6165     // If we dropped all of the candidates, bail out here.
6166     if (RepeatedSequenceLocs.size() < 2) {
6167       RepeatedSequenceLocs.clear();
6168       return outliner::OutlinedFunction();
6169     }
6170   }
6171 
6172   // Does every candidate's MBB contain a call? If so, then we might have a call
6173   // in the range.
6174   if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6175     // Check if the range contains a call. These require a save + restore of the
6176     // link register.
6177     bool ModStackToSaveLR = false;
6178     if (std::any_of(FirstCand.front(), FirstCand.back(),
6179                     [](const MachineInstr &MI) { return MI.isCall(); }))
6180       ModStackToSaveLR = true;
6181 
6182     // Handle the last instruction separately. If this is a tail call, then the
6183     // last instruction is a call. We don't want to save + restore in this case.
6184     // However, it could be possible that the last instruction is a call without
6185     // it being valid to tail call this sequence. We should consider this as
6186     // well.
6187     else if (FrameID != MachineOutlinerThunk &&
6188              FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
6189       ModStackToSaveLR = true;
6190 
6191     if (ModStackToSaveLR) {
6192       // We can't fix up the stack. Bail out.
6193       if (!AllStackInstrsSafe) {
6194         RepeatedSequenceLocs.clear();
6195         return outliner::OutlinedFunction();
6196       }
6197 
6198       // Save + restore LR.
6199       NumBytesToCreateFrame += 8;
6200     }
6201   }
6202 
6203   // If we have CFI instructions, we can only outline if the outlined section
6204   // can be a tail call
6205   if (FrameID != MachineOutlinerTailCall && CFICount > 0)
6206     return outliner::OutlinedFunction();
6207 
6208   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
6209                                     NumBytesToCreateFrame, FrameID);
6210 }
6211 
6212 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
6213     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
6214   const Function &F = MF.getFunction();
6215 
6216   // Can F be deduplicated by the linker? If it can, don't outline from it.
6217   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
6218     return false;
6219 
6220   // Don't outline from functions with section markings; the program could
6221   // expect that all the code is in the named section.
6222   // FIXME: Allow outlining from multiple functions with the same section
6223   // marking.
6224   if (F.hasSection())
6225     return false;
6226 
6227   // Outlining from functions with redzones is unsafe since the outliner may
6228   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
6229   // outline from it.
6230   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
6231   if (!AFI || AFI->hasRedZone().getValueOr(true))
6232     return false;
6233 
6234   // FIXME: Teach the outliner to generate/handle Windows unwind info.
6235   if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
6236     return false;
6237 
6238   // It's safe to outline from MF.
6239   return true;
6240 }
6241 
6242 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
6243                                               unsigned &Flags) const {
6244   // Check if LR is available through all of the MBB. If it's not, then set
6245   // a flag.
6246   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
6247          "Suitable Machine Function for outlining must track liveness");
6248   LiveRegUnits LRU(getRegisterInfo());
6249 
6250   std::for_each(MBB.rbegin(), MBB.rend(),
6251                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
6252 
6253   // Check if each of the unsafe registers is available...
6254   bool W16AvailableInBlock = LRU.available(AArch64::W16);
6255   bool W17AvailableInBlock = LRU.available(AArch64::W17);
6256   bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
6257 
6258   // If all of these are dead (and not live out), we know we don't have to check
6259   // them later.
6260   if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
6261     Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
6262 
6263   // Now, add the live outs to the set.
6264   LRU.addLiveOuts(MBB);
6265 
6266   // If any of these registers is available in the MBB, but also a live out of
6267   // the block, then we know outlining is unsafe.
6268   if (W16AvailableInBlock && !LRU.available(AArch64::W16))
6269     return false;
6270   if (W17AvailableInBlock && !LRU.available(AArch64::W17))
6271     return false;
6272   if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
6273     return false;
6274 
6275   // Check if there's a call inside this MachineBasicBlock. If there is, then
6276   // set a flag.
6277   if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
6278     Flags |= MachineOutlinerMBBFlags::HasCalls;
6279 
6280   MachineFunction *MF = MBB.getParent();
6281 
6282   // In the event that we outline, we may have to save LR. If there is an
6283   // available register in the MBB, then we'll always save LR there. Check if
6284   // this is true.
6285   bool CanSaveLR = false;
6286   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
6287       MF->getSubtarget().getRegisterInfo());
6288 
6289   // Check if there is an available register across the sequence that we can
6290   // use.
6291   for (unsigned Reg : AArch64::GPR64RegClass) {
6292     if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
6293         Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
6294       CanSaveLR = true;
6295       break;
6296     }
6297   }
6298 
6299   // Check if we have a register we can save LR to, and if LR was used
6300   // somewhere. If both of those things are true, then we need to evaluate the
6301   // safety of outlining stack instructions later.
6302   if (!CanSaveLR && !LRU.available(AArch64::LR))
6303     Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
6304 
6305   return true;
6306 }
6307 
6308 outliner::InstrType
6309 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
6310                                    unsigned Flags) const {
6311   MachineInstr &MI = *MIT;
6312   MachineBasicBlock *MBB = MI.getParent();
6313   MachineFunction *MF = MBB->getParent();
6314   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
6315 
6316   // Don't outline anything used for return address signing. The outlined
6317   // function will get signed later if needed.
6318   switch (MI.getOpcode()) {
6319   case AArch64::PACIASP:
6320   case AArch64::PACIBSP:
6321   case AArch64::AUTIASP:
6322   case AArch64::AUTIBSP:
6323   case AArch64::RETAA:
6324   case AArch64::RETAB:
6325   case AArch64::EMITBKEY:
6326     return outliner::InstrType::Illegal;
6327   }
6328 
6329   // Don't outline LOHs.
6330   if (FuncInfo->getLOHRelated().count(&MI))
6331     return outliner::InstrType::Illegal;
6332 
6333   // We can only outline these if we will tail call the outlined function, or
6334   // fix up the CFI offsets. Currently, CFI instructions are outlined only
6335   // when the outlined section is a tail call.
6336   //
6337   // FIXME: If the proper fixups for the offset are implemented, this should be
6338   // possible.
6339   if (MI.isCFIInstruction())
6340     return outliner::InstrType::Legal;
6341 
6342   // Don't allow debug values to impact outlining type.
6343   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
6344     return outliner::InstrType::Invisible;
6345 
6346   // At this point, KILL instructions don't really tell us much so we can go
6347   // ahead and skip over them.
6348   if (MI.isKill())
6349     return outliner::InstrType::Invisible;
6350 
6351   // Is this a terminator for a basic block?
6352   if (MI.isTerminator()) {
6353 
6354     // Is this the end of a function?
6355     if (MI.getParent()->succ_empty())
6356       return outliner::InstrType::Legal;
6357 
6358     // It's not, so don't outline it.
6359     return outliner::InstrType::Illegal;
6360   }
6361 
6362   // Make sure none of the operands are un-outlinable.
6363   for (const MachineOperand &MOP : MI.operands()) {
6364     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
6365         MOP.isTargetIndex())
6366       return outliner::InstrType::Illegal;
6367 
6368     // If it uses LR or W30 explicitly, then don't touch it.
6369     if (MOP.isReg() && !MOP.isImplicit() &&
6370         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
6371       return outliner::InstrType::Illegal;
6372   }
6373 
6374   // Special cases for instructions that can always be outlined, but will fail
6375   // the later tests. E.g., ADRP instructions are PC-relative, but can always
6376   // be outlined because they don't require a *specific* value to be in LR.
6377   if (MI.getOpcode() == AArch64::ADRP)
6378     return outliner::InstrType::Legal;
6379 
6380   // If MI is a call we might be able to outline it. We don't want to outline
6381   // any calls that rely on the position of items on the stack. When we outline
6382   // something containing a call, we have to emit a save and restore of LR in
6383   // the outlined function. Currently, this always happens by saving LR to the
6384   // stack. Thus, if we outline, say, half the parameters for a function call
6385   // plus the call, then we'll break the callee's expectations for the layout
6386   // of the stack.
6387   //
6388   // FIXME: Allow calls to functions which construct a stack frame, as long
6389   // as they don't access arguments on the stack.
6390   // FIXME: Figure out some way to analyze functions defined in other modules.
6391   // We should be able to compute the memory usage based on the IR calling
6392   // convention, even if we can't see the definition.
6393   if (MI.isCall()) {
6394     // Get the function associated with the call. Look at each operand and find
6395     // the one that represents the callee and get its name.
6396     const Function *Callee = nullptr;
6397     for (const MachineOperand &MOP : MI.operands()) {
6398       if (MOP.isGlobal()) {
6399         Callee = dyn_cast<Function>(MOP.getGlobal());
6400         break;
6401       }
6402     }
6403 
6404     // Never outline calls to mcount.  There isn't any rule that would require
6405     // this, but the Linux kernel's "ftrace" feature depends on it.
6406     if (Callee && Callee->getName() == "\01_mcount")
6407       return outliner::InstrType::Illegal;
6408 
6409     // If we don't know anything about the callee, assume it depends on the
6410     // stack layout of the caller. In that case, it's only legal to outline
6411     // as a tail-call. Explicitly list the call instructions we know about so we
6412     // don't get unexpected results with call pseudo-instructions.
6413     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
6414     if (MI.getOpcode() == AArch64::BLR ||
6415         MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
6416       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
6417 
6418     if (!Callee)
6419       return UnknownCallOutlineType;
6420 
6421     // We have a function we have information about. Check if it's something
6422     // we can safely outline.
6423     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
6424 
6425     // We don't know what's going on with the callee at all. Don't touch it.
6426     if (!CalleeMF)
6427       return UnknownCallOutlineType;
6428 
6429     // Check if we know anything about the callee saves on the function. If we
6430     // don't, then don't touch it, since that implies that we haven't
6431     // computed anything about its stack frame yet.
6432     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
6433     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
6434         MFI.getNumObjects() > 0)
6435       return UnknownCallOutlineType;
6436 
6437     // At this point, we can say that CalleeMF ought to not pass anything on the
6438     // stack. Therefore, we can outline it.
6439     return outliner::InstrType::Legal;
6440   }
6441 
6442   // Don't outline positions.
6443   if (MI.isPosition())
6444     return outliner::InstrType::Illegal;
6445 
6446   // Don't touch the link register or W30.
6447   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
6448       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
6449     return outliner::InstrType::Illegal;
6450 
6451   // Don't outline BTI instructions, because that will prevent the outlining
6452   // site from being indirectly callable.
6453   if (MI.getOpcode() == AArch64::HINT) {
6454     int64_t Imm = MI.getOperand(0).getImm();
6455     if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
6456       return outliner::InstrType::Illegal;
6457   }
6458 
6459   return outliner::InstrType::Legal;
6460 }
6461 
6462 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
6463   for (MachineInstr &MI : MBB) {
6464     const MachineOperand *Base;
6465     unsigned Width;
6466     int64_t Offset;
6467     bool OffsetIsScalable;
6468 
6469     // Is this a load or store with an immediate offset with SP as the base?
6470     if (!MI.mayLoadOrStore() ||
6471         !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
6472                                       &RI) ||
6473         (Base->isReg() && Base->getReg() != AArch64::SP))
6474       continue;
6475 
6476     // It is, so we have to fix it up.
6477     TypeSize Scale(0U, false);
6478     int64_t Dummy1, Dummy2;
6479 
6480     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
6481     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
6482     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
6483     assert(Scale != 0 && "Unexpected opcode!");
6484     assert(!OffsetIsScalable && "Expected offset to be a byte offset");
6485 
6486     // We've pushed the return address to the stack, so add 16 to the offset.
6487     // This is safe, since we already checked if it would overflow when we
6488     // checked if this instruction was legal to outline.
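    // For example (illustrative): an LDRXui of [sp, #8] has Offset == 8 and
    // Scale == 8, so the re-encoded immediate becomes (8 + 16) / 8 == 3,
    // i.e. the access now refers to [sp, #24].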
6489     int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
6490     StackOffsetOperand.setImm(NewImm);
6491   }
6492 }
6493 
6494 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
6495                                  bool ShouldSignReturnAddr,
6496                                  bool ShouldSignReturnAddrWithAKey) {
6497   if (ShouldSignReturnAddr) {
6498     MachineBasicBlock::iterator MBBPAC = MBB.begin();
6499     MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
6500     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
6501     const TargetInstrInfo *TII = Subtarget.getInstrInfo();
6502     DebugLoc DL;
6503 
6504     if (MBBAUT != MBB.end())
6505       DL = MBBAUT->getDebugLoc();
6506 
6507     // At the very beginning of the basic block we insert the following
6508     // depending on the key type
6509     //
6510     // a_key:                   b_key:
6511     //    PACIASP                   EMITBKEY
6512     //    CFI_INSTRUCTION           PACIBSP
6513     //                              CFI_INSTRUCTION
6514     if (ShouldSignReturnAddrWithAKey) {
6515       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP))
6516           .setMIFlag(MachineInstr::FrameSetup);
6517     } else {
6518       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
6519           .setMIFlag(MachineInstr::FrameSetup);
6520       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP))
6521           .setMIFlag(MachineInstr::FrameSetup);
6522     }
6523     unsigned CFIIndex =
6524         MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
6525     BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
6526         .addCFIIndex(CFIIndex)
6527         .setMIFlags(MachineInstr::FrameSetup);
6528 
6529     // If v8.3a features are available we can replace a RET instruction by
6530     // RETAA or RETAB and omit the AUT instructions
6531     if (Subtarget.hasV8_3aOps() && MBBAUT != MBB.end() &&
6532         MBBAUT->getOpcode() == AArch64::RET) {
6533       BuildMI(MBB, MBBAUT, DL,
6534               TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
6535                                                     : AArch64::RETAB))
6536           .copyImplicitOps(*MBBAUT);
6537       MBB.erase(MBBAUT);
6538     } else {
6539       BuildMI(MBB, MBBAUT, DL,
6540               TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
6541                                                     : AArch64::AUTIBSP))
6542           .setMIFlag(MachineInstr::FrameDestroy);
6543     }
6544   }
6545 }
6546 
6547 void AArch64InstrInfo::buildOutlinedFrame(
6548     MachineBasicBlock &MBB, MachineFunction &MF,
6549     const outliner::OutlinedFunction &OF) const {
6550 
6551   AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
6552 
6553   if (OF.FrameConstructionID == MachineOutlinerTailCall)
6554     FI->setOutliningStyle("Tail Call");
6555   else if (OF.FrameConstructionID == MachineOutlinerThunk) {
6556     // For thunk outlining, rewrite the last instruction from a call to a
6557     // tail-call.
6558     MachineInstr *Call = &*--MBB.instr_end();
6559     unsigned TailOpcode;
6560     if (Call->getOpcode() == AArch64::BL) {
6561       TailOpcode = AArch64::TCRETURNdi;
6562     } else {
6563       assert(Call->getOpcode() == AArch64::BLR ||
6564              Call->getOpcode() == AArch64::BLRNoIP);
6565       TailOpcode = AArch64::TCRETURNriALL;
6566     }
6567     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
6568                            .add(Call->getOperand(0))
6569                            .addImm(0);
6570     MBB.insert(MBB.end(), TC);
6571     Call->eraseFromParent();
6572 
6573     FI->setOutliningStyle("Thunk");
6574   }
6575 
6576   bool IsLeafFunction = true;
6577 
6578   // Is there a call in the outlined range?
6579   auto IsNonTailCall = [](const MachineInstr &MI) {
6580     return MI.isCall() && !MI.isReturn();
6581   };
6582 
6583   if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
6584     // Fix up the instructions in the range, since we're going to modify the
6585     // stack.
6586     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
6587            "Can only fix up stack references once");
6588     fixupPostOutline(MBB);
6589 
6590     IsLeafFunction = false;
6591 
6592     // LR has to be a live in so that we can save it.
6593     if (!MBB.isLiveIn(AArch64::LR))
6594       MBB.addLiveIn(AArch64::LR);
6595 
6596     MachineBasicBlock::iterator It = MBB.begin();
6597     MachineBasicBlock::iterator Et = MBB.end();
6598 
6599     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6600         OF.FrameConstructionID == MachineOutlinerThunk)
6601       Et = std::prev(MBB.end());
6602 
6603     // Insert a save before the outlined region
6604     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
6605                                 .addReg(AArch64::SP, RegState::Define)
6606                                 .addReg(AArch64::LR)
6607                                 .addReg(AArch64::SP)
6608                                 .addImm(-16);
6609     It = MBB.insert(It, STRXpre);
6610 
6611     const TargetSubtargetInfo &STI = MF.getSubtarget();
6612     const MCRegisterInfo *MRI = STI.getRegisterInfo();
6613     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
6614 
6615     // Add a CFI saying the stack was moved 16 B down.
6616     int64_t StackPosEntry =
6617         MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
6618     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6619         .addCFIIndex(StackPosEntry)
6620         .setMIFlags(MachineInstr::FrameSetup);
6621 
6622     // Add a CFI saying that the LR that we want to find is now 16 B higher than
6623     // before.
6624     int64_t LRPosEntry =
6625         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
6626     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
6627         .addCFIIndex(LRPosEntry)
6628         .setMIFlags(MachineInstr::FrameSetup);
6629 
6630     // Insert a restore before the terminator for the function.
6631     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
6632                                  .addReg(AArch64::SP, RegState::Define)
6633                                  .addReg(AArch64::LR, RegState::Define)
6634                                  .addReg(AArch64::SP)
6635                                  .addImm(16);
6636     Et = MBB.insert(Et, LDRXpost);
6637   }
6638 
6639   // If a bunch of candidates reach this point, they must agree on their return
6640   // address signing. It is therefore enough to just consider the signing
6641   // behaviour of one of them.
6642   const Function &CF = OF.Candidates.front().getMF()->getFunction();
6643   bool ShouldSignReturnAddr = false;
6644   if (CF.hasFnAttribute("sign-return-address")) {
6645     StringRef Scope =
6646         CF.getFnAttribute("sign-return-address").getValueAsString();
6647     if (Scope.equals("all"))
6648       ShouldSignReturnAddr = true;
6649     else if (Scope.equals("non-leaf") && !IsLeafFunction)
6650       ShouldSignReturnAddr = true;
6651   }
6652 
6653   // a_key is the default
6654   bool ShouldSignReturnAddrWithAKey = true;
6655   if (CF.hasFnAttribute("sign-return-address-key")) {
6656     const StringRef Key =
6657         CF.getFnAttribute("sign-return-address-key").getValueAsString();
6658     // Key can either be a_key or b_key
6659     assert((Key.equals_lower("a_key") || Key.equals_lower("b_key")) &&
6660            "Return address signing key must be either a_key or b_key");
6661     ShouldSignReturnAddrWithAKey = Key.equals_lower("a_key");
6662   }
6663 
6664   // If this is a tail call outlined function, then there's already a return.
6665   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
6666       OF.FrameConstructionID == MachineOutlinerThunk) {
6667     signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6668                          ShouldSignReturnAddrWithAKey);
6669     return;
6670   }
6671 
6672   // It's not a tail call, so we have to insert the return ourselves.
6673 
6674   // LR has to be a live in so that we can return to it.
6675   if (!MBB.isLiveIn(AArch64::LR))
6676     MBB.addLiveIn(AArch64::LR);
6677 
6678   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
6679                           .addReg(AArch64::LR);
6680   MBB.insert(MBB.end(), ret);
6681 
6682   signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
6683                        ShouldSignReturnAddrWithAKey);
6684 
6685   FI->setOutliningStyle("Function");
6686 
6687   // Did we have to modify the stack by saving the link register?
6688   if (OF.FrameConstructionID != MachineOutlinerDefault)
6689     return;
6690 
6691   // We modified the stack.
6692   // Walk over the basic block and fix up all the stack accesses.
6693   fixupPostOutline(MBB);
6694 }
6695 
6696 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
6697     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
6698     MachineFunction &MF, const outliner::Candidate &C) const {
6699 
6700   // Are we tail calling?
6701   if (C.CallConstructionID == MachineOutlinerTailCall) {
6702     // If yes, then we can just branch to the label.
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
                            .addGlobalAddress(M.getNamedValue(MF.getName()))
                            .addImm(0));
    return It;
  }

  // Are we saving the link register?
  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
      C.CallConstructionID == MachineOutlinerThunk) {
    // No, so just insert the call.
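    // A single `bl OUTLINED_FUNCTION_0` (illustrative name) is enough here:
    // for NoLRSave candidates LR is free to clobber at the call site, and for
    // Thunk candidates the LR set by this BL is, roughly speaking, the return
    // address the tail-called sequence in the thunk will return to.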
    It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                            .addGlobalAddress(M.getNamedValue(MF.getName())));
    return It;
  }

  // We want to return the spot where we inserted the call.
  MachineBasicBlock::iterator CallPt;

  // Instructions for saving and restoring LR around the call instruction we're
  // going to insert.
  MachineInstr *Save;
  MachineInstr *Restore;
  // Can we save to a register?
  if (C.CallConstructionID == MachineOutlinerRegSave) {
    // FIXME: This logic should be sunk into a target-specific interface so
    // that we don't have to recompute the register.
    unsigned Reg = findRegisterToSaveLRTo(C);
    assert(Reg != 0 && "No callee-saved register available?");

    // Save and restore LR from that register.
    Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
               .addReg(AArch64::XZR)
               .addReg(AArch64::LR)
               .addImm(0);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
                  .addReg(AArch64::XZR)
                  .addReg(Reg)
                  .addImm(0);
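    // Both are ORR with the zero register, i.e. the usual mov alias: roughly
    // `mov xN, x30` before the call and `mov x30, xN` after it, where xN is
    // the register chosen above.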
  } else {
    // We have the default case. Save and restore from SP.
    Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
               .addReg(AArch64::SP, RegState::Define)
               .addReg(AArch64::LR)
               .addReg(AArch64::SP)
               .addImm(-16);
    Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
                  .addReg(AArch64::SP, RegState::Define)
                  .addReg(AArch64::LR, RegState::Define)
                  .addReg(AArch64::SP)
                  .addImm(16);
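    // In assembly terms this pair is roughly `str x30, [sp, #-16]!` before
    // the call and `ldr x30, [sp], #16` after it, keeping SP 16-byte aligned.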
  }

  It = MBB.insert(It, Save);
  It++;

  // Insert the call.
  It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
                          .addGlobalAddress(M.getNamedValue(MF.getName())));
  CallPt = It;
  It++;

  It = MBB.insert(It, Restore);
  return CallPt;
}

bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
  MachineFunction &MF) const {
  return MF.getFunction().hasMinSize();
}

Optional<DestSourcePair>
AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {

  // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register and
  // a zero shift immediate are used as aliases for the mov instruction.
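  // For example `orr w0, wzr, w1` is the canonical form of `mov w0, w1`:
  // operand 0 is the destination, operand 1 the zero register, operand 2 the
  // source and operand 3 the (zero) shift amount.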
  if (MI.getOpcode() == AArch64::ORRWrs &&
      MI.getOperand(1).getReg() == AArch64::WZR &&
      MI.getOperand(3).getImm() == 0x0) {
    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
  }

  if (MI.getOpcode() == AArch64::ORRXrs &&
      MI.getOperand(1).getReg() == AArch64::XZR &&
      MI.getOperand(3).getImm() == 0x0) {
    return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
  }

  return None;
}

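/// If \p MI is an ADD/SUB-immediate instruction defining \p Reg, return the
/// base register and the signed byte offset added to it; for example
/// `sub x0, x1, #1, lsl #12` describes x0 as x1 - 4096 (a sketch of the
/// contract; see the opcode switch below for the exact cases handled).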
Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
                                                      Register Reg) const {
  int Sign = 1;
  int64_t Offset = 0;

  // TODO: Handle cases where Reg is a super- or sub-register of the
  // destination register.
  const MachineOperand &Op0 = MI.getOperand(0);
  if (!Op0.isReg() || Reg != Op0.getReg())
    return None;

  switch (MI.getOpcode()) {
  default:
    return None;
  case AArch64::SUBWri:
  case AArch64::SUBXri:
  case AArch64::SUBSWri:
  case AArch64::SUBSXri:
    Sign *= -1;
    LLVM_FALLTHROUGH;
  case AArch64::ADDSWri:
  case AArch64::ADDSXri:
  case AArch64::ADDWri:
  case AArch64::ADDXri: {
    // TODO: The third operand can be a global address (usually some string).
    if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
        !MI.getOperand(2).isImm())
      return None;
    Offset = MI.getOperand(2).getImm() * Sign;
    int Shift = MI.getOperand(3).getImm();
    assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
    Offset = Offset << Shift;
  }
  }
  return RegImmPair{MI.getOperand(1).getReg(), Offset};
}

/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
/// the destination register then, if possible, describe the value in terms of
/// the source register.
static Optional<ParamLoadedValue>
describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
                       const TargetInstrInfo *TII,
                       const TargetRegisterInfo *TRI) {
  auto DestSrc = TII->isCopyInstr(MI);
  if (!DestSrc)
    return None;

  Register DestReg = DestSrc->Destination->getReg();
  Register SrcReg = DestSrc->Source->getReg();

  auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});

  // If the described register is the destination, just return the source.
  if (DestReg == DescribedReg)
    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);

  // ORRWrs zero-extends to 64 bits, so we need to consider such cases.
  if (MI.getOpcode() == AArch64::ORRWrs &&
      TRI->isSuperRegister(DestReg, DescribedReg))
    return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);

  // We may need to describe the lower part of an ORRXrs move.
  if (MI.getOpcode() == AArch64::ORRXrs &&
      TRI->isSubRegister(DestReg, DescribedReg)) {
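    // e.g. when describing w0 after `mov x0, x1`, the low 32 bits of x0 come
    // from w1, so report the sub_32 piece of the source register.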
    Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
    return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
  }

  assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
         "Unhandled ORR[XW]rs copy case");

  return None;
}

Optional<ParamLoadedValue>
AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
                                      Register Reg) const {
  const MachineFunction *MF = MI.getMF();
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
  switch (MI.getOpcode()) {
  case AArch64::MOVZWi:
  case AArch64::MOVZXi: {
    // MOVZWi may be used for producing zero-extended 32-bit immediates in
    // 64-bit parameters, so we need to consider super-registers.
    if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
      return None;

    if (!MI.getOperand(1).isImm())
      return None;
    int64_t Immediate = MI.getOperand(1).getImm();
    int Shift = MI.getOperand(2).getImm();
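    // e.g. `movz w0, #1, lsl #16` describes the value 1 << 16 = 0x10000.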
    return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
                            nullptr);
  }
  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    return describeORRLoadedValue(MI, Reg, this, TRI);
  }

  return TargetInstrInfo::describeLoadedValue(MI, Reg);
}

uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
  return get(Opc).TSFlags & AArch64::ElementSizeMask;
}

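/// Return the opcode to use for an indirect call. When straight-line
/// speculation (SLS) hardening of BLR is enabled, the BLRNoIP form is used;
/// roughly speaking, that variant keeps the call target out of the IP
/// registers (X16/X17) so the SLS mitigation can rewrite the call through its
/// thunks.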
unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
  if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
    return AArch64::BLRNoIP;
  else
    return AArch64::BLR;
}

#define GET_INSTRINFO_HELPERS
#define GET_INSTRMAP_INFO
#include "AArch64GenInstrInfo.inc"