1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
14 #include "AArch64MachineFunctionInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "MCTargetDesc/AArch64AddressingModes.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineMemOperand.h"
27 #include "llvm/CodeGen/MachineOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/MachineModuleInfo.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/IR/GlobalValue.h"
35 #include "llvm/MC/MCAsmInfo.h"
36 #include "llvm/MC/MCInst.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CodeGen.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Target/TargetMachine.h"
45 #include "llvm/Target/TargetOptions.h"
46 #include <cassert>
47 #include <cstdint>
48 #include <iterator>
49 #include <utility>
50 
51 using namespace llvm;
52 
53 #define GET_INSTRINFO_CTOR_DTOR
54 #include "AArch64GenInstrInfo.inc"
55 
56 static cl::opt<unsigned> TBZDisplacementBits(
57     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
58     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
59 
60 static cl::opt<unsigned> CBZDisplacementBits(
61     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
62     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
63 
64 static cl::opt<unsigned>
65     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
66                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
67 
AArch64InstrInfo(const AArch64Subtarget & STI)68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
69     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
70                           AArch64::CATCHRET),
71       RI(STI.getTargetTriple()), Subtarget(STI) {}
72 
73 /// GetInstSize - Return the number of bytes of code the specified
74 /// instruction may be.  This returns the maximum number of bytes.
getInstSizeInBytes(const MachineInstr & MI) const75 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
76   const MachineBasicBlock &MBB = *MI.getParent();
77   const MachineFunction *MF = MBB.getParent();
78   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
79 
80   {
81     auto Op = MI.getOpcode();
82     if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
83       return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
84   }
85 
86   // FIXME: We currently only handle pseudoinstructions that don't get expanded
87   //        before the assembly printer.
88   unsigned NumBytes = 0;
89   const MCInstrDesc &Desc = MI.getDesc();
90   switch (Desc.getOpcode()) {
91   default:
92     // Anything not explicitly designated otherwise is a normal 4-byte insn.
93     NumBytes = 4;
94     break;
95   case TargetOpcode::DBG_VALUE:
96   case TargetOpcode::EH_LABEL:
97   case TargetOpcode::IMPLICIT_DEF:
98   case TargetOpcode::KILL:
99     NumBytes = 0;
100     break;
101   case TargetOpcode::STACKMAP:
102     // The upper bound for a stackmap intrinsic is the full length of its shadow
103     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
104     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
105     break;
106   case TargetOpcode::PATCHPOINT:
107     // The size of the patchpoint intrinsic is the number of bytes requested
108     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
109     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
110     break;
111   case AArch64::TLSDESC_CALLSEQ:
112     // This gets lowered to an instruction sequence which takes 16 bytes
113     NumBytes = 16;
114     break;
115   case AArch64::JumpTableDest32:
116   case AArch64::JumpTableDest16:
117   case AArch64::JumpTableDest8:
118     NumBytes = 12;
119     break;
120   case AArch64::SPACE:
121     NumBytes = MI.getOperand(1).getImm();
122     break;
123   }
124 
125   return NumBytes;
126 }
127 
parseCondBranch(MachineInstr * LastInst,MachineBasicBlock * & Target,SmallVectorImpl<MachineOperand> & Cond)128 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
129                             SmallVectorImpl<MachineOperand> &Cond) {
130   // Block ends with fall-through condbranch.
131   switch (LastInst->getOpcode()) {
132   default:
133     llvm_unreachable("Unknown branch instruction?");
134   case AArch64::Bcc:
135     Target = LastInst->getOperand(1).getMBB();
136     Cond.push_back(LastInst->getOperand(0));
137     break;
138   case AArch64::CBZW:
139   case AArch64::CBZX:
140   case AArch64::CBNZW:
141   case AArch64::CBNZX:
142     Target = LastInst->getOperand(1).getMBB();
143     Cond.push_back(MachineOperand::CreateImm(-1));
144     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
145     Cond.push_back(LastInst->getOperand(0));
146     break;
147   case AArch64::TBZW:
148   case AArch64::TBZX:
149   case AArch64::TBNZW:
150   case AArch64::TBNZX:
151     Target = LastInst->getOperand(2).getMBB();
152     Cond.push_back(MachineOperand::CreateImm(-1));
153     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
154     Cond.push_back(LastInst->getOperand(0));
155     Cond.push_back(LastInst->getOperand(1));
156   }
157 }
158 
getBranchDisplacementBits(unsigned Opc)159 static unsigned getBranchDisplacementBits(unsigned Opc) {
160   switch (Opc) {
161   default:
162     llvm_unreachable("unexpected opcode!");
163   case AArch64::B:
164     return 64;
165   case AArch64::TBNZW:
166   case AArch64::TBZW:
167   case AArch64::TBNZX:
168   case AArch64::TBZX:
169     return TBZDisplacementBits;
170   case AArch64::CBNZW:
171   case AArch64::CBZW:
172   case AArch64::CBNZX:
173   case AArch64::CBZX:
174     return CBZDisplacementBits;
175   case AArch64::Bcc:
176     return BCCDisplacementBits;
177   }
178 }
179 
isBranchOffsetInRange(unsigned BranchOp,int64_t BrOffset) const180 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
181                                              int64_t BrOffset) const {
182   unsigned Bits = getBranchDisplacementBits(BranchOp);
183   assert(Bits >= 3 && "max branch displacement must be enough to jump"
184                       "over conditional branch expansion");
185   return isIntN(Bits, BrOffset / 4);
186 }
187 
188 MachineBasicBlock *
getBranchDestBlock(const MachineInstr & MI) const189 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
190   switch (MI.getOpcode()) {
191   default:
192     llvm_unreachable("unexpected opcode!");
193   case AArch64::B:
194     return MI.getOperand(0).getMBB();
195   case AArch64::TBZW:
196   case AArch64::TBNZW:
197   case AArch64::TBZX:
198   case AArch64::TBNZX:
199     return MI.getOperand(2).getMBB();
200   case AArch64::CBZW:
201   case AArch64::CBNZW:
202   case AArch64::CBZX:
203   case AArch64::CBNZX:
204   case AArch64::Bcc:
205     return MI.getOperand(1).getMBB();
206   }
207 }
208 
209 // Branch analysis.
analyzeBranch(MachineBasicBlock & MBB,MachineBasicBlock * & TBB,MachineBasicBlock * & FBB,SmallVectorImpl<MachineOperand> & Cond,bool AllowModify) const210 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
211                                      MachineBasicBlock *&TBB,
212                                      MachineBasicBlock *&FBB,
213                                      SmallVectorImpl<MachineOperand> &Cond,
214                                      bool AllowModify) const {
215   // If the block has no terminators, it just falls into the block after it.
216   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
217   if (I == MBB.end())
218     return false;
219 
220   if (!isUnpredicatedTerminator(*I))
221     return false;
222 
223   // Get the last instruction in the block.
224   MachineInstr *LastInst = &*I;
225 
226   // If there is only one terminator instruction, process it.
227   unsigned LastOpc = LastInst->getOpcode();
228   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
229     if (isUncondBranchOpcode(LastOpc)) {
230       TBB = LastInst->getOperand(0).getMBB();
231       return false;
232     }
233     if (isCondBranchOpcode(LastOpc)) {
234       // Block ends with fall-through condbranch.
235       parseCondBranch(LastInst, TBB, Cond);
236       return false;
237     }
238     return true; // Can't handle indirect branch.
239   }
240 
241   // Get the instruction before it if it is a terminator.
242   MachineInstr *SecondLastInst = &*I;
243   unsigned SecondLastOpc = SecondLastInst->getOpcode();
244 
245   // If AllowModify is true and the block ends with two or more unconditional
246   // branches, delete all but the first unconditional branch.
247   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
248     while (isUncondBranchOpcode(SecondLastOpc)) {
249       LastInst->eraseFromParent();
250       LastInst = SecondLastInst;
251       LastOpc = LastInst->getOpcode();
252       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
253         // Return now the only terminator is an unconditional branch.
254         TBB = LastInst->getOperand(0).getMBB();
255         return false;
256       } else {
257         SecondLastInst = &*I;
258         SecondLastOpc = SecondLastInst->getOpcode();
259       }
260     }
261   }
262 
263   // If there are three terminators, we don't know what sort of block this is.
264   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
265     return true;
266 
267   // If the block ends with a B and a Bcc, handle it.
268   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
269     parseCondBranch(SecondLastInst, TBB, Cond);
270     FBB = LastInst->getOperand(0).getMBB();
271     return false;
272   }
273 
274   // If the block ends with two unconditional branches, handle it.  The second
275   // one is not executed, so remove it.
276   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
277     TBB = SecondLastInst->getOperand(0).getMBB();
278     I = LastInst;
279     if (AllowModify)
280       I->eraseFromParent();
281     return false;
282   }
283 
284   // ...likewise if it ends with an indirect branch followed by an unconditional
285   // branch.
286   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
287     I = LastInst;
288     if (AllowModify)
289       I->eraseFromParent();
290     return true;
291   }
292 
293   // Otherwise, can't handle this.
294   return true;
295 }
296 
reverseBranchCondition(SmallVectorImpl<MachineOperand> & Cond) const297 bool AArch64InstrInfo::reverseBranchCondition(
298     SmallVectorImpl<MachineOperand> &Cond) const {
299   if (Cond[0].getImm() != -1) {
300     // Regular Bcc
301     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
302     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
303   } else {
304     // Folded compare-and-branch
305     switch (Cond[1].getImm()) {
306     default:
307       llvm_unreachable("Unknown conditional branch!");
308     case AArch64::CBZW:
309       Cond[1].setImm(AArch64::CBNZW);
310       break;
311     case AArch64::CBNZW:
312       Cond[1].setImm(AArch64::CBZW);
313       break;
314     case AArch64::CBZX:
315       Cond[1].setImm(AArch64::CBNZX);
316       break;
317     case AArch64::CBNZX:
318       Cond[1].setImm(AArch64::CBZX);
319       break;
320     case AArch64::TBZW:
321       Cond[1].setImm(AArch64::TBNZW);
322       break;
323     case AArch64::TBNZW:
324       Cond[1].setImm(AArch64::TBZW);
325       break;
326     case AArch64::TBZX:
327       Cond[1].setImm(AArch64::TBNZX);
328       break;
329     case AArch64::TBNZX:
330       Cond[1].setImm(AArch64::TBZX);
331       break;
332     }
333   }
334 
335   return false;
336 }
337 
removeBranch(MachineBasicBlock & MBB,int * BytesRemoved) const338 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
339                                         int *BytesRemoved) const {
340   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
341   if (I == MBB.end())
342     return 0;
343 
344   if (!isUncondBranchOpcode(I->getOpcode()) &&
345       !isCondBranchOpcode(I->getOpcode()))
346     return 0;
347 
348   // Remove the branch.
349   I->eraseFromParent();
350 
351   I = MBB.end();
352 
353   if (I == MBB.begin()) {
354     if (BytesRemoved)
355       *BytesRemoved = 4;
356     return 1;
357   }
358   --I;
359   if (!isCondBranchOpcode(I->getOpcode())) {
360     if (BytesRemoved)
361       *BytesRemoved = 4;
362     return 1;
363   }
364 
365   // Remove the branch.
366   I->eraseFromParent();
367   if (BytesRemoved)
368     *BytesRemoved = 8;
369 
370   return 2;
371 }
372 
instantiateCondBranch(MachineBasicBlock & MBB,const DebugLoc & DL,MachineBasicBlock * TBB,ArrayRef<MachineOperand> Cond) const373 void AArch64InstrInfo::instantiateCondBranch(
374     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
375     ArrayRef<MachineOperand> Cond) const {
376   if (Cond[0].getImm() != -1) {
377     // Regular Bcc
378     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
379   } else {
380     // Folded compare-and-branch
381     // Note that we use addOperand instead of addReg to keep the flags.
382     const MachineInstrBuilder MIB =
383         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
384     if (Cond.size() > 3)
385       MIB.addImm(Cond[3].getImm());
386     MIB.addMBB(TBB);
387   }
388 }
389 
insertBranch(MachineBasicBlock & MBB,MachineBasicBlock * TBB,MachineBasicBlock * FBB,ArrayRef<MachineOperand> Cond,const DebugLoc & DL,int * BytesAdded) const390 unsigned AArch64InstrInfo::insertBranch(
391     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
392     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
393   // Shouldn't be a fall through.
394   assert(TBB && "insertBranch must not be told to insert a fallthrough");
395 
396   if (!FBB) {
397     if (Cond.empty()) // Unconditional branch?
398       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
399     else
400       instantiateCondBranch(MBB, DL, TBB, Cond);
401 
402     if (BytesAdded)
403       *BytesAdded = 4;
404 
405     return 1;
406   }
407 
408   // Two-way conditional branch.
409   instantiateCondBranch(MBB, DL, TBB, Cond);
410   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
411 
412   if (BytesAdded)
413     *BytesAdded = 8;
414 
415   return 2;
416 }
417 
418 // Find the original register that VReg is copied from.
removeCopies(const MachineRegisterInfo & MRI,unsigned VReg)419 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
420   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
421     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
422     if (!DefMI->isFullCopy())
423       return VReg;
424     VReg = DefMI->getOperand(1).getReg();
425   }
426   return VReg;
427 }
428 
429 // Determine if VReg is defined by an instruction that can be folded into a
430 // csel instruction. If so, return the folded opcode, and the replacement
431 // register.
canFoldIntoCSel(const MachineRegisterInfo & MRI,unsigned VReg,unsigned * NewVReg=nullptr)432 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
433                                 unsigned *NewVReg = nullptr) {
434   VReg = removeCopies(MRI, VReg);
435   if (!TargetRegisterInfo::isVirtualRegister(VReg))
436     return 0;
437 
438   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
439   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
440   unsigned Opc = 0;
441   unsigned SrcOpNum = 0;
442   switch (DefMI->getOpcode()) {
443   case AArch64::ADDSXri:
444   case AArch64::ADDSWri:
445     // if NZCV is used, do not fold.
446     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
447       return 0;
448     // fall-through to ADDXri and ADDWri.
449     LLVM_FALLTHROUGH;
450   case AArch64::ADDXri:
451   case AArch64::ADDWri:
452     // add x, 1 -> csinc.
453     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
454         DefMI->getOperand(3).getImm() != 0)
455       return 0;
456     SrcOpNum = 1;
457     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
458     break;
459 
460   case AArch64::ORNXrr:
461   case AArch64::ORNWrr: {
462     // not x -> csinv, represented as orn dst, xzr, src.
463     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
464     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
465       return 0;
466     SrcOpNum = 2;
467     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
468     break;
469   }
470 
471   case AArch64::SUBSXrr:
472   case AArch64::SUBSWrr:
473     // if NZCV is used, do not fold.
474     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
475       return 0;
476     // fall-through to SUBXrr and SUBWrr.
477     LLVM_FALLTHROUGH;
478   case AArch64::SUBXrr:
479   case AArch64::SUBWrr: {
480     // neg x -> csneg, represented as sub dst, xzr, src.
481     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
482     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
483       return 0;
484     SrcOpNum = 2;
485     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
486     break;
487   }
488   default:
489     return 0;
490   }
491   assert(Opc && SrcOpNum && "Missing parameters");
492 
493   if (NewVReg)
494     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
495   return Opc;
496 }
497 
canInsertSelect(const MachineBasicBlock & MBB,ArrayRef<MachineOperand> Cond,unsigned TrueReg,unsigned FalseReg,int & CondCycles,int & TrueCycles,int & FalseCycles) const498 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
499                                        ArrayRef<MachineOperand> Cond,
500                                        unsigned TrueReg, unsigned FalseReg,
501                                        int &CondCycles, int &TrueCycles,
502                                        int &FalseCycles) const {
503   // Check register classes.
504   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
505   const TargetRegisterClass *RC =
506       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
507   if (!RC)
508     return false;
509 
510   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
511   unsigned ExtraCondLat = Cond.size() != 1;
512 
513   // GPRs are handled by csel.
514   // FIXME: Fold in x+1, -x, and ~x when applicable.
515   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
516       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
517     // Single-cycle csel, csinc, csinv, and csneg.
518     CondCycles = 1 + ExtraCondLat;
519     TrueCycles = FalseCycles = 1;
520     if (canFoldIntoCSel(MRI, TrueReg))
521       TrueCycles = 0;
522     else if (canFoldIntoCSel(MRI, FalseReg))
523       FalseCycles = 0;
524     return true;
525   }
526 
527   // Scalar floating point is handled by fcsel.
528   // FIXME: Form fabs, fmin, and fmax when applicable.
529   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
530       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
531     CondCycles = 5 + ExtraCondLat;
532     TrueCycles = FalseCycles = 2;
533     return true;
534   }
535 
536   // Can't do vectors.
537   return false;
538 }
539 
insertSelect(MachineBasicBlock & MBB,MachineBasicBlock::iterator I,const DebugLoc & DL,unsigned DstReg,ArrayRef<MachineOperand> Cond,unsigned TrueReg,unsigned FalseReg) const540 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
541                                     MachineBasicBlock::iterator I,
542                                     const DebugLoc &DL, unsigned DstReg,
543                                     ArrayRef<MachineOperand> Cond,
544                                     unsigned TrueReg, unsigned FalseReg) const {
545   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
546 
547   // Parse the condition code, see parseCondBranch() above.
548   AArch64CC::CondCode CC;
549   switch (Cond.size()) {
550   default:
551     llvm_unreachable("Unknown condition opcode in Cond");
552   case 1: // b.cc
553     CC = AArch64CC::CondCode(Cond[0].getImm());
554     break;
555   case 3: { // cbz/cbnz
556     // We must insert a compare against 0.
557     bool Is64Bit;
558     switch (Cond[1].getImm()) {
559     default:
560       llvm_unreachable("Unknown branch opcode in Cond");
561     case AArch64::CBZW:
562       Is64Bit = false;
563       CC = AArch64CC::EQ;
564       break;
565     case AArch64::CBZX:
566       Is64Bit = true;
567       CC = AArch64CC::EQ;
568       break;
569     case AArch64::CBNZW:
570       Is64Bit = false;
571       CC = AArch64CC::NE;
572       break;
573     case AArch64::CBNZX:
574       Is64Bit = true;
575       CC = AArch64CC::NE;
576       break;
577     }
578     unsigned SrcReg = Cond[2].getReg();
579     if (Is64Bit) {
580       // cmp reg, #0 is actually subs xzr, reg, #0.
581       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
582       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
583           .addReg(SrcReg)
584           .addImm(0)
585           .addImm(0);
586     } else {
587       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
588       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
589           .addReg(SrcReg)
590           .addImm(0)
591           .addImm(0);
592     }
593     break;
594   }
595   case 4: { // tbz/tbnz
596     // We must insert a tst instruction.
597     switch (Cond[1].getImm()) {
598     default:
599       llvm_unreachable("Unknown branch opcode in Cond");
600     case AArch64::TBZW:
601     case AArch64::TBZX:
602       CC = AArch64CC::EQ;
603       break;
604     case AArch64::TBNZW:
605     case AArch64::TBNZX:
606       CC = AArch64CC::NE;
607       break;
608     }
609     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
610     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
611       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
612           .addReg(Cond[2].getReg())
613           .addImm(
614               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
615     else
616       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
617           .addReg(Cond[2].getReg())
618           .addImm(
619               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
620     break;
621   }
622   }
623 
624   unsigned Opc = 0;
625   const TargetRegisterClass *RC = nullptr;
626   bool TryFold = false;
627   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
628     RC = &AArch64::GPR64RegClass;
629     Opc = AArch64::CSELXr;
630     TryFold = true;
631   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
632     RC = &AArch64::GPR32RegClass;
633     Opc = AArch64::CSELWr;
634     TryFold = true;
635   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
636     RC = &AArch64::FPR64RegClass;
637     Opc = AArch64::FCSELDrrr;
638   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
639     RC = &AArch64::FPR32RegClass;
640     Opc = AArch64::FCSELSrrr;
641   }
642   assert(RC && "Unsupported regclass");
643 
644   // Try folding simple instructions into the csel.
645   if (TryFold) {
646     unsigned NewVReg = 0;
647     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
648     if (FoldedOpc) {
649       // The folded opcodes csinc, csinc and csneg apply the operation to
650       // FalseReg, so we need to invert the condition.
651       CC = AArch64CC::getInvertedCondCode(CC);
652       TrueReg = FalseReg;
653     } else
654       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
655 
656     // Fold the operation. Leave any dead instructions for DCE to clean up.
657     if (FoldedOpc) {
658       FalseReg = NewVReg;
659       Opc = FoldedOpc;
660       // The extends the live range of NewVReg.
661       MRI.clearKillFlags(NewVReg);
662     }
663   }
664 
665   // Pull all virtual register into the appropriate class.
666   MRI.constrainRegClass(TrueReg, RC);
667   MRI.constrainRegClass(FalseReg, RC);
668 
669   // Insert the csel.
670   BuildMI(MBB, I, DL, get(Opc), DstReg)
671       .addReg(TrueReg)
672       .addReg(FalseReg)
673       .addImm(CC);
674 }
675 
676 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an  ORRxx.
canBeExpandedToORR(const MachineInstr & MI,unsigned BitSize)677 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
678   uint64_t Imm = MI.getOperand(1).getImm();
679   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
680   uint64_t Encoding;
681   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
682 }
683 
684 // FIXME: this implementation should be micro-architecture dependent, so a
685 // micro-architecture target hook should be introduced here in future.
isAsCheapAsAMove(const MachineInstr & MI) const686 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
687   if (!Subtarget.hasCustomCheapAsMoveHandling())
688     return MI.isAsCheapAsAMove();
689 
690   const unsigned Opcode = MI.getOpcode();
691 
692   // Firstly, check cases gated by features.
693 
694   if (Subtarget.hasZeroCycleZeroingFP()) {
695     if (Opcode == AArch64::FMOVH0 ||
696         Opcode == AArch64::FMOVS0 ||
697         Opcode == AArch64::FMOVD0)
698       return true;
699   }
700 
701   if (Subtarget.hasZeroCycleZeroingGP()) {
702     if (Opcode == TargetOpcode::COPY &&
703         (MI.getOperand(1).getReg() == AArch64::WZR ||
704          MI.getOperand(1).getReg() == AArch64::XZR))
705       return true;
706   }
707 
708   // Secondly, check cases specific to sub-targets.
709 
710   if (Subtarget.hasExynosCheapAsMoveHandling()) {
711     if (isExynosCheapAsMove(MI))
712       return true;
713 
714     return MI.isAsCheapAsAMove();
715   }
716 
717   // Finally, check generic cases.
718 
719   switch (Opcode) {
720   default:
721     return false;
722 
723   // add/sub on register without shift
724   case AArch64::ADDWri:
725   case AArch64::ADDXri:
726   case AArch64::SUBWri:
727   case AArch64::SUBXri:
728     return (MI.getOperand(3).getImm() == 0);
729 
730   // logical ops on immediate
731   case AArch64::ANDWri:
732   case AArch64::ANDXri:
733   case AArch64::EORWri:
734   case AArch64::EORXri:
735   case AArch64::ORRWri:
736   case AArch64::ORRXri:
737     return true;
738 
739   // logical ops on register without shift
740   case AArch64::ANDWrr:
741   case AArch64::ANDXrr:
742   case AArch64::BICWrr:
743   case AArch64::BICXrr:
744   case AArch64::EONWrr:
745   case AArch64::EONXrr:
746   case AArch64::EORWrr:
747   case AArch64::EORXrr:
748   case AArch64::ORNWrr:
749   case AArch64::ORNXrr:
750   case AArch64::ORRWrr:
751   case AArch64::ORRXrr:
752     return true;
753 
754   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
755   // ORRXri, it is as cheap as MOV
756   case AArch64::MOVi32imm:
757     return canBeExpandedToORR(MI, 32);
758   case AArch64::MOVi64imm:
759     return canBeExpandedToORR(MI, 64);
760   }
761 
762   llvm_unreachable("Unknown opcode to check as cheap as a move!");
763 }
764 
isFalkorShiftExtFast(const MachineInstr & MI)765 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
766   switch (MI.getOpcode()) {
767   default:
768     return false;
769 
770   case AArch64::ADDWrs:
771   case AArch64::ADDXrs:
772   case AArch64::ADDSWrs:
773   case AArch64::ADDSXrs: {
774     unsigned Imm = MI.getOperand(3).getImm();
775     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
776     if (ShiftVal == 0)
777       return true;
778     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
779   }
780 
781   case AArch64::ADDWrx:
782   case AArch64::ADDXrx:
783   case AArch64::ADDXrx64:
784   case AArch64::ADDSWrx:
785   case AArch64::ADDSXrx:
786   case AArch64::ADDSXrx64: {
787     unsigned Imm = MI.getOperand(3).getImm();
788     switch (AArch64_AM::getArithExtendType(Imm)) {
789     default:
790       return false;
791     case AArch64_AM::UXTB:
792     case AArch64_AM::UXTH:
793     case AArch64_AM::UXTW:
794     case AArch64_AM::UXTX:
795       return AArch64_AM::getArithShiftValue(Imm) <= 4;
796     }
797   }
798 
799   case AArch64::SUBWrs:
800   case AArch64::SUBSWrs: {
801     unsigned Imm = MI.getOperand(3).getImm();
802     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
803     return ShiftVal == 0 ||
804            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
805   }
806 
807   case AArch64::SUBXrs:
808   case AArch64::SUBSXrs: {
809     unsigned Imm = MI.getOperand(3).getImm();
810     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
811     return ShiftVal == 0 ||
812            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
813   }
814 
815   case AArch64::SUBWrx:
816   case AArch64::SUBXrx:
817   case AArch64::SUBXrx64:
818   case AArch64::SUBSWrx:
819   case AArch64::SUBSXrx:
820   case AArch64::SUBSXrx64: {
821     unsigned Imm = MI.getOperand(3).getImm();
822     switch (AArch64_AM::getArithExtendType(Imm)) {
823     default:
824       return false;
825     case AArch64_AM::UXTB:
826     case AArch64_AM::UXTH:
827     case AArch64_AM::UXTW:
828     case AArch64_AM::UXTX:
829       return AArch64_AM::getArithShiftValue(Imm) == 0;
830     }
831   }
832 
833   case AArch64::LDRBBroW:
834   case AArch64::LDRBBroX:
835   case AArch64::LDRBroW:
836   case AArch64::LDRBroX:
837   case AArch64::LDRDroW:
838   case AArch64::LDRDroX:
839   case AArch64::LDRHHroW:
840   case AArch64::LDRHHroX:
841   case AArch64::LDRHroW:
842   case AArch64::LDRHroX:
843   case AArch64::LDRQroW:
844   case AArch64::LDRQroX:
845   case AArch64::LDRSBWroW:
846   case AArch64::LDRSBWroX:
847   case AArch64::LDRSBXroW:
848   case AArch64::LDRSBXroX:
849   case AArch64::LDRSHWroW:
850   case AArch64::LDRSHWroX:
851   case AArch64::LDRSHXroW:
852   case AArch64::LDRSHXroX:
853   case AArch64::LDRSWroW:
854   case AArch64::LDRSWroX:
855   case AArch64::LDRSroW:
856   case AArch64::LDRSroX:
857   case AArch64::LDRWroW:
858   case AArch64::LDRWroX:
859   case AArch64::LDRXroW:
860   case AArch64::LDRXroX:
861   case AArch64::PRFMroW:
862   case AArch64::PRFMroX:
863   case AArch64::STRBBroW:
864   case AArch64::STRBBroX:
865   case AArch64::STRBroW:
866   case AArch64::STRBroX:
867   case AArch64::STRDroW:
868   case AArch64::STRDroX:
869   case AArch64::STRHHroW:
870   case AArch64::STRHHroX:
871   case AArch64::STRHroW:
872   case AArch64::STRHroX:
873   case AArch64::STRQroW:
874   case AArch64::STRQroX:
875   case AArch64::STRSroW:
876   case AArch64::STRSroX:
877   case AArch64::STRWroW:
878   case AArch64::STRWroX:
879   case AArch64::STRXroW:
880   case AArch64::STRXroX: {
881     unsigned IsSigned = MI.getOperand(3).getImm();
882     return !IsSigned;
883   }
884   }
885 }
886 
isSEHInstruction(const MachineInstr & MI)887 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
888   unsigned Opc = MI.getOpcode();
889   switch (Opc) {
890     default:
891       return false;
892     case AArch64::SEH_StackAlloc:
893     case AArch64::SEH_SaveFPLR:
894     case AArch64::SEH_SaveFPLR_X:
895     case AArch64::SEH_SaveReg:
896     case AArch64::SEH_SaveReg_X:
897     case AArch64::SEH_SaveRegP:
898     case AArch64::SEH_SaveRegP_X:
899     case AArch64::SEH_SaveFReg:
900     case AArch64::SEH_SaveFReg_X:
901     case AArch64::SEH_SaveFRegP:
902     case AArch64::SEH_SaveFRegP_X:
903     case AArch64::SEH_SetFP:
904     case AArch64::SEH_AddFP:
905     case AArch64::SEH_Nop:
906     case AArch64::SEH_PrologEnd:
907     case AArch64::SEH_EpilogStart:
908     case AArch64::SEH_EpilogEnd:
909       return true;
910   }
911 }
912 
isCoalescableExtInstr(const MachineInstr & MI,unsigned & SrcReg,unsigned & DstReg,unsigned & SubIdx) const913 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
914                                              unsigned &SrcReg, unsigned &DstReg,
915                                              unsigned &SubIdx) const {
916   switch (MI.getOpcode()) {
917   default:
918     return false;
919   case AArch64::SBFMXri: // aka sxtw
920   case AArch64::UBFMXri: // aka uxtw
921     // Check for the 32 -> 64 bit extension case, these instructions can do
922     // much more.
923     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
924       return false;
925     // This is a signed or unsigned 32 -> 64 bit extension.
926     SrcReg = MI.getOperand(1).getReg();
927     DstReg = MI.getOperand(0).getReg();
928     SubIdx = AArch64::sub_32;
929     return true;
930   }
931 }
932 
areMemAccessesTriviallyDisjoint(const MachineInstr & MIa,const MachineInstr & MIb,AliasAnalysis * AA) const933 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
934     const MachineInstr &MIa, const MachineInstr &MIb, AliasAnalysis *AA) const {
935   const TargetRegisterInfo *TRI = &getRegisterInfo();
936   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
937   int64_t OffsetA = 0, OffsetB = 0;
938   unsigned WidthA = 0, WidthB = 0;
939 
940   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
941   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
942 
943   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
944       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
945     return false;
946 
947   // Retrieve the base, offset from the base and width. Width
948   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
949   // base are identical, and the offset of a lower memory access +
950   // the width doesn't overlap the offset of a higher memory access,
951   // then the memory accesses are different.
952   if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
953       getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
954     if (BaseOpA->isIdenticalTo(*BaseOpB)) {
955       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
956       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
957       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
958       if (LowOffset + LowWidth <= HighOffset)
959         return true;
960     }
961   }
962   return false;
963 }
964 
isSchedulingBoundary(const MachineInstr & MI,const MachineBasicBlock * MBB,const MachineFunction & MF) const965 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
966                                             const MachineBasicBlock *MBB,
967                                             const MachineFunction &MF) const {
968   if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
969     return true;
970   switch (MI.getOpcode()) {
971   case AArch64::HINT:
972     // CSDB hints are scheduling barriers.
973     if (MI.getOperand(0).getImm() == 0x14)
974       return true;
975     break;
976   case AArch64::DSB:
977   case AArch64::ISB:
978     // DSB and ISB also are scheduling barriers.
979     return true;
980   default:;
981   }
982   return isSEHInstruction(MI);
983 }
984 
985 /// analyzeCompare - For a comparison instruction, return the source registers
986 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
987 /// Return true if the comparison instruction can be analyzed.
analyzeCompare(const MachineInstr & MI,unsigned & SrcReg,unsigned & SrcReg2,int & CmpMask,int & CmpValue) const988 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
989                                       unsigned &SrcReg2, int &CmpMask,
990                                       int &CmpValue) const {
991   // The first operand can be a frame index where we'd normally expect a
992   // register.
993   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
994   if (!MI.getOperand(1).isReg())
995     return false;
996 
997   switch (MI.getOpcode()) {
998   default:
999     break;
1000   case AArch64::SUBSWrr:
1001   case AArch64::SUBSWrs:
1002   case AArch64::SUBSWrx:
1003   case AArch64::SUBSXrr:
1004   case AArch64::SUBSXrs:
1005   case AArch64::SUBSXrx:
1006   case AArch64::ADDSWrr:
1007   case AArch64::ADDSWrs:
1008   case AArch64::ADDSWrx:
1009   case AArch64::ADDSXrr:
1010   case AArch64::ADDSXrs:
1011   case AArch64::ADDSXrx:
1012     // Replace SUBSWrr with SUBWrr if NZCV is not used.
1013     SrcReg = MI.getOperand(1).getReg();
1014     SrcReg2 = MI.getOperand(2).getReg();
1015     CmpMask = ~0;
1016     CmpValue = 0;
1017     return true;
1018   case AArch64::SUBSWri:
1019   case AArch64::ADDSWri:
1020   case AArch64::SUBSXri:
1021   case AArch64::ADDSXri:
1022     SrcReg = MI.getOperand(1).getReg();
1023     SrcReg2 = 0;
1024     CmpMask = ~0;
1025     // FIXME: In order to convert CmpValue to 0 or 1
1026     CmpValue = MI.getOperand(2).getImm() != 0;
1027     return true;
1028   case AArch64::ANDSWri:
1029   case AArch64::ANDSXri:
1030     // ANDS does not use the same encoding scheme as the others xxxS
1031     // instructions.
1032     SrcReg = MI.getOperand(1).getReg();
1033     SrcReg2 = 0;
1034     CmpMask = ~0;
1035     // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
1036     // while the type of CmpValue is int. When converting uint64_t to int,
1037     // the high 32 bits of uint64_t will be lost.
1038     // In fact it causes a bug in spec2006-483.xalancbmk
1039     // CmpValue is only used to compare with zero in OptimizeCompareInstr
1040     CmpValue = AArch64_AM::decodeLogicalImmediate(
1041                    MI.getOperand(2).getImm(),
1042                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1043     return true;
1044   }
1045 
1046   return false;
1047 }
1048 
UpdateOperandRegClass(MachineInstr & Instr)1049 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1050   MachineBasicBlock *MBB = Instr.getParent();
1051   assert(MBB && "Can't get MachineBasicBlock here");
1052   MachineFunction *MF = MBB->getParent();
1053   assert(MF && "Can't get MachineFunction here");
1054   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1055   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1056   MachineRegisterInfo *MRI = &MF->getRegInfo();
1057 
1058   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1059        ++OpIdx) {
1060     MachineOperand &MO = Instr.getOperand(OpIdx);
1061     const TargetRegisterClass *OpRegCstraints =
1062         Instr.getRegClassConstraint(OpIdx, TII, TRI);
1063 
1064     // If there's no constraint, there's nothing to do.
1065     if (!OpRegCstraints)
1066       continue;
1067     // If the operand is a frame index, there's nothing to do here.
1068     // A frame index operand will resolve correctly during PEI.
1069     if (MO.isFI())
1070       continue;
1071 
1072     assert(MO.isReg() &&
1073            "Operand has register constraints without being a register!");
1074 
1075     unsigned Reg = MO.getReg();
1076     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1077       if (!OpRegCstraints->contains(Reg))
1078         return false;
1079     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1080                !MRI->constrainRegClass(Reg, OpRegCstraints))
1081       return false;
1082   }
1083 
1084   return true;
1085 }
1086 
1087 /// Return the opcode that does not set flags when possible - otherwise
1088 /// return the original opcode. The caller is responsible to do the actual
1089 /// substitution and legality checking.
convertToNonFlagSettingOpc(const MachineInstr & MI)1090 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1091   // Don't convert all compare instructions, because for some the zero register
1092   // encoding becomes the sp register.
1093   bool MIDefinesZeroReg = false;
1094   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1095     MIDefinesZeroReg = true;
1096 
1097   switch (MI.getOpcode()) {
1098   default:
1099     return MI.getOpcode();
1100   case AArch64::ADDSWrr:
1101     return AArch64::ADDWrr;
1102   case AArch64::ADDSWri:
1103     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1104   case AArch64::ADDSWrs:
1105     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1106   case AArch64::ADDSWrx:
1107     return AArch64::ADDWrx;
1108   case AArch64::ADDSXrr:
1109     return AArch64::ADDXrr;
1110   case AArch64::ADDSXri:
1111     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1112   case AArch64::ADDSXrs:
1113     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1114   case AArch64::ADDSXrx:
1115     return AArch64::ADDXrx;
1116   case AArch64::SUBSWrr:
1117     return AArch64::SUBWrr;
1118   case AArch64::SUBSWri:
1119     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1120   case AArch64::SUBSWrs:
1121     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1122   case AArch64::SUBSWrx:
1123     return AArch64::SUBWrx;
1124   case AArch64::SUBSXrr:
1125     return AArch64::SUBXrr;
1126   case AArch64::SUBSXri:
1127     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1128   case AArch64::SUBSXrs:
1129     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1130   case AArch64::SUBSXrx:
1131     return AArch64::SUBXrx;
1132   }
1133 }
1134 
1135 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1136 
1137 /// True when condition flags are accessed (either by writing or reading)
1138 /// on the instruction trace starting at From and ending at To.
1139 ///
1140 /// Note: If From and To are from different blocks it's assumed CC are accessed
1141 ///       on the path.
areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From,MachineBasicBlock::iterator To,const TargetRegisterInfo * TRI,const AccessKind AccessToCheck=AK_All)1142 static bool areCFlagsAccessedBetweenInstrs(
1143     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1144     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1145   // Early exit if To is at the beginning of the BB.
1146   if (To == To->getParent()->begin())
1147     return true;
1148 
1149   // Check whether the instructions are in the same basic block
1150   // If not, assume the condition flags might get modified somewhere.
1151   if (To->getParent() != From->getParent())
1152     return true;
1153 
1154   // From must be above To.
1155   assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1156                       [From](MachineInstr &MI) {
1157                         return MI.getIterator() == From;
1158                       }) != To->getParent()->rend());
1159 
1160   // We iterate backward starting \p To until we hit \p From.
1161   for (--To; To != From; --To) {
1162     const MachineInstr &Instr = *To;
1163 
1164     if (((AccessToCheck & AK_Write) &&
1165          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1166         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1167       return true;
1168   }
1169   return false;
1170 }
1171 
1172 /// Try to optimize a compare instruction. A compare instruction is an
1173 /// instruction which produces AArch64::NZCV. It can be truly compare
1174 /// instruction
1175 /// when there are no uses of its destination register.
1176 ///
1177 /// The following steps are tried in order:
1178 /// 1. Convert CmpInstr into an unconditional version.
1179 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1180 ///    condition code or an instruction which can be converted into such an
1181 ///    instruction.
1182 ///    Only comparison with zero is supported.
optimizeCompareInstr(MachineInstr & CmpInstr,unsigned SrcReg,unsigned SrcReg2,int CmpMask,int CmpValue,const MachineRegisterInfo * MRI) const1183 bool AArch64InstrInfo::optimizeCompareInstr(
1184     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1185     int CmpValue, const MachineRegisterInfo *MRI) const {
1186   assert(CmpInstr.getParent());
1187   assert(MRI);
1188 
1189   // Replace SUBSWrr with SUBWrr if NZCV is not used.
1190   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1191   if (DeadNZCVIdx != -1) {
1192     if (CmpInstr.definesRegister(AArch64::WZR) ||
1193         CmpInstr.definesRegister(AArch64::XZR)) {
1194       CmpInstr.eraseFromParent();
1195       return true;
1196     }
1197     unsigned Opc = CmpInstr.getOpcode();
1198     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1199     if (NewOpc == Opc)
1200       return false;
1201     const MCInstrDesc &MCID = get(NewOpc);
1202     CmpInstr.setDesc(MCID);
1203     CmpInstr.RemoveOperand(DeadNZCVIdx);
1204     bool succeeded = UpdateOperandRegClass(CmpInstr);
1205     (void)succeeded;
1206     assert(succeeded && "Some operands reg class are incompatible!");
1207     return true;
1208   }
1209 
1210   // Continue only if we have a "ri" where immediate is zero.
1211   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1212   // function.
1213   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1214   if (CmpValue != 0 || SrcReg2 != 0)
1215     return false;
1216 
1217   // CmpInstr is a Compare instruction if destination register is not used.
1218   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1219     return false;
1220 
1221   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1222 }
1223 
1224 /// Get opcode of S version of Instr.
1225 /// If Instr is S version its opcode is returned.
1226 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1227 /// or we are not interested in it.
sForm(MachineInstr & Instr)1228 static unsigned sForm(MachineInstr &Instr) {
1229   switch (Instr.getOpcode()) {
1230   default:
1231     return AArch64::INSTRUCTION_LIST_END;
1232 
1233   case AArch64::ADDSWrr:
1234   case AArch64::ADDSWri:
1235   case AArch64::ADDSXrr:
1236   case AArch64::ADDSXri:
1237   case AArch64::SUBSWrr:
1238   case AArch64::SUBSWri:
1239   case AArch64::SUBSXrr:
1240   case AArch64::SUBSXri:
1241     return Instr.getOpcode();
1242 
1243   case AArch64::ADDWrr:
1244     return AArch64::ADDSWrr;
1245   case AArch64::ADDWri:
1246     return AArch64::ADDSWri;
1247   case AArch64::ADDXrr:
1248     return AArch64::ADDSXrr;
1249   case AArch64::ADDXri:
1250     return AArch64::ADDSXri;
1251   case AArch64::ADCWr:
1252     return AArch64::ADCSWr;
1253   case AArch64::ADCXr:
1254     return AArch64::ADCSXr;
1255   case AArch64::SUBWrr:
1256     return AArch64::SUBSWrr;
1257   case AArch64::SUBWri:
1258     return AArch64::SUBSWri;
1259   case AArch64::SUBXrr:
1260     return AArch64::SUBSXrr;
1261   case AArch64::SUBXri:
1262     return AArch64::SUBSXri;
1263   case AArch64::SBCWr:
1264     return AArch64::SBCSWr;
1265   case AArch64::SBCXr:
1266     return AArch64::SBCSXr;
1267   case AArch64::ANDWri:
1268     return AArch64::ANDSWri;
1269   case AArch64::ANDXri:
1270     return AArch64::ANDSXri;
1271   }
1272 }
1273 
1274 /// Check if AArch64::NZCV should be alive in successors of MBB.
areCFlagsAliveInSuccessors(MachineBasicBlock * MBB)1275 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1276   for (auto *BB : MBB->successors())
1277     if (BB->isLiveIn(AArch64::NZCV))
1278       return true;
1279   return false;
1280 }
1281 
1282 namespace {
1283 
1284 struct UsedNZCV {
1285   bool N = false;
1286   bool Z = false;
1287   bool C = false;
1288   bool V = false;
1289 
1290   UsedNZCV() = default;
1291 
operator |=__anon34ba85090211::UsedNZCV1292   UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1293     this->N |= UsedFlags.N;
1294     this->Z |= UsedFlags.Z;
1295     this->C |= UsedFlags.C;
1296     this->V |= UsedFlags.V;
1297     return *this;
1298   }
1299 };
1300 
1301 } // end anonymous namespace
1302 
1303 /// Find a condition code used by the instruction.
1304 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1305 /// codes or we don't optimize CmpInstr in the presence of such instructions.
findCondCodeUsedByInstr(const MachineInstr & Instr)1306 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1307   switch (Instr.getOpcode()) {
1308   default:
1309     return AArch64CC::Invalid;
1310 
1311   case AArch64::Bcc: {
1312     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1313     assert(Idx >= 2);
1314     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1315   }
1316 
1317   case AArch64::CSINVWr:
1318   case AArch64::CSINVXr:
1319   case AArch64::CSINCWr:
1320   case AArch64::CSINCXr:
1321   case AArch64::CSELWr:
1322   case AArch64::CSELXr:
1323   case AArch64::CSNEGWr:
1324   case AArch64::CSNEGXr:
1325   case AArch64::FCSELSrrr:
1326   case AArch64::FCSELDrrr: {
1327     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1328     assert(Idx >= 1);
1329     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1330   }
1331   }
1332 }
1333 
getUsedNZCV(AArch64CC::CondCode CC)1334 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1335   assert(CC != AArch64CC::Invalid);
1336   UsedNZCV UsedFlags;
1337   switch (CC) {
1338   default:
1339     break;
1340 
1341   case AArch64CC::EQ: // Z set
1342   case AArch64CC::NE: // Z clear
1343     UsedFlags.Z = true;
1344     break;
1345 
1346   case AArch64CC::HI: // Z clear and C set
1347   case AArch64CC::LS: // Z set   or  C clear
1348     UsedFlags.Z = true;
1349     LLVM_FALLTHROUGH;
1350   case AArch64CC::HS: // C set
1351   case AArch64CC::LO: // C clear
1352     UsedFlags.C = true;
1353     break;
1354 
1355   case AArch64CC::MI: // N set
1356   case AArch64CC::PL: // N clear
1357     UsedFlags.N = true;
1358     break;
1359 
1360   case AArch64CC::VS: // V set
1361   case AArch64CC::VC: // V clear
1362     UsedFlags.V = true;
1363     break;
1364 
1365   case AArch64CC::GT: // Z clear, N and V the same
1366   case AArch64CC::LE: // Z set,   N and V differ
1367     UsedFlags.Z = true;
1368     LLVM_FALLTHROUGH;
1369   case AArch64CC::GE: // N and V the same
1370   case AArch64CC::LT: // N and V differ
1371     UsedFlags.N = true;
1372     UsedFlags.V = true;
1373     break;
1374   }
1375   return UsedFlags;
1376 }
1377 
isADDSRegImm(unsigned Opcode)1378 static bool isADDSRegImm(unsigned Opcode) {
1379   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1380 }
1381 
isSUBSRegImm(unsigned Opcode)1382 static bool isSUBSRegImm(unsigned Opcode) {
1383   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1384 }
1385 
1386 /// Check if CmpInstr can be substituted by MI.
1387 ///
1388 /// CmpInstr can be substituted:
1389 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1390 /// - and, MI and CmpInstr are from the same MachineBB
1391 /// - and, condition flags are not alive in successors of the CmpInstr parent
1392 /// - and, if MI opcode is the S form there must be no defs of flags between
1393 ///        MI and CmpInstr
1394 ///        or if MI opcode is not the S form there must be neither defs of flags
1395 ///        nor uses of flags between MI and CmpInstr.
1396 /// - and  C/V flags are not used after CmpInstr
canInstrSubstituteCmpInstr(MachineInstr * MI,MachineInstr * CmpInstr,const TargetRegisterInfo * TRI)1397 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1398                                        const TargetRegisterInfo *TRI) {
1399   assert(MI);
1400   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1401   assert(CmpInstr);
1402 
1403   const unsigned CmpOpcode = CmpInstr->getOpcode();
1404   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1405     return false;
1406 
1407   if (MI->getParent() != CmpInstr->getParent())
1408     return false;
1409 
1410   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1411     return false;
1412 
1413   AccessKind AccessToCheck = AK_Write;
1414   if (sForm(*MI) != MI->getOpcode())
1415     AccessToCheck = AK_All;
1416   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1417     return false;
1418 
1419   UsedNZCV NZCVUsedAfterCmp;
1420   for (auto I = std::next(CmpInstr->getIterator()),
1421             E = CmpInstr->getParent()->instr_end();
1422        I != E; ++I) {
1423     const MachineInstr &Instr = *I;
1424     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1425       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1426       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1427         return false;
1428       NZCVUsedAfterCmp |= getUsedNZCV(CC);
1429     }
1430 
1431     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1432       break;
1433   }
1434 
1435   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1436 }
1437 
1438 /// Substitute an instruction comparing to zero with another instruction
1439 /// which produces needed condition flags.
1440 ///
1441 /// Return true on success.
substituteCmpToZero(MachineInstr & CmpInstr,unsigned SrcReg,const MachineRegisterInfo * MRI) const1442 bool AArch64InstrInfo::substituteCmpToZero(
1443     MachineInstr &CmpInstr, unsigned SrcReg,
1444     const MachineRegisterInfo *MRI) const {
1445   assert(MRI);
1446   // Get the unique definition of SrcReg.
1447   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1448   if (!MI)
1449     return false;
1450 
1451   const TargetRegisterInfo *TRI = &getRegisterInfo();
1452 
1453   unsigned NewOpc = sForm(*MI);
1454   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1455     return false;
1456 
1457   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1458     return false;
1459 
1460   // Update the instruction to set NZCV.
1461   MI->setDesc(get(NewOpc));
1462   CmpInstr.eraseFromParent();
1463   bool succeeded = UpdateOperandRegClass(*MI);
1464   (void)succeeded;
1465   assert(succeeded && "Some operands reg class are incompatible!");
1466   MI->addRegisterDefined(AArch64::NZCV, TRI);
1467   return true;
1468 }
1469 
expandPostRAPseudo(MachineInstr & MI) const1470 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1471   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1472       MI.getOpcode() != AArch64::CATCHRET)
1473     return false;
1474 
1475   MachineBasicBlock &MBB = *MI.getParent();
1476   DebugLoc DL = MI.getDebugLoc();
1477 
1478   if (MI.getOpcode() == AArch64::CATCHRET) {
1479     // Skip to the first instruction before the epilog.
1480     const TargetInstrInfo *TII =
1481       MBB.getParent()->getSubtarget().getInstrInfo();
1482     MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1483     auto MBBI = MachineBasicBlock::iterator(MI);
1484     MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1485     while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1486            FirstEpilogSEH != MBB.begin())
1487       FirstEpilogSEH = std::prev(FirstEpilogSEH);
1488     if (FirstEpilogSEH != MBB.begin())
1489       FirstEpilogSEH = std::next(FirstEpilogSEH);
1490     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1491         .addReg(AArch64::X0, RegState::Define)
1492         .addMBB(TargetMBB);
1493     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1494         .addReg(AArch64::X0, RegState::Define)
1495         .addReg(AArch64::X0)
1496         .addMBB(TargetMBB)
1497         .addImm(0);
1498     return true;
1499   }
1500 
1501   unsigned Reg = MI.getOperand(0).getReg();
1502   const GlobalValue *GV =
1503       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1504   const TargetMachine &TM = MBB.getParent()->getTarget();
1505   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1506   const unsigned char MO_NC = AArch64II::MO_NC;
1507 
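  // The expansions below materialize the address of the stack-guard global and
  // load through it; e.g. the GOT case roughly becomes (illustrative only):
  //   ldr xN, :got:guard_symbol
  //   ldr xN, [xN]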
1508   if ((OpFlags & AArch64II::MO_GOT) != 0) {
1509     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1510         .addGlobalAddress(GV, 0, OpFlags);
1511     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1512         .addReg(Reg, RegState::Kill)
1513         .addImm(0)
1514         .addMemOperand(*MI.memoperands_begin());
1515   } else if (TM.getCodeModel() == CodeModel::Large) {
1516     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1517         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1518         .addImm(0);
1519     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1520         .addReg(Reg, RegState::Kill)
1521         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1522         .addImm(16);
1523     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1524         .addReg(Reg, RegState::Kill)
1525         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1526         .addImm(32);
1527     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1528         .addReg(Reg, RegState::Kill)
1529         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1530         .addImm(48);
1531     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1532         .addReg(Reg, RegState::Kill)
1533         .addImm(0)
1534         .addMemOperand(*MI.memoperands_begin());
1535   } else if (TM.getCodeModel() == CodeModel::Tiny) {
1536     BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1537         .addGlobalAddress(GV, 0, OpFlags);
1538   } else {
1539     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1540         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1541     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1542     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1543         .addReg(Reg, RegState::Kill)
1544         .addGlobalAddress(GV, 0, LoFlags)
1545         .addMemOperand(*MI.memoperands_begin());
1546   }
1547 
1548   MBB.erase(MI);
1549 
1550   return true;
1551 }
1552 
1553 // Return true if this instruction simply sets its single destination register
1554 // to zero. This is equivalent to a register rename of the zero-register.
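// For example, 'movz w0, #0', 'and w0, wzr, #imm', or a COPY whose source is
// WZR all qualify (illustrative of the cases handled below).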
1555 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1556   switch (MI.getOpcode()) {
1557   default:
1558     break;
1559   case AArch64::MOVZWi:
1560   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1561     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1562       assert(MI.getDesc().getNumOperands() == 3 &&
1563              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1564       return true;
1565     }
1566     break;
1567   case AArch64::ANDWri: // and Rd, Rzr, #imm
1568     return MI.getOperand(1).getReg() == AArch64::WZR;
1569   case AArch64::ANDXri:
1570     return MI.getOperand(1).getReg() == AArch64::XZR;
1571   case TargetOpcode::COPY:
1572     return MI.getOperand(1).getReg() == AArch64::WZR;
1573   }
1574   return false;
1575 }
1576 
1577 // Return true if this instruction simply renames a general register without
1578 // modifying bits.
1579 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1580   switch (MI.getOpcode()) {
1581   default:
1582     break;
1583   case TargetOpcode::COPY: {
1584     // GPR32 copies will be lowered to ORRXrs
1585     unsigned DstReg = MI.getOperand(0).getReg();
1586     return (AArch64::GPR32RegClass.contains(DstReg) ||
1587             AArch64::GPR64RegClass.contains(DstReg));
1588   }
1589   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1590     if (MI.getOperand(1).getReg() == AArch64::XZR) {
1591       assert(MI.getDesc().getNumOperands() == 4 &&
1592              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1593       return true;
1594     }
1595     break;
1596   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1597     if (MI.getOperand(2).getImm() == 0) {
1598       assert(MI.getDesc().getNumOperands() == 4 &&
1599              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1600       return true;
1601     }
1602     break;
1603   }
1604   return false;
1605 }
1606 
1607 // Return true if this instruction simply renames a general register without
1608 // modifying bits.
1609 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1610   switch (MI.getOpcode()) {
1611   default:
1612     break;
1613   case TargetOpcode::COPY: {
1614     // FPR64 copies will be lowered to ORR.16b
1615     unsigned DstReg = MI.getOperand(0).getReg();
1616     return (AArch64::FPR64RegClass.contains(DstReg) ||
1617             AArch64::FPR128RegClass.contains(DstReg));
1618   }
1619   case AArch64::ORRv16i8:
1620     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1621       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1622              "invalid ORRv16i8 operands");
1623       return true;
1624     }
1625     break;
1626   }
1627   return false;
1628 }
1629 
1630 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1631                                                int &FrameIndex) const {
1632   switch (MI.getOpcode()) {
1633   default:
1634     break;
1635   case AArch64::LDRWui:
1636   case AArch64::LDRXui:
1637   case AArch64::LDRBui:
1638   case AArch64::LDRHui:
1639   case AArch64::LDRSui:
1640   case AArch64::LDRDui:
1641   case AArch64::LDRQui:
1642     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1643         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1644       FrameIndex = MI.getOperand(1).getIndex();
1645       return MI.getOperand(0).getReg();
1646     }
1647     break;
1648   }
1649 
1650   return 0;
1651 }
1652 
1653 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1654                                               int &FrameIndex) const {
1655   switch (MI.getOpcode()) {
1656   default:
1657     break;
1658   case AArch64::STRWui:
1659   case AArch64::STRXui:
1660   case AArch64::STRBui:
1661   case AArch64::STRHui:
1662   case AArch64::STRSui:
1663   case AArch64::STRDui:
1664   case AArch64::STRQui:
1665     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1666         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1667       FrameIndex = MI.getOperand(1).getIndex();
1668       return MI.getOperand(0).getReg();
1669     }
1670     break;
1671   }
1672   return 0;
1673 }
1674 
1675 /// Check all MachineMemOperands for a hint to suppress pairing.
1676 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1677   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1678     return MMO->getFlags() & MOSuppressPair;
1679   });
1680 }
1681 
1682 /// Set a flag on the first MachineMemOperand to suppress pairing.
1683 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1684   if (MI.memoperands_empty())
1685     return;
1686   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1687 }
1688 
1689 /// Check all MachineMemOperands for a hint that the load/store is strided.
1690 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1691   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1692     return MMO->getFlags() & MOStridedAccess;
1693   });
1694 }
1695 
1696 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1697   switch (Opc) {
1698   default:
1699     return false;
1700   case AArch64::STURSi:
1701   case AArch64::STURDi:
1702   case AArch64::STURQi:
1703   case AArch64::STURBBi:
1704   case AArch64::STURHHi:
1705   case AArch64::STURWi:
1706   case AArch64::STURXi:
1707   case AArch64::LDURSi:
1708   case AArch64::LDURDi:
1709   case AArch64::LDURQi:
1710   case AArch64::LDURWi:
1711   case AArch64::LDURXi:
1712   case AArch64::LDURSWi:
1713   case AArch64::LDURHHi:
1714   case AArch64::LDURBBi:
1715   case AArch64::LDURSBWi:
1716   case AArch64::LDURSHWi:
1717     return true;
1718   }
1719 }
1720 
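// Map a scaled load/store opcode to its unscaled (LDUR/STUR-style)
// counterpart, e.g. getUnscaledLdSt(AArch64::LDRXui) yields AArch64::LDURXi.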
1721 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
1722   switch (Opc) {
1723   default: return {};
1724   case AArch64::PRFMui: return AArch64::PRFUMi;
1725   case AArch64::LDRXui: return AArch64::LDURXi;
1726   case AArch64::LDRWui: return AArch64::LDURWi;
1727   case AArch64::LDRBui: return AArch64::LDURBi;
1728   case AArch64::LDRHui: return AArch64::LDURHi;
1729   case AArch64::LDRSui: return AArch64::LDURSi;
1730   case AArch64::LDRDui: return AArch64::LDURDi;
1731   case AArch64::LDRQui: return AArch64::LDURQi;
1732   case AArch64::LDRBBui: return AArch64::LDURBBi;
1733   case AArch64::LDRHHui: return AArch64::LDURHHi;
1734   case AArch64::LDRSBXui: return AArch64::LDURSBXi;
1735   case AArch64::LDRSBWui: return AArch64::LDURSBWi;
1736   case AArch64::LDRSHXui: return AArch64::LDURSHXi;
1737   case AArch64::LDRSHWui: return AArch64::LDURSHWi;
1738   case AArch64::LDRSWui: return AArch64::LDURSWi;
1739   case AArch64::STRXui: return AArch64::STURXi;
1740   case AArch64::STRWui: return AArch64::STURWi;
1741   case AArch64::STRBui: return AArch64::STURBi;
1742   case AArch64::STRHui: return AArch64::STURHi;
1743   case AArch64::STRSui: return AArch64::STURSi;
1744   case AArch64::STRDui: return AArch64::STURDi;
1745   case AArch64::STRQui: return AArch64::STURQi;
1746   case AArch64::STRBBui: return AArch64::STURBBi;
1747   case AArch64::STRHHui: return AArch64::STURHHi;
1748   }
1749 }
1750 
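// Return the operand index of the immediate offset, e.g. operand 3 for paired
// forms such as LDPXi (two registers, base, immediate) and operand 2 for most
// other loads/stores.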
1751 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
1752   switch (Opc) {
1753   default:
1754     return 2;
1755   case AArch64::LDPXi:
1756   case AArch64::LDPDi:
1757   case AArch64::STPXi:
1758   case AArch64::STPDi:
1759   case AArch64::LDNPXi:
1760   case AArch64::LDNPDi:
1761   case AArch64::STNPXi:
1762   case AArch64::STNPDi:
1763   case AArch64::LDPQi:
1764   case AArch64::STPQi:
1765   case AArch64::LDNPQi:
1766   case AArch64::STNPQi:
1767   case AArch64::LDPWi:
1768   case AArch64::LDPSi:
1769   case AArch64::STPWi:
1770   case AArch64::STPSi:
1771   case AArch64::LDNPWi:
1772   case AArch64::LDNPSi:
1773   case AArch64::STNPWi:
1774   case AArch64::STNPSi:
1775   case AArch64::LDG:
1776   case AArch64::STGPi:
1777     return 3;
1778   case AArch64::ADDG:
1779   case AArch64::STGOffset:
1780     return 2;
1781   }
1782 }
1783 
1784 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1785   switch (MI.getOpcode()) {
1786   default:
1787     return false;
1788   // Scaled instructions.
1789   case AArch64::STRSui:
1790   case AArch64::STRDui:
1791   case AArch64::STRQui:
1792   case AArch64::STRXui:
1793   case AArch64::STRWui:
1794   case AArch64::LDRSui:
1795   case AArch64::LDRDui:
1796   case AArch64::LDRQui:
1797   case AArch64::LDRXui:
1798   case AArch64::LDRWui:
1799   case AArch64::LDRSWui:
1800   // Unscaled instructions.
1801   case AArch64::STURSi:
1802   case AArch64::STURDi:
1803   case AArch64::STURQi:
1804   case AArch64::STURWi:
1805   case AArch64::STURXi:
1806   case AArch64::LDURSi:
1807   case AArch64::LDURDi:
1808   case AArch64::LDURQi:
1809   case AArch64::LDURWi:
1810   case AArch64::LDURXi:
1811   case AArch64::LDURSWi:
1812     return true;
1813   }
1814 }
1815 
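// For example, convertToFlagSettingOpc(AArch64::ADDWri, Is64Bit) returns
// AArch64::ADDSWri and sets Is64Bit to false (illustrative).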
1816 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1817                                                    bool &Is64Bit) {
1818   switch (Opc) {
1819   default:
1820     llvm_unreachable("Opcode has no flag setting equivalent!");
1821   // 32-bit cases:
1822   case AArch64::ADDWri:
1823     Is64Bit = false;
1824     return AArch64::ADDSWri;
1825   case AArch64::ADDWrr:
1826     Is64Bit = false;
1827     return AArch64::ADDSWrr;
1828   case AArch64::ADDWrs:
1829     Is64Bit = false;
1830     return AArch64::ADDSWrs;
1831   case AArch64::ADDWrx:
1832     Is64Bit = false;
1833     return AArch64::ADDSWrx;
1834   case AArch64::ANDWri:
1835     Is64Bit = false;
1836     return AArch64::ANDSWri;
1837   case AArch64::ANDWrr:
1838     Is64Bit = false;
1839     return AArch64::ANDSWrr;
1840   case AArch64::ANDWrs:
1841     Is64Bit = false;
1842     return AArch64::ANDSWrs;
1843   case AArch64::BICWrr:
1844     Is64Bit = false;
1845     return AArch64::BICSWrr;
1846   case AArch64::BICWrs:
1847     Is64Bit = false;
1848     return AArch64::BICSWrs;
1849   case AArch64::SUBWri:
1850     Is64Bit = false;
1851     return AArch64::SUBSWri;
1852   case AArch64::SUBWrr:
1853     Is64Bit = false;
1854     return AArch64::SUBSWrr;
1855   case AArch64::SUBWrs:
1856     Is64Bit = false;
1857     return AArch64::SUBSWrs;
1858   case AArch64::SUBWrx:
1859     Is64Bit = false;
1860     return AArch64::SUBSWrx;
1861   // 64-bit cases:
1862   case AArch64::ADDXri:
1863     Is64Bit = true;
1864     return AArch64::ADDSXri;
1865   case AArch64::ADDXrr:
1866     Is64Bit = true;
1867     return AArch64::ADDSXrr;
1868   case AArch64::ADDXrs:
1869     Is64Bit = true;
1870     return AArch64::ADDSXrs;
1871   case AArch64::ADDXrx:
1872     Is64Bit = true;
1873     return AArch64::ADDSXrx;
1874   case AArch64::ANDXri:
1875     Is64Bit = true;
1876     return AArch64::ANDSXri;
1877   case AArch64::ANDXrr:
1878     Is64Bit = true;
1879     return AArch64::ANDSXrr;
1880   case AArch64::ANDXrs:
1881     Is64Bit = true;
1882     return AArch64::ANDSXrs;
1883   case AArch64::BICXrr:
1884     Is64Bit = true;
1885     return AArch64::BICSXrr;
1886   case AArch64::BICXrs:
1887     Is64Bit = true;
1888     return AArch64::BICSXrs;
1889   case AArch64::SUBXri:
1890     Is64Bit = true;
1891     return AArch64::SUBSXri;
1892   case AArch64::SUBXrr:
1893     Is64Bit = true;
1894     return AArch64::SUBSXrr;
1895   case AArch64::SUBXrs:
1896     Is64Bit = true;
1897     return AArch64::SUBSXrs;
1898   case AArch64::SUBXrx:
1899     Is64Bit = true;
1900     return AArch64::SUBSXrx;
1901   }
1902 }
1903 
1904 // Is this a candidate for ld/st merging or pairing?  For example, we don't
1905 // touch volatiles or load/stores that have a hint to avoid pair formation.
1906 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
1907   // If this is a volatile load/store, don't mess with it.
1908   if (MI.hasOrderedMemoryRef())
1909     return false;
1910 
1911   // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1912   assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
1913          "Expected a reg or frame index operand.");
1914   if (!MI.getOperand(2).isImm())
1915     return false;
1916 
1917   // Can't merge/pair if the instruction modifies the base register.
1918   // e.g., ldr x0, [x0]
1919   // This case will never occur with an FI base.
1920   if (MI.getOperand(1).isReg()) {
1921     unsigned BaseReg = MI.getOperand(1).getReg();
1922     const TargetRegisterInfo *TRI = &getRegisterInfo();
1923     if (MI.modifiesRegister(BaseReg, TRI))
1924       return false;
1925   }
1926 
1927   // Check if this load/store has a hint to avoid pair formation.
1928   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1929   if (isLdStPairSuppressed(MI))
1930     return false;
1931 
1932   // Do not pair any callee-save store/reload instructions in the
1933   // prologue/epilogue if the CFI information encoded the operations as separate
1934   // instructions, as that would make the size of the emitted prologue differ
1935   // from the prologue size recorded in the Windows CFI.
1936   const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
1937   bool NeedsWinCFI = MAI->usesWindowsCFI() &&
1938                      MI.getMF()->getFunction().needsUnwindTableEntry();
1939   if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
1940                       MI.getFlag(MachineInstr::FrameDestroy)))
1941     return false;
1942 
1943   // On some CPUs quad load/store pairs are slower than two single load/stores.
1944   if (Subtarget.isPaired128Slow()) {
1945     switch (MI.getOpcode()) {
1946     default:
1947       break;
1948     case AArch64::LDURQi:
1949     case AArch64::STURQi:
1950     case AArch64::LDRQui:
1951     case AArch64::STRQui:
1952       return false;
1953     }
1954   }
1955 
1956   return true;
1957 }
1958 
1959 bool AArch64InstrInfo::getMemOperandWithOffset(const MachineInstr &LdSt,
1960                                           const MachineOperand *&BaseOp,
1961                                           int64_t &Offset,
1962                                           const TargetRegisterInfo *TRI) const {
1963   unsigned Width;
1964   return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
1965 }
1966 
1967 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
1968     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
1969     unsigned &Width, const TargetRegisterInfo *TRI) const {
1970   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1971   // Handle only loads/stores with base register followed by immediate offset.
1972   if (LdSt.getNumExplicitOperands() == 3) {
1973     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1974     if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
1975         !LdSt.getOperand(2).isImm())
1976       return false;
1977   } else if (LdSt.getNumExplicitOperands() == 4) {
1978     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1979     if (!LdSt.getOperand(1).isReg() ||
1980         (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
1981         !LdSt.getOperand(3).isImm())
1982       return false;
1983   } else
1984     return false;
1985 
1986   // Get the scaling factor for the instruction and set the width for the
1987   // instruction.
1988   unsigned Scale = 0;
1989   int64_t Dummy1, Dummy2;
1990 
1991   // If this returns false, then it's an instruction we don't want to handle.
1992   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
1993     return false;
1994 
1995   // Compute the offset. Offset is calculated as the immediate operand
1996   // multiplied by the scaling factor. Unscaled instructions have scaling factor
1997   // set to 1.
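  // For example, an LDRXui whose immediate operand is 2 (Scale == 8) yields a
  // byte Offset of 16 (illustrative).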
1998   if (LdSt.getNumExplicitOperands() == 3) {
1999     BaseOp = &LdSt.getOperand(1);
2000     Offset = LdSt.getOperand(2).getImm() * Scale;
2001   } else {
2002     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2003     BaseOp = &LdSt.getOperand(2);
2004     Offset = LdSt.getOperand(3).getImm() * Scale;
2005   }
2006 
2007   assert((BaseOp->isReg() || BaseOp->isFI()) &&
2008          "getMemOperandWithOffset only supports base "
2009          "operands of type register or frame index.");
2010 
2011   return true;
2012 }
2013 
2014 MachineOperand &
2015 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2016   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2017   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2018   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2019   return OfsOp;
2020 }
2021 
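// Describe a load/store opcode: Scale is the byte multiplier applied to the
// encoded immediate, Width the number of bytes accessed, and Min/MaxOffset the
// legal immediate range. E.g. LDRXui has Scale = Width = 8 with immediates
// 0..4095, i.e. byte offsets up to 32760 (illustrative).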
2022 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
2023                                     unsigned &Width, int64_t &MinOffset,
2024                                     int64_t &MaxOffset) {
2025   switch (Opcode) {
2026   // Not a memory operation, or not one we want to handle.
2027   default:
2028     Scale = Width = 0;
2029     MinOffset = MaxOffset = 0;
2030     return false;
2031   case AArch64::STRWpost:
2032   case AArch64::LDRWpost:
2033     Width = 32;
2034     Scale = 4;
2035     MinOffset = -256;
2036     MaxOffset = 255;
2037     break;
2038   case AArch64::LDURQi:
2039   case AArch64::STURQi:
2040     Width = 16;
2041     Scale = 1;
2042     MinOffset = -256;
2043     MaxOffset = 255;
2044     break;
2045   case AArch64::PRFUMi:
2046   case AArch64::LDURXi:
2047   case AArch64::LDURDi:
2048   case AArch64::STURXi:
2049   case AArch64::STURDi:
2050     Width = 8;
2051     Scale = 1;
2052     MinOffset = -256;
2053     MaxOffset = 255;
2054     break;
2055   case AArch64::LDURWi:
2056   case AArch64::LDURSi:
2057   case AArch64::LDURSWi:
2058   case AArch64::STURWi:
2059   case AArch64::STURSi:
2060     Width = 4;
2061     Scale = 1;
2062     MinOffset = -256;
2063     MaxOffset = 255;
2064     break;
2065   case AArch64::LDURHi:
2066   case AArch64::LDURHHi:
2067   case AArch64::LDURSHXi:
2068   case AArch64::LDURSHWi:
2069   case AArch64::STURHi:
2070   case AArch64::STURHHi:
2071     Width = 2;
2072     Scale = 1;
2073     MinOffset = -256;
2074     MaxOffset = 255;
2075     break;
2076   case AArch64::LDURBi:
2077   case AArch64::LDURBBi:
2078   case AArch64::LDURSBXi:
2079   case AArch64::LDURSBWi:
2080   case AArch64::STURBi:
2081   case AArch64::STURBBi:
2082     Width = 1;
2083     Scale = 1;
2084     MinOffset = -256;
2085     MaxOffset = 255;
2086     break;
2087   case AArch64::LDPQi:
2088   case AArch64::LDNPQi:
2089   case AArch64::STPQi:
2090   case AArch64::STNPQi:
2091     Scale = 16;
2092     Width = 32;
2093     MinOffset = -64;
2094     MaxOffset = 63;
2095     break;
2096   case AArch64::LDRQui:
2097   case AArch64::STRQui:
2098     Scale = Width = 16;
2099     MinOffset = 0;
2100     MaxOffset = 4095;
2101     break;
2102   case AArch64::LDPXi:
2103   case AArch64::LDPDi:
2104   case AArch64::LDNPXi:
2105   case AArch64::LDNPDi:
2106   case AArch64::STPXi:
2107   case AArch64::STPDi:
2108   case AArch64::STNPXi:
2109   case AArch64::STNPDi:
2110     Scale = 8;
2111     Width = 16;
2112     MinOffset = -64;
2113     MaxOffset = 63;
2114     break;
2115   case AArch64::PRFMui:
2116   case AArch64::LDRXui:
2117   case AArch64::LDRDui:
2118   case AArch64::STRXui:
2119   case AArch64::STRDui:
2120     Scale = Width = 8;
2121     MinOffset = 0;
2122     MaxOffset = 4095;
2123     break;
2124   case AArch64::LDPWi:
2125   case AArch64::LDPSi:
2126   case AArch64::LDNPWi:
2127   case AArch64::LDNPSi:
2128   case AArch64::STPWi:
2129   case AArch64::STPSi:
2130   case AArch64::STNPWi:
2131   case AArch64::STNPSi:
2132     Scale = 4;
2133     Width = 8;
2134     MinOffset = -64;
2135     MaxOffset = 63;
2136     break;
2137   case AArch64::LDRWui:
2138   case AArch64::LDRSui:
2139   case AArch64::LDRSWui:
2140   case AArch64::STRWui:
2141   case AArch64::STRSui:
2142     Scale = Width = 4;
2143     MinOffset = 0;
2144     MaxOffset = 4095;
2145     break;
2146   case AArch64::LDRHui:
2147   case AArch64::LDRHHui:
2148   case AArch64::LDRSHWui:
2149   case AArch64::LDRSHXui:
2150   case AArch64::STRHui:
2151   case AArch64::STRHHui:
2152     Scale = Width = 2;
2153     MinOffset = 0;
2154     MaxOffset = 4095;
2155     break;
2156   case AArch64::LDRBui:
2157   case AArch64::LDRBBui:
2158   case AArch64::LDRSBWui:
2159   case AArch64::LDRSBXui:
2160   case AArch64::STRBui:
2161   case AArch64::STRBBui:
2162     Scale = Width = 1;
2163     MinOffset = 0;
2164     MaxOffset = 4095;
2165     break;
2166   case AArch64::ADDG:
2167   case AArch64::TAGPstack:
2168     Scale = 16;
2169     Width = 0;
2170     MinOffset = 0;
2171     MaxOffset = 63;
2172     break;
2173   case AArch64::LDG:
2174   case AArch64::STGOffset:
2175   case AArch64::STZGOffset:
2176     Scale = Width = 16;
2177     MinOffset = -256;
2178     MaxOffset = 255;
2179     break;
2180   case AArch64::ST2GOffset:
2181   case AArch64::STZ2GOffset:
2182     Scale = 16;
2183     Width = 32;
2184     MinOffset = -256;
2185     MaxOffset = 255;
2186     break;
2187   case AArch64::STGPi:
2188     Scale = Width = 16;
2189     MinOffset = -64;
2190     MaxOffset = 63;
2191     break;
2192   }
2193 
2194   return true;
2195 }
2196 
2197 static unsigned getOffsetStride(unsigned Opc) {
2198   switch (Opc) {
2199   default:
2200     return 0;
2201   case AArch64::LDURQi:
2202   case AArch64::STURQi:
2203     return 16;
2204   case AArch64::LDURXi:
2205   case AArch64::LDURDi:
2206   case AArch64::STURXi:
2207   case AArch64::STURDi:
2208     return 8;
2209   case AArch64::LDURWi:
2210   case AArch64::LDURSi:
2211   case AArch64::LDURSWi:
2212   case AArch64::STURWi:
2213   case AArch64::STURSi:
2214     return 4;
2215   }
2216 }
2217 
2218 // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
2219 // scaled.
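// For example, with an LDURXi (stride 8 per getOffsetStride) a byte offset of
// 16 scales to element offset 2, while 12 is rejected because it is not a
// multiple of the stride.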
2220 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2221   unsigned OffsetStride = getOffsetStride(Opc);
2222   if (OffsetStride == 0)
2223     return false;
2224   // If the byte-offset isn't a multiple of the stride, we can't scale this
2225   // offset.
2226   if (Offset % OffsetStride != 0)
2227     return false;
2228 
2229   // Convert the byte-offset used by unscaled instructions into an "element"
2230   // offset used by the scaled pair load/store instructions.
2231   Offset /= OffsetStride;
2232   return true;
2233 }
2234 
2235 // Unscale the scaled offsets. Returns false if the scaled offset can't be
2236 // unscaled.
2237 static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
2238   unsigned OffsetStride = getOffsetStride(Opc);
2239   if (OffsetStride == 0)
2240     return false;
2241 
2242   // Convert the "element" offset used by scaled pair load/store instructions
2243   // into the byte-offset used by unscaled.
2244   Offset *= OffsetStride;
2245   return true;
2246 }
2247 
2248 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2249   if (FirstOpc == SecondOpc)
2250     return true;
2251   // We can also pair sign-ext and zero-ext instructions.
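  // E.g. canPairLdStOpc(AArch64::LDRWui, AArch64::LDRSWui) returns true, while
  // otherwise-mismatched opcodes do not pair here.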
2252   switch (FirstOpc) {
2253   default:
2254     return false;
2255   case AArch64::LDRWui:
2256   case AArch64::LDURWi:
2257     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2258   case AArch64::LDRSWui:
2259   case AArch64::LDURSWi:
2260     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2261   }
2262   // These instructions can't be paired based on their opcodes.
2263   return false;
2264 }
2265 
2266 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2267                             int64_t Offset1, unsigned Opcode1, int FI2,
2268                             int64_t Offset2, unsigned Opcode2) {
2269   // Accesses through fixed stack object frame indices may access a different
2270   // fixed stack slot. Check that the object offsets + offsets match.
2271   if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2272     int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2273     int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2274     assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2275     // Get the byte-offset from the object offset.
2276     if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
2277       return false;
2278     ObjectOffset1 += Offset1;
2279     ObjectOffset2 += Offset2;
2280     // Get the "element" index in the object.
2281     if (!scaleOffset(Opcode1, ObjectOffset1) ||
2282         !scaleOffset(Opcode2, ObjectOffset2))
2283       return false;
2284     return ObjectOffset1 + 1 == ObjectOffset2;
2285   }
2286 
2287   return FI1 == FI2;
2288 }
2289 
2290 /// Detect opportunities for ldp/stp formation.
2291 ///
2292 /// Only called for LdSt for which getMemOperandWithOffset returns true.
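/// For example, 'ldr x1, [x0, #8]' followed by 'ldr x2, [x0, #16]' may be
/// clustered so that a later pass can rewrite them as a single ldp
/// (illustrative).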
2293 bool AArch64InstrInfo::shouldClusterMemOps(const MachineOperand &BaseOp1,
2294                                            const MachineOperand &BaseOp2,
2295                                            unsigned NumLoads) const {
2296   const MachineInstr &FirstLdSt = *BaseOp1.getParent();
2297   const MachineInstr &SecondLdSt = *BaseOp2.getParent();
2298   if (BaseOp1.getType() != BaseOp2.getType())
2299     return false;
2300 
2301   assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2302          "Only base registers and frame indices are supported.");
2303 
2304   // Check for both base regs and base FI.
2305   if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2306     return false;
2307 
2308   // Only cluster up to a single pair.
2309   if (NumLoads > 1)
2310     return false;
2311 
2312   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2313     return false;
2314 
2315   // Can we pair these instructions based on their opcodes?
2316   unsigned FirstOpc = FirstLdSt.getOpcode();
2317   unsigned SecondOpc = SecondLdSt.getOpcode();
2318   if (!canPairLdStOpc(FirstOpc, SecondOpc))
2319     return false;
2320 
2321   // Can't merge volatiles or load/stores that have a hint to avoid pair
2322   // formation, for example.
2323   if (!isCandidateToMergeOrPair(FirstLdSt) ||
2324       !isCandidateToMergeOrPair(SecondLdSt))
2325     return false;
2326 
2327   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2328   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2329   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2330     return false;
2331 
2332   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2333   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2334     return false;
2335 
2336   // Pairwise instructions have a 7-bit signed offset field.
2337   if (Offset1 > 63 || Offset1 < -64)
2338     return false;
2339 
2340   // The caller should already have ordered First/SecondLdSt by offset.
2341   // Note: this need not hold when the bases are different frame indices.
2342   if (BaseOp1.isFI()) {
2343     assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
2344            "Caller should have ordered offsets.");
2345 
2346     const MachineFrameInfo &MFI =
2347         FirstLdSt.getParent()->getParent()->getFrameInfo();
2348     return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2349                            BaseOp2.getIndex(), Offset2, SecondOpc);
2350   }
2351 
2352   assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2353          "Caller should have ordered offsets.");
2354 
2355   return Offset1 + 1 == Offset2;
2356 }
2357 
2358 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2359                                             unsigned Reg, unsigned SubIdx,
2360                                             unsigned State,
2361                                             const TargetRegisterInfo *TRI) {
2362   if (!SubIdx)
2363     return MIB.addReg(Reg, State);
2364 
2365   if (TargetRegisterInfo::isPhysicalRegister(Reg))
2366     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2367   return MIB.addReg(Reg, State, SubIdx);
2368 }
2369 
2370 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2371                                         unsigned NumRegs) {
2372   // We really want the positive remainder mod 32 here, which happens to be
2373   // easily obtainable with a mask.
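  // For example, copying D1_D2_D3 into D2_D3_D4 gives (2 - 1) & 0x1f == 1 < 3,
  // so a forward sub-register-by-sub-register copy would clobber D2 and D3
  // before they are read.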
2374   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2375 }
2376 
2377 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2378                                         MachineBasicBlock::iterator I,
2379                                         const DebugLoc &DL, unsigned DestReg,
2380                                         unsigned SrcReg, bool KillSrc,
2381                                         unsigned Opcode,
2382                                         ArrayRef<unsigned> Indices) const {
2383   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2384   const TargetRegisterInfo *TRI = &getRegisterInfo();
2385   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2386   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2387   unsigned NumRegs = Indices.size();
2388 
2389   int SubReg = 0, End = NumRegs, Incr = 1;
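  // If the forward direction would clobber still-unread source sub-registers,
  // copy from the last sub-register down instead (e.g. D1_D2_D3 -> D2_D3_D4
  // copies D3 into D4 first).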
2390   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2391     SubReg = NumRegs - 1;
2392     End = -1;
2393     Incr = -1;
2394   }
2395 
2396   for (; SubReg != End; SubReg += Incr) {
2397     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2398     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2399     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2400     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2401   }
2402 }
2403 
2404 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2405                                        MachineBasicBlock::iterator I,
2406                                        DebugLoc DL, unsigned DestReg,
2407                                        unsigned SrcReg, bool KillSrc,
2408                                        unsigned Opcode, unsigned ZeroReg,
2409                                        llvm::ArrayRef<unsigned> Indices) const {
2410   const TargetRegisterInfo *TRI = &getRegisterInfo();
2411   unsigned NumRegs = Indices.size();
2412 
2413 #ifndef NDEBUG
2414   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2415   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2416   assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2417          "GPR reg sequences should not be able to overlap");
2418 #endif
2419 
2420   for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2421     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2422     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2423     MIB.addReg(ZeroReg);
2424     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2425     MIB.addImm(0);
2426   }
2427 }
2428 
2429 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2430                                    MachineBasicBlock::iterator I,
2431                                    const DebugLoc &DL, unsigned DestReg,
2432                                    unsigned SrcReg, bool KillSrc) const {
2433   if (AArch64::GPR32spRegClass.contains(DestReg) &&
2434       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2435     const TargetRegisterInfo *TRI = &getRegisterInfo();
2436 
2437     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2438       // If either operand is WSP, expand to ADD #0.
2439       if (Subtarget.hasZeroCycleRegMove()) {
2440         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2441         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2442                                                      &AArch64::GPR64spRegClass);
2443         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2444                                                     &AArch64::GPR64spRegClass);
2445         // This instruction is reading and writing X registers.  This may upset
2446         // the register scavenger and machine verifier, so we need to indicate
2447         // that we are reading an undefined value from SrcRegX, but a proper
2448         // value from SrcReg.
2449         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2450             .addReg(SrcRegX, RegState::Undef)
2451             .addImm(0)
2452             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2453             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2454       } else {
2455         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2456             .addReg(SrcReg, getKillRegState(KillSrc))
2457             .addImm(0)
2458             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2459       }
2460     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2461       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2462           .addImm(0)
2463           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2464     } else {
2465       if (Subtarget.hasZeroCycleRegMove()) {
2466         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2467         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2468                                                      &AArch64::GPR64spRegClass);
2469         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2470                                                     &AArch64::GPR64spRegClass);
2471         // This instruction is reading and writing X registers.  This may upset
2472         // the register scavenger and machine verifier, so we need to indicate
2473         // that we are reading an undefined value from SrcRegX, but a proper
2474         // value from SrcReg.
2475         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2476             .addReg(AArch64::XZR)
2477             .addReg(SrcRegX, RegState::Undef)
2478             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2479       } else {
2480         // Otherwise, expand to ORR WZR.
2481         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2482             .addReg(AArch64::WZR)
2483             .addReg(SrcReg, getKillRegState(KillSrc));
2484       }
2485     }
2486     return;
2487   }
2488 
2489   if (AArch64::GPR64spRegClass.contains(DestReg) &&
2490       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2491     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2492       // If either operand is SP, expand to ADD #0.
2493       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2494           .addReg(SrcReg, getKillRegState(KillSrc))
2495           .addImm(0)
2496           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2497     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2498       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2499           .addImm(0)
2500           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2501     } else {
2502       // Otherwise, expand to ORR XZR.
2503       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2504           .addReg(AArch64::XZR)
2505           .addReg(SrcReg, getKillRegState(KillSrc));
2506     }
2507     return;
2508   }
2509 
2510   // Copy a DDDD register quad by copying the individual sub-registers.
2511   if (AArch64::DDDDRegClass.contains(DestReg) &&
2512       AArch64::DDDDRegClass.contains(SrcReg)) {
2513     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2514                                        AArch64::dsub2, AArch64::dsub3};
2515     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2516                      Indices);
2517     return;
2518   }
2519 
2520   // Copy a DDD register triple by copying the individual sub-registers.
2521   if (AArch64::DDDRegClass.contains(DestReg) &&
2522       AArch64::DDDRegClass.contains(SrcReg)) {
2523     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2524                                        AArch64::dsub2};
2525     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2526                      Indices);
2527     return;
2528   }
2529 
2530   // Copy a DD register pair by copying the individual sub-registers.
2531   if (AArch64::DDRegClass.contains(DestReg) &&
2532       AArch64::DDRegClass.contains(SrcReg)) {
2533     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2534     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2535                      Indices);
2536     return;
2537   }
2538 
2539   // Copy a QQQQ register quad by copying the individual sub-registers.
2540   if (AArch64::QQQQRegClass.contains(DestReg) &&
2541       AArch64::QQQQRegClass.contains(SrcReg)) {
2542     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2543                                        AArch64::qsub2, AArch64::qsub3};
2544     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2545                      Indices);
2546     return;
2547   }
2548 
2549   // Copy a QQQ register triple by copying the individual sub-registers.
2550   if (AArch64::QQQRegClass.contains(DestReg) &&
2551       AArch64::QQQRegClass.contains(SrcReg)) {
2552     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2553                                        AArch64::qsub2};
2554     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2555                      Indices);
2556     return;
2557   }
2558 
2559   // Copy a QQ register pair by copying the individual sub-registers.
2560   if (AArch64::QQRegClass.contains(DestReg) &&
2561       AArch64::QQRegClass.contains(SrcReg)) {
2562     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2563     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2564                      Indices);
2565     return;
2566   }
2567 
2568   if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2569       AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2570     static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2571     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2572                     AArch64::XZR, Indices);
2573     return;
2574   }
2575 
2576   if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2577       AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2578     static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2579     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2580                     AArch64::WZR, Indices);
2581     return;
2582   }
2583 
2584   if (AArch64::FPR128RegClass.contains(DestReg) &&
2585       AArch64::FPR128RegClass.contains(SrcReg)) {
2586     if (Subtarget.hasNEON()) {
2587       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2588           .addReg(SrcReg)
2589           .addReg(SrcReg, getKillRegState(KillSrc));
2590     } else {
2591       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2592           .addReg(AArch64::SP, RegState::Define)
2593           .addReg(SrcReg, getKillRegState(KillSrc))
2594           .addReg(AArch64::SP)
2595           .addImm(-16);
2596       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2597           .addReg(AArch64::SP, RegState::Define)
2598           .addReg(DestReg, RegState::Define)
2599           .addReg(AArch64::SP)
2600           .addImm(16);
2601     }
2602     return;
2603   }
2604 
2605   if (AArch64::FPR64RegClass.contains(DestReg) &&
2606       AArch64::FPR64RegClass.contains(SrcReg)) {
2607     if (Subtarget.hasNEON()) {
2608       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2609                                        &AArch64::FPR128RegClass);
2610       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2611                                       &AArch64::FPR128RegClass);
2612       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2613           .addReg(SrcReg)
2614           .addReg(SrcReg, getKillRegState(KillSrc));
2615     } else {
2616       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2617           .addReg(SrcReg, getKillRegState(KillSrc));
2618     }
2619     return;
2620   }
2621 
2622   if (AArch64::FPR32RegClass.contains(DestReg) &&
2623       AArch64::FPR32RegClass.contains(SrcReg)) {
2624     if (Subtarget.hasNEON()) {
2625       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2626                                        &AArch64::FPR128RegClass);
2627       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2628                                       &AArch64::FPR128RegClass);
2629       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2630           .addReg(SrcReg)
2631           .addReg(SrcReg, getKillRegState(KillSrc));
2632     } else {
2633       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2634           .addReg(SrcReg, getKillRegState(KillSrc));
2635     }
2636     return;
2637   }
2638 
2639   if (AArch64::FPR16RegClass.contains(DestReg) &&
2640       AArch64::FPR16RegClass.contains(SrcReg)) {
2641     if (Subtarget.hasNEON()) {
2642       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2643                                        &AArch64::FPR128RegClass);
2644       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2645                                       &AArch64::FPR128RegClass);
2646       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2647           .addReg(SrcReg)
2648           .addReg(SrcReg, getKillRegState(KillSrc));
2649     } else {
2650       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2651                                        &AArch64::FPR32RegClass);
2652       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2653                                       &AArch64::FPR32RegClass);
2654       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2655           .addReg(SrcReg, getKillRegState(KillSrc));
2656     }
2657     return;
2658   }
2659 
2660   if (AArch64::FPR8RegClass.contains(DestReg) &&
2661       AArch64::FPR8RegClass.contains(SrcReg)) {
2662     if (Subtarget.hasNEON()) {
2663       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2664                                        &AArch64::FPR128RegClass);
2665       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2666                                       &AArch64::FPR128RegClass);
2667       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2668           .addReg(SrcReg)
2669           .addReg(SrcReg, getKillRegState(KillSrc));
2670     } else {
2671       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2672                                        &AArch64::FPR32RegClass);
2673       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2674                                       &AArch64::FPR32RegClass);
2675       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2676           .addReg(SrcReg, getKillRegState(KillSrc));
2677     }
2678     return;
2679   }
2680 
2681   // Copies between GPR64 and FPR64.
2682   if (AArch64::FPR64RegClass.contains(DestReg) &&
2683       AArch64::GPR64RegClass.contains(SrcReg)) {
2684     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2685         .addReg(SrcReg, getKillRegState(KillSrc));
2686     return;
2687   }
2688   if (AArch64::GPR64RegClass.contains(DestReg) &&
2689       AArch64::FPR64RegClass.contains(SrcReg)) {
2690     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2691         .addReg(SrcReg, getKillRegState(KillSrc));
2692     return;
2693   }
2694   // Copies between GPR32 and FPR32.
2695   if (AArch64::FPR32RegClass.contains(DestReg) &&
2696       AArch64::GPR32RegClass.contains(SrcReg)) {
2697     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2698         .addReg(SrcReg, getKillRegState(KillSrc));
2699     return;
2700   }
2701   if (AArch64::GPR32RegClass.contains(DestReg) &&
2702       AArch64::FPR32RegClass.contains(SrcReg)) {
2703     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2704         .addReg(SrcReg, getKillRegState(KillSrc));
2705     return;
2706   }
2707 
2708   if (DestReg == AArch64::NZCV) {
2709     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2710     BuildMI(MBB, I, DL, get(AArch64::MSR))
2711         .addImm(AArch64SysReg::NZCV)
2712         .addReg(SrcReg, getKillRegState(KillSrc))
2713         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2714     return;
2715   }
2716 
2717   if (SrcReg == AArch64::NZCV) {
2718     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2719     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2720         .addImm(AArch64SysReg::NZCV)
2721         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2722     return;
2723   }
2724 
2725   llvm_unreachable("unimplemented reg-to-reg copy");
2726 }
2727 
2728 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2729                                     MachineBasicBlock &MBB,
2730                                     MachineBasicBlock::iterator InsertBefore,
2731                                     const MCInstrDesc &MCID,
2732                                     unsigned SrcReg, bool IsKill,
2733                                     unsigned SubIdx0, unsigned SubIdx1, int FI,
2734                                     MachineMemOperand *MMO) {
2735   unsigned SrcReg0 = SrcReg;
2736   unsigned SrcReg1 = SrcReg;
2737   if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2738     SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2739     SubIdx0 = 0;
2740     SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2741     SubIdx1 = 0;
2742   }
2743   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2744       .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2745       .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2746       .addFrameIndex(FI)
2747       .addImm(0)
2748       .addMemOperand(MMO);
2749 }
2750 
2751 void AArch64InstrInfo::storeRegToStackSlot(
2752     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2753     bool isKill, int FI, const TargetRegisterClass *RC,
2754     const TargetRegisterInfo *TRI) const {
2755   MachineFunction &MF = *MBB.getParent();
2756   MachineFrameInfo &MFI = MF.getFrameInfo();
2757   unsigned Align = MFI.getObjectAlignment(FI);
2758 
2759   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2760   MachineMemOperand *MMO = MF.getMachineMemOperand(
2761       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2762   unsigned Opc = 0;
2763   bool Offset = true;
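  // Select a store opcode from the spill size of the register class; e.g. an
  // 8-byte GPR64 spill uses STRXui, while a 32-byte QQ tuple uses ST1Twov2d,
  // which takes no immediate offset (hence Offset is cleared below).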
2764   switch (TRI->getSpillSize(*RC)) {
2765   case 1:
2766     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2767       Opc = AArch64::STRBui;
2768     break;
2769   case 2:
2770     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2771       Opc = AArch64::STRHui;
2772     break;
2773   case 4:
2774     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2775       Opc = AArch64::STRWui;
2776       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2777         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2778       else
2779         assert(SrcReg != AArch64::WSP);
2780     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2781       Opc = AArch64::STRSui;
2782     break;
2783   case 8:
2784     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2785       Opc = AArch64::STRXui;
2786       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2787         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2788       else
2789         assert(SrcReg != AArch64::SP);
2790     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2791       Opc = AArch64::STRDui;
2792     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2793       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2794                               get(AArch64::STPWi), SrcReg, isKill,
2795                               AArch64::sube32, AArch64::subo32, FI, MMO);
2796       return;
2797     }
2798     break;
2799   case 16:
2800     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2801       Opc = AArch64::STRQui;
2802     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2803       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2804       Opc = AArch64::ST1Twov1d;
2805       Offset = false;
2806     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2807       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2808                               get(AArch64::STPXi), SrcReg, isKill,
2809                               AArch64::sube64, AArch64::subo64, FI, MMO);
2810       return;
2811     }
2812     break;
2813   case 24:
2814     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2815       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2816       Opc = AArch64::ST1Threev1d;
2817       Offset = false;
2818     }
2819     break;
2820   case 32:
2821     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2822       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2823       Opc = AArch64::ST1Fourv1d;
2824       Offset = false;
2825     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2826       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2827       Opc = AArch64::ST1Twov2d;
2828       Offset = false;
2829     }
2830     break;
2831   case 48:
2832     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2833       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2834       Opc = AArch64::ST1Threev2d;
2835       Offset = false;
2836     }
2837     break;
2838   case 64:
2839     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2840       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2841       Opc = AArch64::ST1Fourv2d;
2842       Offset = false;
2843     }
2844     break;
2845   }
2846   assert(Opc && "Unknown register class");
2847 
2848   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2849                                      .addReg(SrcReg, getKillRegState(isKill))
2850                                      .addFrameIndex(FI);
2851 
2852   if (Offset)
2853     MI.addImm(0);
2854   MI.addMemOperand(MMO);
2855 }
2856 
2857 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
2858                                      MachineBasicBlock &MBB,
2859                                      MachineBasicBlock::iterator InsertBefore,
2860                                      const MCInstrDesc &MCID,
2861                                      unsigned DestReg, unsigned SubIdx0,
2862                                      unsigned SubIdx1, int FI,
2863                                      MachineMemOperand *MMO) {
2864   unsigned DestReg0 = DestReg;
2865   unsigned DestReg1 = DestReg;
2866   bool IsUndef = true;
2867   if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
2868     DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
2869     SubIdx0 = 0;
2870     DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
2871     SubIdx1 = 0;
2872     IsUndef = false;
2873   }
2874   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2875       .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
2876       .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
2877       .addFrameIndex(FI)
2878       .addImm(0)
2879       .addMemOperand(MMO);
2880 }
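// Illustrative sketch (added for exposition, not in the original source): for
// a physical sequential pair such as AArch64::X0_X1, the helper above expands
// a fill into a single paired load, roughly:
//
//   loadRegPairFromStackSlot(TRI, MBB, I, get(AArch64::LDPXi), AArch64::X0_X1,
//                            AArch64::sube64, AArch64::subo64, FI, MMO);
//   // emits, approximately:  $x0, $x1 = LDPXi %stack.FI, 0
//
// For a virtual register the sub-register indices are kept on the operands and
// the defs are marked undef instead, as the code above shows.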
2881 
2882 void AArch64InstrInfo::loadRegFromStackSlot(
2883     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2884     int FI, const TargetRegisterClass *RC,
2885     const TargetRegisterInfo *TRI) const {
2886   MachineFunction &MF = *MBB.getParent();
2887   MachineFrameInfo &MFI = MF.getFrameInfo();
2888   unsigned Align = MFI.getObjectAlignment(FI);
2889   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2890   MachineMemOperand *MMO = MF.getMachineMemOperand(
2891       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2892 
2893   unsigned Opc = 0;
2894   bool Offset = true;
2895   switch (TRI->getSpillSize(*RC)) {
2896   case 1:
2897     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2898       Opc = AArch64::LDRBui;
2899     break;
2900   case 2:
2901     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2902       Opc = AArch64::LDRHui;
2903     break;
2904   case 4:
2905     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2906       Opc = AArch64::LDRWui;
2907       if (TargetRegisterInfo::isVirtualRegister(DestReg))
2908         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2909       else
2910         assert(DestReg != AArch64::WSP);
2911     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2912       Opc = AArch64::LDRSui;
2913     break;
2914   case 8:
2915     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2916       Opc = AArch64::LDRXui;
2917       if (TargetRegisterInfo::isVirtualRegister(DestReg))
2918         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2919       else
2920         assert(DestReg != AArch64::SP);
2921     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2922       Opc = AArch64::LDRDui;
2923     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2924       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2925                                get(AArch64::LDPWi), DestReg, AArch64::sube32,
2926                                AArch64::subo32, FI, MMO);
2927       return;
2928     }
2929     break;
2930   case 16:
2931     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2932       Opc = AArch64::LDRQui;
2933     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2934       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2935       Opc = AArch64::LD1Twov1d;
2936       Offset = false;
2937     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2938       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2939                                get(AArch64::LDPXi), DestReg, AArch64::sube64,
2940                                AArch64::subo64, FI, MMO);
2941       return;
2942     }
2943     break;
2944   case 24:
2945     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2946       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2947       Opc = AArch64::LD1Threev1d;
2948       Offset = false;
2949     }
2950     break;
2951   case 32:
2952     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2953       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2954       Opc = AArch64::LD1Fourv1d;
2955       Offset = false;
2956     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2957       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2958       Opc = AArch64::LD1Twov2d;
2959       Offset = false;
2960     }
2961     break;
2962   case 48:
2963     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2964       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2965       Opc = AArch64::LD1Threev2d;
2966       Offset = false;
2967     }
2968     break;
2969   case 64:
2970     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2971       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2972       Opc = AArch64::LD1Fourv2d;
2973       Offset = false;
2974     }
2975     break;
2976   }
2977   assert(Opc && "Unknown register class");
2978 
2979   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2980                                      .addReg(DestReg, getDefRegState(true))
2981                                      .addFrameIndex(FI);
2982   if (Offset)
2983     MI.addImm(0);
2984   MI.addMemOperand(MMO);
2985 }
2986 
2987 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2988                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2989                            unsigned DestReg, unsigned SrcReg, int Offset,
2990                            const TargetInstrInfo *TII,
2991                            MachineInstr::MIFlag Flag, bool SetNZCV,
2992                            bool NeedsWinCFI, bool *HasWinCFI) {
2993   if (DestReg == SrcReg && Offset == 0)
2994     return;
2995 
2996   assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2997          "SP increment/decrement not 16-byte aligned");
2998 
2999   bool isSub = Offset < 0;
3000   if (isSub)
3001     Offset = -Offset;
3002 
3003   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
3004   // scratch register.  If DestReg is a virtual register, use it as the
3005   // scratch register; otherwise, create a new virtual register (to be
3006   // replaced by the scavenger at the end of PEI).  That case can be optimized
3007   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
3008   // register can be loaded with offset%8 and the add/sub can use an extending
3009   // instruction with LSL#3.
3010   // Currently the function handles any offsets but generates a poor sequence
3011   // of code.
3012   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
3013 
3014   unsigned Opc;
3015   if (SetNZCV)
3016     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
3017   else
3018     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
3019   const unsigned MaxEncoding = 0xfff;
3020   const unsigned ShiftSize = 12;
3021   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
3022   while (((unsigned)Offset) >= (1 << ShiftSize)) {
3023     unsigned ThisVal;
3024     if (((unsigned)Offset) > MaxEncodableValue) {
3025       ThisVal = MaxEncodableValue;
3026     } else {
3027       ThisVal = Offset & MaxEncodableValue;
3028     }
3029     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3030            "Encoding cannot handle value that big");
3031     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3032         .addReg(SrcReg)
3033         .addImm(ThisVal >> ShiftSize)
3034         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
3035         .setMIFlag(Flag);
3036 
3037     if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP) {
3038       if (HasWinCFI)
3039         *HasWinCFI = true;
3040       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
3041           .addImm(ThisVal)
3042           .setMIFlag(Flag);
3043     }
3044 
3045     SrcReg = DestReg;
3046     Offset -= ThisVal;
3047     if (Offset == 0)
3048       return;
3049   }
3050   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3051       .addReg(SrcReg)
3052       .addImm(Offset)
3053       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
3054       .setMIFlag(Flag);
3055 
3056   if (NeedsWinCFI) {
3057     if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
3058         (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
3059       if (HasWinCFI)
3060         *HasWinCFI = true;
3061       if (Offset == 0)
3062         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
3063                 setMIFlag(Flag);
3064       else
3065         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
3066                 addImm(Offset).setMIFlag(Flag);
3067     } else if (DestReg == AArch64::SP) {
3068       if (HasWinCFI)
3069         *HasWinCFI = true;
3070       BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
3071               addImm(Offset).setMIFlag(Flag);
3072     }
3073   }
3074 }
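// Worked example (added for exposition): lowering "SP -= 0x45670" with the
// loop above. 0x45670 is >= 1<<12, so the first iteration emits the bits that
// fit in the shifted 12-bit field:
//   SUB sp, sp, #0x45, lsl #12      ; subtracts 0x45000
// leaving a residual of 0x670 (< 1<<12), which the final BuildMI emits as:
//   SUB sp, sp, #0x670
// Offsets above 0xfff000 simply take additional trips around the loop, each
// peeling off at most 0xfff000 bytes.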
3075 
3076 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3077     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3078     MachineBasicBlock::iterator InsertPt, int FrameIndex,
3079     LiveIntervals *LIS, VirtRegMap *VRM) const {
3080   // This is a bit of a hack. Consider this instruction:
3081   //
3082   //   %0 = COPY %sp; GPR64all:%0
3083   //
3084   // We explicitly chose GPR64all for the virtual register so such a copy might
3085   // be eliminated by RegisterCoalescer. However, that may not be possible, and
3086   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3087   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3088   //
3089   // To prevent that, we are going to constrain the %0 register class here.
3090   //
3091   // <rdar://problem/11522048>
3092   //
3093   if (MI.isFullCopy()) {
3094     unsigned DstReg = MI.getOperand(0).getReg();
3095     unsigned SrcReg = MI.getOperand(1).getReg();
3096     if (SrcReg == AArch64::SP &&
3097         TargetRegisterInfo::isVirtualRegister(DstReg)) {
3098       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3099       return nullptr;
3100     }
3101     if (DstReg == AArch64::SP &&
3102         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
3103       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3104       return nullptr;
3105     }
3106   }
3107 
3108   // Handle the case where a copy is being spilled or filled but the source
3109   // and destination register class don't match.  For example:
3110   //
3111   //   %0 = COPY %xzr; GPR64common:%0
3112   //
3113   // In this case we can still safely fold away the COPY and generate the
3114   // following spill code:
3115   //
3116   //   STRXui %xzr, %stack.0
3117   //
3118   // This also eliminates spilled cross register class COPYs (e.g. between x and
3119   // d regs) of the same size.  For example:
3120   //
3121   //   %0 = COPY %1; GPR64:%0, FPR64:%1
3122   //
3123   // will be filled as
3124   //
3125   //   LDRDui %0, fi<#0>
3126   //
3127   // instead of
3128   //
3129   //   LDRXui %Temp, fi<#0>
3130   //   %0 = FMOV %Temp
3131   //
3132   if (MI.isCopy() && Ops.size() == 1 &&
3133       // Make sure we're only folding the explicit COPY defs/uses.
3134       (Ops[0] == 0 || Ops[0] == 1)) {
3135     bool IsSpill = Ops[0] == 0;
3136     bool IsFill = !IsSpill;
3137     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3138     const MachineRegisterInfo &MRI = MF.getRegInfo();
3139     MachineBasicBlock &MBB = *MI.getParent();
3140     const MachineOperand &DstMO = MI.getOperand(0);
3141     const MachineOperand &SrcMO = MI.getOperand(1);
3142     unsigned DstReg = DstMO.getReg();
3143     unsigned SrcReg = SrcMO.getReg();
3144     // This is slightly expensive to compute for physical regs since
3145     // getMinimalPhysRegClass is slow.
3146     auto getRegClass = [&](unsigned Reg) {
3147       return TargetRegisterInfo::isVirtualRegister(Reg)
3148                  ? MRI.getRegClass(Reg)
3149                  : TRI.getMinimalPhysRegClass(Reg);
3150     };
3151 
3152     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3153       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3154                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3155              "Mismatched register size in non subreg COPY");
3156       if (IsSpill)
3157         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3158                             getRegClass(SrcReg), &TRI);
3159       else
3160         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3161                              getRegClass(DstReg), &TRI);
3162       return &*--InsertPt;
3163     }
3164 
3165     // Handle cases like spilling def of:
3166     //
3167     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3168     //
3169     // where the physical register source can be widened and stored to the full
3170     // virtual reg destination stack slot, in this case producing:
3171     //
3172     //   STRXui %xzr, %stack.0
3173     //
3174     if (IsSpill && DstMO.isUndef() &&
3175         TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
3176       assert(SrcMO.getSubReg() == 0 &&
3177              "Unexpected subreg on physical register");
3178       const TargetRegisterClass *SpillRC;
3179       unsigned SpillSubreg;
3180       switch (DstMO.getSubReg()) {
3181       default:
3182         SpillRC = nullptr;
3183         break;
3184       case AArch64::sub_32:
3185       case AArch64::ssub:
3186         if (AArch64::GPR32RegClass.contains(SrcReg)) {
3187           SpillRC = &AArch64::GPR64RegClass;
3188           SpillSubreg = AArch64::sub_32;
3189         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3190           SpillRC = &AArch64::FPR64RegClass;
3191           SpillSubreg = AArch64::ssub;
3192         } else
3193           SpillRC = nullptr;
3194         break;
3195       case AArch64::dsub:
3196         if (AArch64::FPR64RegClass.contains(SrcReg)) {
3197           SpillRC = &AArch64::FPR128RegClass;
3198           SpillSubreg = AArch64::dsub;
3199         } else
3200           SpillRC = nullptr;
3201         break;
3202       }
3203 
3204       if (SpillRC)
3205         if (unsigned WidenedSrcReg =
3206                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3207           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3208                               FrameIndex, SpillRC, &TRI);
3209           return &*--InsertPt;
3210         }
3211     }
3212 
3213     // Handle cases like filling use of:
3214     //
3215     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3216     //
3217     // where we can load the full virtual reg source stack slot into the subreg
3218     // destination, in this case producing:
3219     //
3220     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
3221     //
3222     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3223       const TargetRegisterClass *FillRC;
3224       switch (DstMO.getSubReg()) {
3225       default:
3226         FillRC = nullptr;
3227         break;
3228       case AArch64::sub_32:
3229         FillRC = &AArch64::GPR32RegClass;
3230         break;
3231       case AArch64::ssub:
3232         FillRC = &AArch64::FPR32RegClass;
3233         break;
3234       case AArch64::dsub:
3235         FillRC = &AArch64::FPR64RegClass;
3236         break;
3237       }
3238 
3239       if (FillRC) {
3240         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3241                    TRI.getRegSizeInBits(*FillRC) &&
3242                "Mismatched regclass size on folded subreg COPY");
3243         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3244         MachineInstr &LoadMI = *--InsertPt;
3245         MachineOperand &LoadDst = LoadMI.getOperand(0);
3246         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3247         LoadDst.setSubReg(DstMO.getSubReg());
3248         LoadDst.setIsUndef();
3249         return &LoadMI;
3250       }
3251     }
3252   }
3253 
3254   // Cannot fold.
3255   return nullptr;
3256 }
3257 
3258 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
3259                                     bool *OutUseUnscaledOp,
3260                                     unsigned *OutUnscaledOp,
3261                                     int *EmittableOffset) {
3262   // Set output values in case of early exit.
3263   if (EmittableOffset)
3264     *EmittableOffset = 0;
3265   if (OutUseUnscaledOp)
3266     *OutUseUnscaledOp = false;
3267   if (OutUnscaledOp)
3268     *OutUnscaledOp = 0;
3269 
3270   // Exit early for structured vector spills/fills as they can't take an
3271   // immediate offset.
3272   switch (MI.getOpcode()) {
3273   default:
3274     break;
3275   case AArch64::LD1Twov2d:
3276   case AArch64::LD1Threev2d:
3277   case AArch64::LD1Fourv2d:
3278   case AArch64::LD1Twov1d:
3279   case AArch64::LD1Threev1d:
3280   case AArch64::LD1Fourv1d:
3281   case AArch64::ST1Twov2d:
3282   case AArch64::ST1Threev2d:
3283   case AArch64::ST1Fourv2d:
3284   case AArch64::ST1Twov1d:
3285   case AArch64::ST1Threev1d:
3286   case AArch64::ST1Fourv1d:
3287   case AArch64::IRG:
3288   case AArch64::IRGstack:
3289     return AArch64FrameOffsetCannotUpdate;
3290   }
3291 
3292   // Get the min/max offset and the scale.
3293   unsigned Scale, Width;
3294   int64_t MinOff, MaxOff;
3295   if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), Scale, Width, MinOff,
3296                                       MaxOff))
3297     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3298 
3299   // Construct the complete offset.
3300   const MachineOperand &ImmOpnd =
3301       MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
3302   Offset += ImmOpnd.getImm() * Scale;
3303 
3304   // If the offset doesn't match the scale, we rewrite the instruction to
3305   // use the unscaled instruction instead. We do the same if the offset is
3306   // negative and an unscaled op is available.
3307   Optional<unsigned> UnscaledOp =
3308       AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
3309   bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
3310   if (useUnscaledOp &&
3311       !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, Scale, Width, MinOff, MaxOff))
3312     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
3313 
3314   int64_t Remainder = Offset % Scale;
3315   assert(!(Remainder && useUnscaledOp) &&
3316          "Cannot have remainder when using unscaled op");
3317 
3318   assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
3319   int NewOffset = Offset / Scale;
3320   if (MinOff <= NewOffset && NewOffset <= MaxOff)
3321     Offset = Remainder;
3322   else {
3323     NewOffset = NewOffset < 0 ? MinOff : MaxOff;
3324     Offset = Offset - NewOffset * Scale + Remainder;
3325   }
3326 
3327   if (EmittableOffset)
3328     *EmittableOffset = NewOffset;
3329   if (OutUseUnscaledOp)
3330     *OutUseUnscaledOp = useUnscaledOp;
3331   if (OutUnscaledOp && UnscaledOp)
3332     *OutUnscaledOp = *UnscaledOp;
3333 
3334   return AArch64FrameOffsetCanUpdate |
3335          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3336 }
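// Worked example (added for exposition): for a scaled 64-bit load such as
// LDRXui, getMemOpInfo reports Scale = 8 and an immediate range of [0, 4095].
// If the combined byte offset is 40, NewOffset = 40 / 8 = 5 is in range, the
// residual Offset becomes 0, and the result is
// AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal. If the combined
// byte offset is 40000, NewOffset is clamped to 4095 and the caller is left
// with a residual of 40000 - 4095 * 8 = 7240 bytes to materialize separately
// (the AArch64FrameOffsetIsLegal bit stays clear).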
3337 
3338 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3339                                     unsigned FrameReg, int &Offset,
3340                                     const AArch64InstrInfo *TII) {
3341   unsigned Opcode = MI.getOpcode();
3342   unsigned ImmIdx = FrameRegIdx + 1;
3343 
3344   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3345     Offset += MI.getOperand(ImmIdx).getImm();
3346     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3347                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3348                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3349     MI.eraseFromParent();
3350     Offset = 0;
3351     return true;
3352   }
3353 
3354   int NewOffset;
3355   unsigned UnscaledOp;
3356   bool UseUnscaledOp;
3357   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3358                                          &UnscaledOp, &NewOffset);
3359   if (Status & AArch64FrameOffsetCanUpdate) {
3360     if (Status & AArch64FrameOffsetIsLegal)
3361       // Replace the FrameIndex with FrameReg.
3362       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3363     if (UseUnscaledOp)
3364       MI.setDesc(TII->get(UnscaledOp));
3365 
3366     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3367     return Offset == 0;
3368   }
3369 
3370   return false;
3371 }
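// Illustrative use (added for exposition; the register choices are
// hypothetical): during frame index elimination an access such as
//   $x0 = LDRXui %stack.0, 2
// whose object sits 16 bytes above the chosen frame register, say $fp, is
// rewritten by the code above to
//   $x0 = LDRXui $fp, 4
// (16 bytes of frame offset plus the pre-existing scaled immediate of 2,
// divided by the scale of 8), and the function returns true because no
// residual offset remains.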
3372 
3373 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3374   NopInst.setOpcode(AArch64::HINT);
3375   NopInst.addOperand(MCOperand::createImm(0));
3376 }
3377 
3378 // AArch64 supports MachineCombiner.
3379 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3380 
3381 // True when Opc sets flag
3382 static bool isCombineInstrSettingFlag(unsigned Opc) {
3383   switch (Opc) {
3384   case AArch64::ADDSWrr:
3385   case AArch64::ADDSWri:
3386   case AArch64::ADDSXrr:
3387   case AArch64::ADDSXri:
3388   case AArch64::SUBSWrr:
3389   case AArch64::SUBSXrr:
3390   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3391   case AArch64::SUBSWri:
3392   case AArch64::SUBSXri:
3393     return true;
3394   default:
3395     break;
3396   }
3397   return false;
3398 }
3399 
3400 // 32b Opcodes that can be combined with a MUL
3401 static bool isCombineInstrCandidate32(unsigned Opc) {
3402   switch (Opc) {
3403   case AArch64::ADDWrr:
3404   case AArch64::ADDWri:
3405   case AArch64::SUBWrr:
3406   case AArch64::ADDSWrr:
3407   case AArch64::ADDSWri:
3408   case AArch64::SUBSWrr:
3409   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3410   case AArch64::SUBWri:
3411   case AArch64::SUBSWri:
3412     return true;
3413   default:
3414     break;
3415   }
3416   return false;
3417 }
3418 
3419 // 64b Opcodes that can be combined with a MUL
3420 static bool isCombineInstrCandidate64(unsigned Opc) {
3421   switch (Opc) {
3422   case AArch64::ADDXrr:
3423   case AArch64::ADDXri:
3424   case AArch64::SUBXrr:
3425   case AArch64::ADDSXrr:
3426   case AArch64::ADDSXri:
3427   case AArch64::SUBSXrr:
3428   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3429   case AArch64::SUBXri:
3430   case AArch64::SUBSXri:
3431     return true;
3432   default:
3433     break;
3434   }
3435   return false;
3436 }
3437 
3438 // FP Opcodes that can be combined with a FMUL
3439 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3440   switch (Inst.getOpcode()) {
3441   default:
3442     break;
3443   case AArch64::FADDSrr:
3444   case AArch64::FADDDrr:
3445   case AArch64::FADDv2f32:
3446   case AArch64::FADDv2f64:
3447   case AArch64::FADDv4f32:
3448   case AArch64::FSUBSrr:
3449   case AArch64::FSUBDrr:
3450   case AArch64::FSUBv2f32:
3451   case AArch64::FSUBv2f64:
3452   case AArch64::FSUBv4f32:
3453     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3454     return (Options.UnsafeFPMath ||
3455             Options.AllowFPOpFusion == FPOpFusion::Fast);
3456   }
3457   return false;
3458 }
3459 
3460 // Opcodes that can be combined with a MUL
3461 static bool isCombineInstrCandidate(unsigned Opc) {
3462   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3463 }
3464 
3465 //
3466 // Utility routine that checks if \param MO is defined by an
3467 // \param CombineOpc instruction in the basic block \param MBB
3468 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3469                        unsigned CombineOpc, unsigned ZeroReg = 0,
3470                        bool CheckZeroReg = false) {
3471   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3472   MachineInstr *MI = nullptr;
3473 
3474   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3475     MI = MRI.getUniqueVRegDef(MO.getReg());
3476   // And it needs to be in the trace (otherwise, it won't have a depth).
3477   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3478     return false;
3479   // Must only be used by the user we combine with.
3480   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3481     return false;
3482 
3483   if (CheckZeroReg) {
3484     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3485            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3486            MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3487     // The third input reg must be zero.
3488     if (MI->getOperand(3).getReg() != ZeroReg)
3489       return false;
3490   }
3491 
3492   return true;
3493 }
3494 
3495 //
3496 // Is \param MO defined by an integer multiply and can be combined?
3497 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3498                               unsigned MulOpc, unsigned ZeroReg) {
3499   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3500 }
3501 
3502 //
3503 // Is \param MO defined by a floating-point multiply and can be combined?
3504 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3505                                unsigned MulOpc) {
3506   return canCombine(MBB, MO, MulOpc);
3507 }
3508 
3509 // TODO: There are many more machine instruction opcodes to match:
3510 //       1. Other data types (integer, vectors)
3511 //       2. Other math / logic operations (xor, or)
3512 //       3. Other forms of the same operation (intrinsics and other variants)
3513 bool AArch64InstrInfo::isAssociativeAndCommutative(
3514     const MachineInstr &Inst) const {
3515   switch (Inst.getOpcode()) {
3516   case AArch64::FADDDrr:
3517   case AArch64::FADDSrr:
3518   case AArch64::FADDv2f32:
3519   case AArch64::FADDv2f64:
3520   case AArch64::FADDv4f32:
3521   case AArch64::FMULDrr:
3522   case AArch64::FMULSrr:
3523   case AArch64::FMULX32:
3524   case AArch64::FMULX64:
3525   case AArch64::FMULXv2f32:
3526   case AArch64::FMULXv2f64:
3527   case AArch64::FMULXv4f32:
3528   case AArch64::FMULv2f32:
3529   case AArch64::FMULv2f64:
3530   case AArch64::FMULv4f32:
3531     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3532   default:
3533     return false;
3534   }
3535 }
3536 
3537 /// Find instructions that can be turned into madd.
3538 static bool getMaddPatterns(MachineInstr &Root,
3539                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3540   unsigned Opc = Root.getOpcode();
3541   MachineBasicBlock &MBB = *Root.getParent();
3542   bool Found = false;
3543 
3544   if (!isCombineInstrCandidate(Opc))
3545     return false;
3546   if (isCombineInstrSettingFlag(Opc)) {
3547     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3548     // When NZCV is live, bail out.
3549     if (Cmp_NZCV == -1)
3550       return false;
3551     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3552     // When the opcode can't change, bail out.
3553     // CHECKME: do we miss any cases for opcode conversion?
3554     if (NewOpc == Opc)
3555       return false;
3556     Opc = NewOpc;
3557   }
3558 
3559   switch (Opc) {
3560   default:
3561     break;
3562   case AArch64::ADDWrr:
3563     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3564            "ADDWrr does not have register operands");
3565     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3566                           AArch64::WZR)) {
3567       Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3568       Found = true;
3569     }
3570     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3571                           AArch64::WZR)) {
3572       Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3573       Found = true;
3574     }
3575     break;
3576   case AArch64::ADDXrr:
3577     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3578                           AArch64::XZR)) {
3579       Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3580       Found = true;
3581     }
3582     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3583                           AArch64::XZR)) {
3584       Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3585       Found = true;
3586     }
3587     break;
3588   case AArch64::SUBWrr:
3589     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3590                           AArch64::WZR)) {
3591       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
3592       Found = true;
3593     }
3594     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3595                           AArch64::WZR)) {
3596       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
3597       Found = true;
3598     }
3599     break;
3600   case AArch64::SUBXrr:
3601     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3602                           AArch64::XZR)) {
3603       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
3604       Found = true;
3605     }
3606     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3607                           AArch64::XZR)) {
3608       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
3609       Found = true;
3610     }
3611     break;
3612   case AArch64::ADDWri:
3613     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3614                           AArch64::WZR)) {
3615       Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
3616       Found = true;
3617     }
3618     break;
3619   case AArch64::ADDXri:
3620     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3621                           AArch64::XZR)) {
3622       Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3623       Found = true;
3624     }
3625     break;
3626   case AArch64::SUBWri:
3627     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3628                           AArch64::WZR)) {
3629       Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3630       Found = true;
3631     }
3632     break;
3633   case AArch64::SUBXri:
3634     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3635                           AArch64::XZR)) {
3636       Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3637       Found = true;
3638     }
3639     break;
3640   }
3641   return Found;
3642 }
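// Illustrative example (added for exposition): with the patterns collected
// above, the MachineCombiner can rewrite
//   %3:gpr32 = MADDWrrr %0, %1, $wzr    ; plain 32-bit multiply
//   %4:gpr32 = ADDWrr %3, %2
// as the single instruction
//   %4:gpr32 = MADDWrrr %0, %1, %2
// provided %3 has no other non-debug uses (enforced by canCombine above).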
3643 /// Floating-Point Support
3644 
3645 /// Find FP instructions that can be turned into a fused multiply-add/sub.
3646 static bool getFMAPatterns(MachineInstr &Root,
3647                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3648 
3649   if (!isCombineInstrCandidateFP(Root))
3650     return false;
3651 
3652   MachineBasicBlock &MBB = *Root.getParent();
3653   bool Found = false;
3654 
3655   switch (Root.getOpcode()) {
3656   default:
3657     assert(false && "Unsupported FP instruction in combiner\n");
3658     break;
3659   case AArch64::FADDSrr:
3660     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3661            "FADDSrr does not have register operands");
3662     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3663       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3664       Found = true;
3665     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3666                                   AArch64::FMULv1i32_indexed)) {
3667       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3668       Found = true;
3669     }
3670     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3671       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3672       Found = true;
3673     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3674                                   AArch64::FMULv1i32_indexed)) {
3675       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3676       Found = true;
3677     }
3678     break;
3679   case AArch64::FADDDrr:
3680     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3681       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3682       Found = true;
3683     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3684                                   AArch64::FMULv1i64_indexed)) {
3685       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3686       Found = true;
3687     }
3688     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3689       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3690       Found = true;
3691     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3692                                   AArch64::FMULv1i64_indexed)) {
3693       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3694       Found = true;
3695     }
3696     break;
3697   case AArch64::FADDv2f32:
3698     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3699                            AArch64::FMULv2i32_indexed)) {
3700       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3701       Found = true;
3702     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3703                                   AArch64::FMULv2f32)) {
3704       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3705       Found = true;
3706     }
3707     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3708                            AArch64::FMULv2i32_indexed)) {
3709       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3710       Found = true;
3711     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3712                                   AArch64::FMULv2f32)) {
3713       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3714       Found = true;
3715     }
3716     break;
3717   case AArch64::FADDv2f64:
3718     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3719                            AArch64::FMULv2i64_indexed)) {
3720       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3721       Found = true;
3722     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3723                                   AArch64::FMULv2f64)) {
3724       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3725       Found = true;
3726     }
3727     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3728                            AArch64::FMULv2i64_indexed)) {
3729       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3730       Found = true;
3731     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3732                                   AArch64::FMULv2f64)) {
3733       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3734       Found = true;
3735     }
3736     break;
3737   case AArch64::FADDv4f32:
3738     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3739                            AArch64::FMULv4i32_indexed)) {
3740       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3741       Found = true;
3742     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3743                                   AArch64::FMULv4f32)) {
3744       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3745       Found = true;
3746     }
3747     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3748                            AArch64::FMULv4i32_indexed)) {
3749       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3750       Found = true;
3751     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3752                                   AArch64::FMULv4f32)) {
3753       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3754       Found = true;
3755     }
3756     break;
3757 
3758   case AArch64::FSUBSrr:
3759     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3760       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3761       Found = true;
3762     }
3763     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3764       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3765       Found = true;
3766     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3767                                   AArch64::FMULv1i32_indexed)) {
3768       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3769       Found = true;
3770     }
3771     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3772       Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
3773       Found = true;
3774     }
3775     break;
3776   case AArch64::FSUBDrr:
3777     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3778       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3779       Found = true;
3780     }
3781     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3782       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3783       Found = true;
3784     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3785                                   AArch64::FMULv1i64_indexed)) {
3786       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3787       Found = true;
3788     }
3789     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3790       Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
3791       Found = true;
3792     }
3793     break;
3794   case AArch64::FSUBv2f32:
3795     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3796                            AArch64::FMULv2i32_indexed)) {
3797       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3798       Found = true;
3799     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3800                                   AArch64::FMULv2f32)) {
3801       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3802       Found = true;
3803     }
3804     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3805                            AArch64::FMULv2i32_indexed)) {
3806       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
3807       Found = true;
3808     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3809                                   AArch64::FMULv2f32)) {
3810       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
3811       Found = true;
3812     }
3813     break;
3814   case AArch64::FSUBv2f64:
3815     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3816                            AArch64::FMULv2i64_indexed)) {
3817       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3818       Found = true;
3819     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3820                                   AArch64::FMULv2f64)) {
3821       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3822       Found = true;
3823     }
3824     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3825                            AArch64::FMULv2i64_indexed)) {
3826       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
3827       Found = true;
3828     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3829                                   AArch64::FMULv2f64)) {
3830       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
3831       Found = true;
3832     }
3833     break;
3834   case AArch64::FSUBv4f32:
3835     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3836                            AArch64::FMULv4i32_indexed)) {
3837       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3838       Found = true;
3839     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3840                                   AArch64::FMULv4f32)) {
3841       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3842       Found = true;
3843     }
3844     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3845                            AArch64::FMULv4i32_indexed)) {
3846       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
3847       Found = true;
3848     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3849                                   AArch64::FMULv4f32)) {
3850       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
3851       Found = true;
3852     }
3853     break;
3854   }
3855   return Found;
3856 }
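// Illustrative example (added for exposition): when unsafe-fp-math or fast FP
// contraction is enabled (see isCombineInstrCandidateFP), the combiner can
// rewrite
//   %2:fpr32 = FMULSrr %0, %1
//   %4:fpr32 = FADDSrr %2, %3
// as
//   %4:fpr32 = FMADDSrr %0, %1, %3
// which corresponds to the FMULADDS_OP1 pattern recorded above.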
3857 
3858 /// Return true when a code sequence can improve throughput. It
3859 /// should be called only for instructions in loops.
3860 /// \param Pattern - combiner pattern
3861 bool AArch64InstrInfo::isThroughputPattern(
3862     MachineCombinerPattern Pattern) const {
3863   switch (Pattern) {
3864   default:
3865     break;
3866   case MachineCombinerPattern::FMULADDS_OP1:
3867   case MachineCombinerPattern::FMULADDS_OP2:
3868   case MachineCombinerPattern::FMULSUBS_OP1:
3869   case MachineCombinerPattern::FMULSUBS_OP2:
3870   case MachineCombinerPattern::FMULADDD_OP1:
3871   case MachineCombinerPattern::FMULADDD_OP2:
3872   case MachineCombinerPattern::FMULSUBD_OP1:
3873   case MachineCombinerPattern::FMULSUBD_OP2:
3874   case MachineCombinerPattern::FNMULSUBS_OP1:
3875   case MachineCombinerPattern::FNMULSUBD_OP1:
3876   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3877   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3878   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3879   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3880   case MachineCombinerPattern::FMLAv2f32_OP2:
3881   case MachineCombinerPattern::FMLAv2f32_OP1:
3882   case MachineCombinerPattern::FMLAv2f64_OP1:
3883   case MachineCombinerPattern::FMLAv2f64_OP2:
3884   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3885   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3886   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3887   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3888   case MachineCombinerPattern::FMLAv4f32_OP1:
3889   case MachineCombinerPattern::FMLAv4f32_OP2:
3890   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3891   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3892   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3893   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3894   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3895   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3896   case MachineCombinerPattern::FMLSv2f32_OP2:
3897   case MachineCombinerPattern::FMLSv2f64_OP2:
3898   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3899   case MachineCombinerPattern::FMLSv4f32_OP2:
3900     return true;
3901   } // end switch (Pattern)
3902   return false;
3903 }
3904 /// Return true when there is potentially a faster code sequence for an
3905 /// instruction chain ending in \p Root. All potential patterns are listed in
3906 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3907 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3908 
3909 bool AArch64InstrInfo::getMachineCombinerPatterns(
3910     MachineInstr &Root,
3911     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3912   // Integer patterns
3913   if (getMaddPatterns(Root, Patterns))
3914     return true;
3915   // Floating point patterns
3916   if (getFMAPatterns(Root, Patterns))
3917     return true;
3918 
3919   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3920 }
3921 
3922 enum class FMAInstKind { Default, Indexed, Accumulator };
3923 /// genFusedMultiply - Generate fused multiply instructions.
3924 /// This function supports both integer and floating point instructions.
3925 /// A typical example:
3926 ///  F|MUL I=A,B,0
3927 ///  F|ADD R,I,C
3928 ///  ==> F|MADD R,A,B,C
3929 /// \param MF Containing MachineFunction
3930 /// \param MRI Register information
3931 /// \param TII Target information
3932 /// \param Root is the F|ADD instruction
3933 /// \param [out] InsInstrs is a vector of machine instructions and will
3934 /// contain the generated madd instruction
3935 /// \param IdxMulOpd is index of operand in Root that is the result of
3936 /// the F|MUL. In the example above IdxMulOpd is 1.
3937 /// \param MaddOpc the opcode of the f|madd instruction
3938 /// \param RC Register class of operands
3939 /// \param kind the kind of fma instruction (addressing mode) to be generated
3940 /// \param ReplacedAddend is the result register from the instruction
3941 /// replacing the non-combined operand, if any.
3942 static MachineInstr *
3943 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3944                  const TargetInstrInfo *TII, MachineInstr &Root,
3945                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3946                  unsigned MaddOpc, const TargetRegisterClass *RC,
3947                  FMAInstKind kind = FMAInstKind::Default,
3948                  const unsigned *ReplacedAddend = nullptr) {
3949   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3950 
3951   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3952   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3953   unsigned ResultReg = Root.getOperand(0).getReg();
3954   unsigned SrcReg0 = MUL->getOperand(1).getReg();
3955   bool Src0IsKill = MUL->getOperand(1).isKill();
3956   unsigned SrcReg1 = MUL->getOperand(2).getReg();
3957   bool Src1IsKill = MUL->getOperand(2).isKill();
3958 
3959   unsigned SrcReg2;
3960   bool Src2IsKill;
3961   if (ReplacedAddend) {
3962     // If we just generated a new addend, we must be its only use.
3963     SrcReg2 = *ReplacedAddend;
3964     Src2IsKill = true;
3965   } else {
3966     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
3967     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
3968   }
3969 
3970   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
3971     MRI.constrainRegClass(ResultReg, RC);
3972   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
3973     MRI.constrainRegClass(SrcReg0, RC);
3974   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
3975     MRI.constrainRegClass(SrcReg1, RC);
3976   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
3977     MRI.constrainRegClass(SrcReg2, RC);
3978 
3979   MachineInstrBuilder MIB;
3980   if (kind == FMAInstKind::Default)
3981     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3982               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3983               .addReg(SrcReg1, getKillRegState(Src1IsKill))
3984               .addReg(SrcReg2, getKillRegState(Src2IsKill));
3985   else if (kind == FMAInstKind::Indexed)
3986     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3987               .addReg(SrcReg2, getKillRegState(Src2IsKill))
3988               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3989               .addReg(SrcReg1, getKillRegState(Src1IsKill))
3990               .addImm(MUL->getOperand(3).getImm());
3991   else if (kind == FMAInstKind::Accumulator)
3992     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
3993               .addReg(SrcReg2, getKillRegState(Src2IsKill))
3994               .addReg(SrcReg0, getKillRegState(Src0IsKill))
3995               .addReg(SrcReg1, getKillRegState(Src1IsKill));
3996   else
3997     assert(false && "Invalid FMA instruction kind \n");
3998   // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
3999   InsInstrs.push_back(MIB);
4000   return MUL;
4001 }
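// Operand-order note (added for exposition): FMAInstKind::Default places the
// addend last, matching scalar MADD/FMADD (e.g. FMADDSrrr Rd, Rn, Rm, Ra),
// whereas the Indexed and Accumulator kinds place the accumulated value first,
// as the vector FMLA/FMLS forms expect (e.g. FMLAv4i32_indexed Rd, Ra, Rn, Rm,
// lane).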
4002 
4003 /// genMaddR - Generate madd instruction and combine mul and add using
4004 /// an extra virtual register
4005 /// Example - an ADD intermediate needs to be stored in a register:
4006 ///   MUL I=A,B,0
4007 ///   ADD R,I,Imm
4008 ///   ==> ORR  V, ZR, Imm
4009 ///   ==> MADD R,A,B,V
4010 /// \param MF Containing MachineFunction
4011 /// \param MRI Register information
4012 /// \param TII Target information
4013 /// \param Root is the ADD instruction
4014 /// \param [out] InsInstrs is a vector of machine instructions and will
4015 /// contain the generated madd instruction
4016 /// \param IdxMulOpd is index of operand in Root that is the result of
4017 /// the MUL. In the example above IdxMulOpd is 1.
4018 /// \param MaddOpc the opcode of the madd instruction
4019 /// \param VR is a virtual register that holds the value of an ADD operand
4020 /// (V in the example above).
4021 /// \param RC Register class of operands
4022 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4023                               const TargetInstrInfo *TII, MachineInstr &Root,
4024                               SmallVectorImpl<MachineInstr *> &InsInstrs,
4025                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4026                               const TargetRegisterClass *RC) {
4027   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4028 
4029   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4030   unsigned ResultReg = Root.getOperand(0).getReg();
4031   unsigned SrcReg0 = MUL->getOperand(1).getReg();
4032   bool Src0IsKill = MUL->getOperand(1).isKill();
4033   unsigned SrcReg1 = MUL->getOperand(2).getReg();
4034   bool Src1IsKill = MUL->getOperand(2).isKill();
4035 
4036   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4037     MRI.constrainRegClass(ResultReg, RC);
4038   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4039     MRI.constrainRegClass(SrcReg0, RC);
4040   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4041     MRI.constrainRegClass(SrcReg1, RC);
4042   if (TargetRegisterInfo::isVirtualRegister(VR))
4043     MRI.constrainRegClass(VR, RC);
4044 
4045   MachineInstrBuilder MIB =
4046       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4047           .addReg(SrcReg0, getKillRegState(Src0IsKill))
4048           .addReg(SrcReg1, getKillRegState(Src1IsKill))
4049           .addReg(VR);
4050   // Insert the MADD
4051   InsInstrs.push_back(MIB);
4052   return MUL;
4053 }
4054 
4055 /// When getMachineCombinerPatterns() finds potential patterns,
4056 /// this function generates the instructions that could replace the
4057 /// original code sequence
4058 void AArch64InstrInfo::genAlternativeCodeSequence(
4059     MachineInstr &Root, MachineCombinerPattern Pattern,
4060     SmallVectorImpl<MachineInstr *> &InsInstrs,
4061     SmallVectorImpl<MachineInstr *> &DelInstrs,
4062     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4063   MachineBasicBlock &MBB = *Root.getParent();
4064   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4065   MachineFunction &MF = *MBB.getParent();
4066   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4067 
4068   MachineInstr *MUL;
4069   const TargetRegisterClass *RC;
4070   unsigned Opc;
4071   switch (Pattern) {
4072   default:
4073     // Reassociate instructions.
4074     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4075                                                 DelInstrs, InstrIdxForVirtReg);
4076     return;
4077   case MachineCombinerPattern::MULADDW_OP1:
4078   case MachineCombinerPattern::MULADDX_OP1:
4079     // MUL I=A,B,0
4080     // ADD R,I,C
4081     // ==> MADD R,A,B,C
4082     // --- Create(MADD);
4083     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4084       Opc = AArch64::MADDWrrr;
4085       RC = &AArch64::GPR32RegClass;
4086     } else {
4087       Opc = AArch64::MADDXrrr;
4088       RC = &AArch64::GPR64RegClass;
4089     }
4090     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4091     break;
4092   case MachineCombinerPattern::MULADDW_OP2:
4093   case MachineCombinerPattern::MULADDX_OP2:
4094     // MUL I=A,B,0
4095     // ADD R,C,I
4096     // ==> MADD R,A,B,C
4097     // --- Create(MADD);
4098     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4099       Opc = AArch64::MADDWrrr;
4100       RC = &AArch64::GPR32RegClass;
4101     } else {
4102       Opc = AArch64::MADDXrrr;
4103       RC = &AArch64::GPR64RegClass;
4104     }
4105     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4106     break;
4107   case MachineCombinerPattern::MULADDWI_OP1:
4108   case MachineCombinerPattern::MULADDXI_OP1: {
4109     // MUL I=A,B,0
4110     // ADD R,I,Imm
4111     // ==> ORR  V, ZR, Imm
4112     // ==> MADD R,A,B,V
4113     // --- Create(MADD);
4114     const TargetRegisterClass *OrrRC;
4115     unsigned BitSize, OrrOpc, ZeroReg;
4116     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4117       OrrOpc = AArch64::ORRWri;
4118       OrrRC = &AArch64::GPR32spRegClass;
4119       BitSize = 32;
4120       ZeroReg = AArch64::WZR;
4121       Opc = AArch64::MADDWrrr;
4122       RC = &AArch64::GPR32RegClass;
4123     } else {
4124       OrrOpc = AArch64::ORRXri;
4125       OrrRC = &AArch64::GPR64spRegClass;
4126       BitSize = 64;
4127       ZeroReg = AArch64::XZR;
4128       Opc = AArch64::MADDXrrr;
4129       RC = &AArch64::GPR64RegClass;
4130     }
4131     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4132     uint64_t Imm = Root.getOperand(2).getImm();
4133 
4134     if (Root.getOperand(3).isImm()) {
4135       unsigned Val = Root.getOperand(3).getImm();
4136       Imm = Imm << Val;
4137     }
4138     uint64_t UImm = SignExtend64(Imm, BitSize);
4139     uint64_t Encoding;
4140     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4141       MachineInstrBuilder MIB1 =
4142           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4143               .addReg(ZeroReg)
4144               .addImm(Encoding);
4145       InsInstrs.push_back(MIB1);
4146       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4147       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4148     }
4149     break;
4150   }
4151   case MachineCombinerPattern::MULSUBW_OP1:
4152   case MachineCombinerPattern::MULSUBX_OP1: {
4153     // MUL I=A,B,0
4154     // SUB R,I, C
4155     // ==> SUB  V, 0, C
4156     // ==> MADD R,A,B,V // = -C + A*B
4157     // --- Create(MADD);
4158     const TargetRegisterClass *SubRC;
4159     unsigned SubOpc, ZeroReg;
4160     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4161       SubOpc = AArch64::SUBWrr;
4162       SubRC = &AArch64::GPR32spRegClass;
4163       ZeroReg = AArch64::WZR;
4164       Opc = AArch64::MADDWrrr;
4165       RC = &AArch64::GPR32RegClass;
4166     } else {
4167       SubOpc = AArch64::SUBXrr;
4168       SubRC = &AArch64::GPR64spRegClass;
4169       ZeroReg = AArch64::XZR;
4170       Opc = AArch64::MADDXrrr;
4171       RC = &AArch64::GPR64RegClass;
4172     }
4173     unsigned NewVR = MRI.createVirtualRegister(SubRC);
4174     // SUB NewVR, 0, C
4175     MachineInstrBuilder MIB1 =
4176         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4177             .addReg(ZeroReg)
4178             .add(Root.getOperand(2));
4179     InsInstrs.push_back(MIB1);
4180     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4181     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4182     break;
4183   }
4184   case MachineCombinerPattern::MULSUBW_OP2:
4185   case MachineCombinerPattern::MULSUBX_OP2:
4186     // MUL I=A,B,0
4187     // SUB R,C,I
4188     // ==> MSUB R,A,B,C (computes C - A*B)
4189     // --- Create(MSUB);
4190     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4191       Opc = AArch64::MSUBWrrr;
4192       RC = &AArch64::GPR32RegClass;
4193     } else {
4194       Opc = AArch64::MSUBXrrr;
4195       RC = &AArch64::GPR64RegClass;
4196     }
4197     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4198     break;
4199   case MachineCombinerPattern::MULSUBWI_OP1:
4200   case MachineCombinerPattern::MULSUBXI_OP1: {
4201     // MUL I=A,B,0
4202     // SUB R,I, Imm
4203     // ==> ORR  V, ZR, -Imm
4204     // ==> MADD R,A,B,V // = -Imm + A*B
4205     // --- Create(MADD);
4206     const TargetRegisterClass *OrrRC;
4207     unsigned BitSize, OrrOpc, ZeroReg;
4208     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4209       OrrOpc = AArch64::ORRWri;
4210       OrrRC = &AArch64::GPR32spRegClass;
4211       BitSize = 32;
4212       ZeroReg = AArch64::WZR;
4213       Opc = AArch64::MADDWrrr;
4214       RC = &AArch64::GPR32RegClass;
4215     } else {
4216       OrrOpc = AArch64::ORRXri;
4217       OrrRC = &AArch64::GPR64spRegClass;
4218       BitSize = 64;
4219       ZeroReg = AArch64::XZR;
4220       Opc = AArch64::MADDXrrr;
4221       RC = &AArch64::GPR64RegClass;
4222     }
4223     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4224     uint64_t Imm = Root.getOperand(2).getImm();
4225     if (Root.getOperand(3).isImm()) {
4226       unsigned Val = Root.getOperand(3).getImm();
4227       Imm = Imm << Val;
4228     }
4229     uint64_t UImm = SignExtend64(-Imm, BitSize);
4230     uint64_t Encoding;
4231     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4232       MachineInstrBuilder MIB1 =
4233           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4234               .addReg(ZeroReg)
4235               .addImm(Encoding);
4236       InsInstrs.push_back(MIB1);
4237       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4238       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4239     }
4240     break;
4241   }
4242   // Floating Point Support
4243   case MachineCombinerPattern::FMULADDS_OP1:
4244   case MachineCombinerPattern::FMULADDD_OP1:
4245     // FMUL I=A,B,0
4246     // FADD R,I,C
4247     // ==> FMADD R,A,B,C
4248     // --- Create(FMADD);
4249     if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4250       Opc = AArch64::FMADDSrrr;
4251       RC = &AArch64::FPR32RegClass;
4252     } else {
4253       Opc = AArch64::FMADDDrrr;
4254       RC = &AArch64::FPR64RegClass;
4255     }
4256     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4257     break;
4258   case MachineCombinerPattern::FMULADDS_OP2:
4259   case MachineCombinerPattern::FMULADDD_OP2:
4260     // FMUL I=A,B,0
4261     // FADD R,C,I
4262     // ==> FMADD R,A,B,C
4263     // --- Create(FMADD);
4264     if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4265       Opc = AArch64::FMADDSrrr;
4266       RC = &AArch64::FPR32RegClass;
4267     } else {
4268       Opc = AArch64::FMADDDrrr;
4269       RC = &AArch64::FPR64RegClass;
4270     }
4271     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4272     break;
4273 
4274   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4275     Opc = AArch64::FMLAv1i32_indexed;
4276     RC = &AArch64::FPR32RegClass;
4277     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4278                            FMAInstKind::Indexed);
4279     break;
4280   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4281     Opc = AArch64::FMLAv1i32_indexed;
4282     RC = &AArch64::FPR32RegClass;
4283     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4284                            FMAInstKind::Indexed);
4285     break;
4286 
4287   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4288     Opc = AArch64::FMLAv1i64_indexed;
4289     RC = &AArch64::FPR64RegClass;
4290     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4291                            FMAInstKind::Indexed);
4292     break;
4293   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4294     Opc = AArch64::FMLAv1i64_indexed;
4295     RC = &AArch64::FPR64RegClass;
4296     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4297                            FMAInstKind::Indexed);
4298     break;
4299 
4300   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4301   case MachineCombinerPattern::FMLAv2f32_OP1:
4302     RC = &AArch64::FPR64RegClass;
4303     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4304       Opc = AArch64::FMLAv2i32_indexed;
4305       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4306                              FMAInstKind::Indexed);
4307     } else {
4308       Opc = AArch64::FMLAv2f32;
4309       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4310                              FMAInstKind::Accumulator);
4311     }
4312     break;
4313   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4314   case MachineCombinerPattern::FMLAv2f32_OP2:
4315     RC = &AArch64::FPR64RegClass;
4316     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4317       Opc = AArch64::FMLAv2i32_indexed;
4318       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4319                              FMAInstKind::Indexed);
4320     } else {
4321       Opc = AArch64::FMLAv2f32;
4322       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4323                              FMAInstKind::Accumulator);
4324     }
4325     break;
4326 
4327   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4328   case MachineCombinerPattern::FMLAv2f64_OP1:
4329     RC = &AArch64::FPR128RegClass;
4330     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4331       Opc = AArch64::FMLAv2i64_indexed;
4332       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4333                              FMAInstKind::Indexed);
4334     } else {
4335       Opc = AArch64::FMLAv2f64;
4336       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4337                              FMAInstKind::Accumulator);
4338     }
4339     break;
4340   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4341   case MachineCombinerPattern::FMLAv2f64_OP2:
4342     RC = &AArch64::FPR128RegClass;
4343     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4344       Opc = AArch64::FMLAv2i64_indexed;
4345       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4346                              FMAInstKind::Indexed);
4347     } else {
4348       Opc = AArch64::FMLAv2f64;
4349       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4350                              FMAInstKind::Accumulator);
4351     }
4352     break;
4353 
4354   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4355   case MachineCombinerPattern::FMLAv4f32_OP1:
4356     RC = &AArch64::FPR128RegClass;
4357     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4358       Opc = AArch64::FMLAv4i32_indexed;
4359       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4360                              FMAInstKind::Indexed);
4361     } else {
4362       Opc = AArch64::FMLAv4f32;
4363       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4364                              FMAInstKind::Accumulator);
4365     }
4366     break;
4367 
4368   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4369   case MachineCombinerPattern::FMLAv4f32_OP2:
4370     RC = &AArch64::FPR128RegClass;
4371     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4372       Opc = AArch64::FMLAv4i32_indexed;
4373       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4374                              FMAInstKind::Indexed);
4375     } else {
4376       Opc = AArch64::FMLAv4f32;
4377       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4378                              FMAInstKind::Accumulator);
4379     }
4380     break;
4381 
4382   case MachineCombinerPattern::FMULSUBS_OP1:
4383   case MachineCombinerPattern::FMULSUBD_OP1: {
4384     // FMUL I=A,B,0
4385     // FSUB R,I,C
4386     // ==> FNMSUB R,A,B,C // = -C + A*B
4387     // --- Create(FNMSUB);
4388     if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4389       Opc = AArch64::FNMSUBSrrr;
4390       RC = &AArch64::FPR32RegClass;
4391     } else {
4392       Opc = AArch64::FNMSUBDrrr;
4393       RC = &AArch64::FPR64RegClass;
4394     }
4395     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4396     break;
4397   }
4398 
4399   case MachineCombinerPattern::FNMULSUBS_OP1:
4400   case MachineCombinerPattern::FNMULSUBD_OP1: {
4401     // FNMUL I=A,B,0
4402     // FSUB R,I,C
4403     // ==> FNMADD R,A,B,C // = -A*B - C
4404     // --- Create(FNMADD);
4405     if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4406       Opc = AArch64::FNMADDSrrr;
4407       RC = &AArch64::FPR32RegClass;
4408     } else {
4409       Opc = AArch64::FNMADDDrrr;
4410       RC = &AArch64::FPR64RegClass;
4411     }
4412     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4413     break;
4414   }
4415 
4416   case MachineCombinerPattern::FMULSUBS_OP2:
4417   case MachineCombinerPattern::FMULSUBD_OP2: {
4418     // FMUL I=A,B,0
4419     // FSUB R,C,I
4420     // ==> FMSUB R,A,B,C (computes C - A*B)
4421     // --- Create(FMSUB);
4422     if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4423       Opc = AArch64::FMSUBSrrr;
4424       RC = &AArch64::FPR32RegClass;
4425     } else {
4426       Opc = AArch64::FMSUBDrrr;
4427       RC = &AArch64::FPR64RegClass;
4428     }
4429     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4430     break;
4431   }
4432 
4433   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4434     Opc = AArch64::FMLSv1i32_indexed;
4435     RC = &AArch64::FPR32RegClass;
4436     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4437                            FMAInstKind::Indexed);
4438     break;
4439 
4440   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4441     Opc = AArch64::FMLSv1i64_indexed;
4442     RC = &AArch64::FPR64RegClass;
4443     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4444                            FMAInstKind::Indexed);
4445     break;
4446 
4447   case MachineCombinerPattern::FMLSv2f32_OP2:
4448   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4449     RC = &AArch64::FPR64RegClass;
4450     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4451       Opc = AArch64::FMLSv2i32_indexed;
4452       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4453                              FMAInstKind::Indexed);
4454     } else {
4455       Opc = AArch64::FMLSv2f32;
4456       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4457                              FMAInstKind::Accumulator);
4458     }
4459     break;
4460 
4461   case MachineCombinerPattern::FMLSv2f64_OP2:
4462   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4463     RC = &AArch64::FPR128RegClass;
4464     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4465       Opc = AArch64::FMLSv2i64_indexed;
4466       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4467                              FMAInstKind::Indexed);
4468     } else {
4469       Opc = AArch64::FMLSv2f64;
4470       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4471                              FMAInstKind::Accumulator);
4472     }
4473     break;
4474 
4475   case MachineCombinerPattern::FMLSv4f32_OP2:
4476   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4477     RC = &AArch64::FPR128RegClass;
4478     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4479       Opc = AArch64::FMLSv4i32_indexed;
4480       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4481                              FMAInstKind::Indexed);
4482     } else {
4483       Opc = AArch64::FMLSv4f32;
4484       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4485                              FMAInstKind::Accumulator);
4486     }
4487     break;
4488   case MachineCombinerPattern::FMLSv2f32_OP1:
4489   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
4490     RC = &AArch64::FPR64RegClass;
4491     unsigned NewVR = MRI.createVirtualRegister(RC);
4492     MachineInstrBuilder MIB1 =
4493         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4494             .add(Root.getOperand(2));
4495     InsInstrs.push_back(MIB1);
4496     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4497     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
4498       Opc = AArch64::FMLAv2i32_indexed;
4499       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4500                              FMAInstKind::Indexed, &NewVR);
4501     } else {
4502       Opc = AArch64::FMLAv2f32;
4503       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4504                              FMAInstKind::Accumulator, &NewVR);
4505     }
4506     break;
4507   }
4508   case MachineCombinerPattern::FMLSv4f32_OP1:
4509   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
4510     RC = &AArch64::FPR128RegClass;
4511     unsigned NewVR = MRI.createVirtualRegister(RC);
4512     MachineInstrBuilder MIB1 =
4513         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4514             .add(Root.getOperand(2));
4515     InsInstrs.push_back(MIB1);
4516     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4517     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
4518       Opc = AArch64::FMLAv4i32_indexed;
4519       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4520                              FMAInstKind::Indexed, &NewVR);
4521     } else {
4522       Opc = AArch64::FMLAv4f32;
4523       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4524                              FMAInstKind::Accumulator, &NewVR);
4525     }
4526     break;
4527   }
4528   case MachineCombinerPattern::FMLSv2f64_OP1:
4529   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
4530     RC = &AArch64::FPR128RegClass;
4531     unsigned NewVR = MRI.createVirtualRegister(RC);
4532     MachineInstrBuilder MIB1 =
4533         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4534             .add(Root.getOperand(2));
4535     InsInstrs.push_back(MIB1);
4536     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4537     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
4538       Opc = AArch64::FMLAv2i64_indexed;
4539       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4540                              FMAInstKind::Indexed, &NewVR);
4541     } else {
4542       Opc = AArch64::FMLAv2f64;
4543       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4544                              FMAInstKind::Accumulator, &NewVR);
4545     }
4546     break;
4547   }
4548   } // end switch (Pattern)
4549   // Record MUL and ADD/SUB for deletion
4550   DelInstrs.push_back(MUL);
4551   DelInstrs.push_back(&Root);
4552 }
4553 
4554 /// Replace a csinc-branch sequence with a simple conditional branch
4555 ///
4556 /// Examples:
4557 /// 1. \code
4558 ///   csinc  w9, wzr, wzr, <condition code>
4559 ///   tbnz   w9, #0, 0x44
4560 ///    \endcode
4561 /// to
4562 ///    \code
4563 ///   b.<inverted condition code>
4564 ///    \endcode
4565 ///
4566 /// 2. \code
4567 ///   csinc w9, wzr, wzr, <condition code>
4568 ///   tbz   w9, #0, 0x44
4569 ///    \endcode
4570 /// to
4571 ///    \code
4572 ///   b.<condition code>
4573 ///    \endcode
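///
/// (Added note: "csinc wN, wzr, wzr, <cc>" produces 1 exactly when <cc> is
/// false, since it is the expansion of "cset wN, <inverted cc>"; that is why
/// TBNZ maps to the inverted condition code and TBZ to the original one.)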
4574 ///
4575 /// Replace a compare-and-branch sequence with a TBZ/TBNZ instruction when
4576 /// the compare's constant operand is a power of 2.
4577 ///
4578 /// Examples:
4579 ///    \code
4580 ///   and  w8, w8, #0x400
4581 ///   cbnz w8, L1
4582 ///    \endcode
4583 /// to
4584 ///    \code
4585 ///   tbnz w8, #10, L1
4586 ///    \endcode
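///
/// (Added note: the tested bit index is log2 of the AND mask; here
/// 0x400 == 1 << 10, so bit 10 is tested.)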
4587 ///
4588 /// \param  MI Conditional Branch
4589 /// \return True when the simple conditional branch is generated
4590 ///
4591 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4592   bool IsNegativeBranch = false;
4593   bool IsTestAndBranch = false;
4594   unsigned TargetBBInMI = 0;
4595   switch (MI.getOpcode()) {
4596   default:
4597     llvm_unreachable("Unknown branch instruction?");
4598   case AArch64::Bcc:
4599     return false;
4600   case AArch64::CBZW:
4601   case AArch64::CBZX:
4602     TargetBBInMI = 1;
4603     break;
4604   case AArch64::CBNZW:
4605   case AArch64::CBNZX:
4606     TargetBBInMI = 1;
4607     IsNegativeBranch = true;
4608     break;
4609   case AArch64::TBZW:
4610   case AArch64::TBZX:
4611     TargetBBInMI = 2;
4612     IsTestAndBranch = true;
4613     break;
4614   case AArch64::TBNZW:
4615   case AArch64::TBNZX:
4616     TargetBBInMI = 2;
4617     IsNegativeBranch = true;
4618     IsTestAndBranch = true;
4619     break;
4620   }
4621   // So we increment a zero register and test for bits other
4622   // than bit 0? Conservatively bail out in case the verifier
4623   // missed this case.
4624   if (IsTestAndBranch && MI.getOperand(1).getImm())
4625     return false;
4626 
4627   // Find Definition.
4628   assert(MI.getParent() && "Incomplete machine instruction\n");
4629   MachineBasicBlock *MBB = MI.getParent();
4630   MachineFunction *MF = MBB->getParent();
4631   MachineRegisterInfo *MRI = &MF->getRegInfo();
4632   unsigned VReg = MI.getOperand(0).getReg();
4633   if (!TargetRegisterInfo::isVirtualRegister(VReg))
4634     return false;
4635 
4636   MachineInstr *DefMI = MRI->getVRegDef(VReg);
4637 
4638   // Look through COPY instructions to find definition.
4639   while (DefMI->isCopy()) {
4640     unsigned CopyVReg = DefMI->getOperand(1).getReg();
4641     if (!MRI->hasOneNonDBGUse(CopyVReg))
4642       return false;
4643     if (!MRI->hasOneDef(CopyVReg))
4644       return false;
4645     DefMI = MRI->getVRegDef(CopyVReg);
4646   }
4647 
4648   switch (DefMI->getOpcode()) {
4649   default:
4650     return false;
4651   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4652   case AArch64::ANDWri:
4653   case AArch64::ANDXri: {
4654     if (IsTestAndBranch)
4655       return false;
4656     if (DefMI->getParent() != MBB)
4657       return false;
4658     if (!MRI->hasOneNonDBGUse(VReg))
4659       return false;
4660 
4661     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4662     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4663         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4664     if (!isPowerOf2_64(Mask))
4665       return false;
4666 
4667     MachineOperand &MO = DefMI->getOperand(1);
4668     unsigned NewReg = MO.getReg();
4669     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4670       return false;
4671 
4672     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4673 
4674     MachineBasicBlock &RefToMBB = *MBB;
4675     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4676     DebugLoc DL = MI.getDebugLoc();
4677     unsigned Imm = Log2_64(Mask);
4678     unsigned Opc = (Imm < 32)
4679                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4680                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4681     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4682                               .addReg(NewReg)
4683                               .addImm(Imm)
4684                               .addMBB(TBB);
4685     // Register lives on to the TBZ/TBNZ now.
4686     MO.setIsKill(false);
4687 
4688     // Bit positions smaller than 32 can only be encoded by the 32-bit
4689     // (W) variant; the 64-bit variant cannot encode them.
4690     // Therefore, if the input register is 64-bit, we need to use its
4691     // 32-bit sub-register.
4692     // (The top bit of the bit-position field doubles as the width flag.)
4693     if (!Is32Bit && Imm < 32)
4694       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4695     MI.eraseFromParent();
4696     return true;
4697   }
4698   // Look for CSINC
4699   case AArch64::CSINCWr:
4700   case AArch64::CSINCXr: {
4701     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4702           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4703         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4704           DefMI->getOperand(2).getReg() == AArch64::XZR))
4705       return false;
4706 
4707     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4708       return false;
4709 
4710     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4711     // Convert only when the condition code is not modified between
4712     // the CSINC and the branch. The CC may be used by other
4713     // instructions in between.
4714     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4715       return false;
4716     MachineBasicBlock &RefToMBB = *MBB;
4717     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4718     DebugLoc DL = MI.getDebugLoc();
4719     if (IsNegativeBranch)
4720       CC = AArch64CC::getInvertedCondCode(CC);
4721     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4722     MI.eraseFromParent();
4723     return true;
4724   }
4725   }
4726 }
4727 
4728 std::pair<unsigned, unsigned>
4729 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4730   const unsigned Mask = AArch64II::MO_FRAGMENT;
4731   return std::make_pair(TF & Mask, TF & ~Mask);
4732 }
4733 
4734 ArrayRef<std::pair<unsigned, const char *>>
4735 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4736   using namespace AArch64II;
4737 
4738   static const std::pair<unsigned, const char *> TargetFlags[] = {
4739       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4740       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
4741       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
4742       {MO_HI12, "aarch64-hi12"}};
4743   return makeArrayRef(TargetFlags);
4744 }
4745 
4746 ArrayRef<std::pair<unsigned, const char *>>
4747 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4748   using namespace AArch64II;
4749 
4750   static const std::pair<unsigned, const char *> TargetFlags[] = {
4751       {MO_COFFSTUB, "aarch64-coffstub"},
4752       {MO_GOT, "aarch64-got"},   {MO_NC, "aarch64-nc"},
4753       {MO_S, "aarch64-s"},       {MO_TLS, "aarch64-tls"},
4754       {MO_DLLIMPORT, "aarch64-dllimport"}};
4755   return makeArrayRef(TargetFlags);
4756 }
4757 
4758 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4759 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4760   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4761       {{MOSuppressPair, "aarch64-suppress-pair"},
4762        {MOStridedAccess, "aarch64-strided-access"}};
4763   return makeArrayRef(TargetFlags);
4764 }
4765 
4766 /// Constants defining how certain sequences should be outlined.
4767 /// This encompasses how an outlined function should be called, and what kind of
4768 /// frame should be emitted for that outlined function.
4769 ///
4770 /// \p MachineOutlinerDefault implies that the function should be called with
4771 /// a save and restore of LR to the stack.
4772 ///
4773 /// That is,
4774 ///
4775 /// I1     Save LR                    OUTLINED_FUNCTION:
4776 /// I2 --> BL OUTLINED_FUNCTION       I1
4777 /// I3     Restore LR                 I2
4778 ///                                   I3
4779 ///                                   RET
4780 ///
4781 /// * Call construction overhead: 3 (save + BL + restore)
4782 /// * Frame construction overhead: 1 (ret)
4783 /// * Requires stack fixups? Yes
4784 ///
4785 /// \p MachineOutlinerTailCall implies that the function is being created from
4786 /// a sequence of instructions ending in a return.
4787 ///
4788 /// That is,
4789 ///
4790 /// I1                             OUTLINED_FUNCTION:
4791 /// I2 --> B OUTLINED_FUNCTION     I1
4792 /// RET                            I2
4793 ///                                RET
4794 ///
4795 /// * Call construction overhead: 1 (B)
4796 /// * Frame construction overhead: 0 (Return included in sequence)
4797 /// * Requires stack fixups? No
4798 ///
4799 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4800 /// a BL instruction, but doesn't require LR to be saved and restored. This
4801 /// happens when LR is known to be dead.
4802 ///
4803 /// That is,
4804 ///
4805 /// I1                                OUTLINED_FUNCTION:
4806 /// I2 --> BL OUTLINED_FUNCTION       I1
4807 /// I3                                I2
4808 ///                                   I3
4809 ///                                   RET
4810 ///
4811 /// * Call construction overhead: 1 (BL)
4812 /// * Frame construction overhead: 1 (RET)
4813 /// * Requires stack fixups? No
4814 ///
4815 /// \p MachineOutlinerThunk implies that the function is being created from
4816 /// a sequence of instructions ending in a call. The outlined function is
4817 /// called with a BL instruction, and the outlined function tail-calls the
4818 /// original call destination.
4819 ///
4820 /// That is,
4821 ///
4822 /// I1                                OUTLINED_FUNCTION:
4823 /// I2 --> BL OUTLINED_FUNCTION       I1
4824 /// BL f                              I2
4825 ///                                   B f
4826 /// * Call construction overhead: 1 (BL)
4827 /// * Frame construction overhead: 0
4828 /// * Requires stack fixups? No
4829 ///
4830 /// \p MachineOutlinerRegSave implies that the function should be called with a
4831 /// save and restore of LR to an available register. This allows us to avoid
4832 /// stack fixups. Note that this outlining variant is compatible with the
4833 /// NoLRSave case.
4834 ///
4835 /// That is,
4836 ///
4837 /// I1     Save LR                    OUTLINED_FUNCTION:
4838 /// I2 --> BL OUTLINED_FUNCTION       I1
4839 /// I3     Restore LR                 I2
4840 ///                                   I3
4841 ///                                   RET
4842 ///
4843 /// * Call construction overhead: 3 (save + BL + restore)
4844 /// * Frame construction overhead: 1 (ret)
4845 /// * Requires stack fixups? No
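///
/// (Added illustration, using 4 bytes per instruction and the overheads
/// listed above; the sequence size and occurrence count are hypothetical.)
/// A 12-byte sequence occurring 4 times costs 48 bytes inline. Outlined as
/// MachineOutlinerDefault it costs 4 x 12 (calls) + 12 (body) + 4 (ret) =
/// 64 bytes, which is not profitable; as MachineOutlinerTailCall it costs
/// 4 x 4 + 12 + 0 = 28 bytes, a 20 byte saving.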
4846 enum MachineOutlinerClass {
4847   MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
4848   MachineOutlinerTailCall, /// Only emit a branch.
4849   MachineOutlinerNoLRSave, /// Emit a call and return.
4850   MachineOutlinerThunk,    /// Emit a call and tail-call.
4851   MachineOutlinerRegSave   /// Same as default, but save to a register.
4852 };
4853 
4854 enum MachineOutlinerMBBFlags {
4855   LRUnavailableSomewhere = 0x2,
4856   HasCalls = 0x4,
4857   UnsafeRegsDead = 0x8
4858 };
4859 
4860 unsigned
4861 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
4862   assert(C.LRUWasSet && "LRU wasn't set?");
4863   MachineFunction *MF = C.getMF();
4864   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
4865       MF->getSubtarget().getRegisterInfo());
4866 
4867   // Check if there is an available register across the sequence that we can
4868   // use.
4869   for (unsigned Reg : AArch64::GPR64RegClass) {
4870     if (!ARI->isReservedReg(*MF, Reg) &&
4871         Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
4872         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
4873         Reg != AArch64::X17 && // Ditto for X17.
4874         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
4875       return Reg;
4876   }
4877 
4878   // No suitable register. Return 0.
4879   return 0u;
4880 }
4881 
4882 outliner::OutlinedFunction
4883 AArch64InstrInfo::getOutliningCandidateInfo(
4884     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
4885   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
4886   unsigned SequenceSize =
4887       std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
4888                       [this](unsigned Sum, const MachineInstr &MI) {
4889                         return Sum + getInstSizeInBytes(MI);
4890                       });
4891 
4892   // Properties about candidate MBBs that hold for all of them.
4893   unsigned FlagsSetInAll = 0xF;
4894 
4895   // Compute liveness information for each candidate, and set FlagsSetInAll.
4896   const TargetRegisterInfo &TRI = getRegisterInfo();
4897   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4898                 [&FlagsSetInAll](outliner::Candidate &C) {
4899                   FlagsSetInAll &= C.Flags;
4900                 });
4901 
4902   // According to the AArch64 Procedure Call Standard, the following are
4903   // undefined on entry/exit from a function call:
4904   //
4905   // * Registers x16, x17, (and thus w16, w17)
4906   // * Condition codes (and thus the NZCV register)
4907   //
4908   // Because of this, we can't outline any sequence of instructions where
4909   // one of these registers is live into/across it. Thus, we need to delete
4910   // those candidates.
4913   auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
4914     // If the unsafe registers in this block are all dead, then we don't need
4915     // to compute liveness here.
4916     if (C.Flags & UnsafeRegsDead)
4917       return false;
4918     C.initLRU(TRI);
4919     LiveRegUnits LRU = C.LRU;
4920     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
4921             !LRU.available(AArch64::NZCV));
4922   };
4923 
4924   // Are there any candidates where those registers are live?
4925   if (!(FlagsSetInAll & UnsafeRegsDead)) {
4926     // Erase every candidate that violates the restrictions above. (It could be
4927     // true that we have viable candidates, so it's not worth bailing out in
4928     // the case that, say, 1 out of 20 candidates violates the restrictions.)
4929     RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
4930                                               RepeatedSequenceLocs.end(),
4931                                               CantGuaranteeValueAcrossCall),
4932                                RepeatedSequenceLocs.end());
4933 
4934     // If the sequence doesn't have enough candidates left, then we're done.
4935     if (RepeatedSequenceLocs.size() < 2)
4936       return outliner::OutlinedFunction();
4937   }
4938 
4939   // At this point, we have only "safe" candidates to outline. Figure out
4940   // frame + call instruction information.
4941 
4942   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
4943 
4944   // Helper lambda which sets call information for every candidate.
4945   auto SetCandidateCallInfo =
4946       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
4947         for (outliner::Candidate &C : RepeatedSequenceLocs)
4948           C.setCallInfo(CallID, NumBytesForCall);
4949       };
4950 
4951   unsigned FrameID = MachineOutlinerDefault;
4952   unsigned NumBytesToCreateFrame = 4;
4953 
4954   bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
4955     return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
4956   });
4957 
4958   // Returns true if an instruction is safe to fix up, false otherwise.
4959   auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
4960     if (MI.isCall())
4961       return true;
4962 
4963     if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
4964         !MI.readsRegister(AArch64::SP, &TRI))
4965       return true;
4966 
4967     // Any modification of SP will break our code to save/restore LR.
4968     // FIXME: We could handle some instructions which add a constant
4969     // offset to SP, with a bit more work.
4970     if (MI.modifiesRegister(AArch64::SP, &TRI))
4971       return false;
4972 
4973     // At this point, we have a stack instruction that we might need to
4974     // fix up. We'll handle it if it's a load or store.
4975     if (MI.mayLoadOrStore()) {
4976       const MachineOperand *Base; // Filled with the base operand of MI.
4977       int64_t Offset;             // Filled with the offset of MI.
4978 
4979       // Does it allow us to offset the base operand and is the base the
4980       // register SP?
4981       if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
4982           Base->getReg() != AArch64::SP)
4983         return false;
4984 
4985       // Find the minimum/maximum offset for this instruction and check
4986       // if fixing it up would be in range.
4987       int64_t MinOffset,
4988           MaxOffset;  // Unscaled offsets for the instruction.
4989       unsigned Scale; // The scale to multiply the offsets by.
4990       unsigned DummyWidth;
4991       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
4992 
4993       Offset += 16; // Update the offset to what it would be if we outlined.
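      // Added illustration (assumes an LDRXui, whose byte-offset range is
      // [0, 4095 * 8] with Scale = 8): "ldr x0, [sp, #8]" has Offset = 8;
      // after outlining the access becomes sp+24, still in range, so it can
      // be fixed up.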
4994       if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
4995         return false;
4996 
4997       // It's in range, so we can outline it.
4998       return true;
4999     }
5000 
5001     // FIXME: Add handling for instructions like "add x0, sp, #8".
5002 
5003     // We can't fix it up, so don't outline it.
5004     return false;
5005   };
5006 
5007   // True if it's possible to fix up each stack instruction in this sequence.
5008   // Important for frames/call variants that modify the stack.
5009   bool AllStackInstrsSafe = std::all_of(
5010       FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
5011 
5012   // If the last instruction in any candidate is a terminator, then we should
5013   // tail call all of the candidates.
5014   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5015     FrameID = MachineOutlinerTailCall;
5016     NumBytesToCreateFrame = 0;
5017     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5018   }
5019 
5020   else if (LastInstrOpcode == AArch64::BL ||
5021            (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5022     // FIXME: Do we need to check if the code after this uses the value of LR?
5023     FrameID = MachineOutlinerThunk;
5024     NumBytesToCreateFrame = 0;
5025     SetCandidateCallInfo(MachineOutlinerThunk, 4);
5026   }
5027 
5028   else {
5029     // We need to decide how to emit calls + frames. We can always emit the same
5030     // frame if we don't need to save to the stack. If we have to save to the
5031     // stack, then we need a different frame.
5032     unsigned NumBytesNoStackCalls = 0;
5033     std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5034 
5035     for (outliner::Candidate &C : RepeatedSequenceLocs) {
5036       C.initLRU(TRI);
5037 
5038       // Is LR available? If so, we don't need a save.
5039       if (C.LRU.available(AArch64::LR)) {
5040         NumBytesNoStackCalls += 4;
5041         C.setCallInfo(MachineOutlinerNoLRSave, 4);
5042         CandidatesWithoutStackFixups.push_back(C);
5043       }
5044 
5045       // Is an unused register available? If so, we won't modify the stack, so
5046       // we can outline with the same frame type as those that don't save LR.
5047       else if (findRegisterToSaveLRTo(C)) {
5048         NumBytesNoStackCalls += 12;
5049         C.setCallInfo(MachineOutlinerRegSave, 12);
5050         CandidatesWithoutStackFixups.push_back(C);
5051       }
5052 
5053       // Is SP used in the sequence at all? If not, we don't have to modify
5054       // the stack, so we are guaranteed to get the same frame.
5055       else if (C.UsedInSequence.available(AArch64::SP)) {
5056         NumBytesNoStackCalls += 12;
5057         C.setCallInfo(MachineOutlinerDefault, 12);
5058         CandidatesWithoutStackFixups.push_back(C);
5059       }
5060 
5061       // If we outline this, we need to modify the stack. Pretend we don't
5062       // outline this by saving all of its bytes.
5063       else {
5064         NumBytesNoStackCalls += SequenceSize;
5065       }
5066     }
5067 
5068     // If fixing up the stack isn't safe, or the candidates avoiding stack
5069     // fixups are at least as cheap overall as giving every candidate the
5070     // default call type (12 bytes each), keep only those candidates.
5071     if (!AllStackInstrsSafe ||
5072         NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
5073       RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5074       FrameID = MachineOutlinerNoLRSave;
5075     } else {
5076       SetCandidateCallInfo(MachineOutlinerDefault, 12);
5077     }
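    // Added illustration (candidate costs hypothetical): with three
    // candidates costing 4 (NoLRSave), 12 (RegSave) and 20 (its full
    // SequenceSize, since it would need stack fixups), NumBytesNoStackCalls
    // is 36 <= 3 * 12, so only the first two candidates are kept.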
5078 
5079     // If we dropped all of the candidates, bail out here.
5080     if (RepeatedSequenceLocs.size() < 2) {
5081       RepeatedSequenceLocs.clear();
5082       return outliner::OutlinedFunction();
5083     }
5084   }
5085 
5086   // Does every candidate's MBB contain a call? If so, then we might have a call
5087   // in the range.
5088   if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5089     // Check if the range contains a call. These require a save + restore of the
5090     // link register.
5091     bool ModStackToSaveLR = false;
5092     if (std::any_of(FirstCand.front(), FirstCand.back(),
5093                     [](const MachineInstr &MI) { return MI.isCall(); }))
5094       ModStackToSaveLR = true;
5095 
5096     // Handle the last instruction separately. If this is a tail call, then the
5097     // last instruction is a call. We don't want to save + restore in this case.
5098     // However, it could be possible that the last instruction is a call without
5099     // it being valid to tail call this sequence. We should consider this as
5100     // well.
5101     else if (FrameID != MachineOutlinerThunk &&
5102              FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
5103       ModStackToSaveLR = true;
5104 
5105     if (ModStackToSaveLR) {
5106       // We can't fix up the stack. Bail out.
5107       if (!AllStackInstrsSafe) {
5108         RepeatedSequenceLocs.clear();
5109         return outliner::OutlinedFunction();
5110       }
5111 
5112       // Save + restore LR.
5113       NumBytesToCreateFrame += 8;
5114     }
5115   }
5116 
5117   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5118                                     NumBytesToCreateFrame, FrameID);
5119 }
5120 
5121 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5122     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5123   const Function &F = MF.getFunction();
5124 
5125   // Can F be deduplicated by the linker? If it can, don't outline from it.
5126   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5127     return false;
5128 
5129   // Don't outline from functions with section markings; the program could
5130   // expect that all the code is in the named section.
5131   // FIXME: Allow outlining from multiple functions with the same section
5132   // marking.
5133   if (F.hasSection())
5134     return false;
5135 
5136   // Outlining from functions with redzones is unsafe since the outliner may
5137   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5138   // outline from it.
5139   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5140   if (!AFI || AFI->hasRedZone().getValueOr(true))
5141     return false;
5142 
5143   // It's safe to outline from MF.
5144   return true;
5145 }
5146 
5147 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
5148                                               unsigned &Flags) const {
5149   // Check if LR is available through all of the MBB. If it's not, then set
5150   // a flag.
5151   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5152          "Suitable Machine Function for outlining must track liveness");
5153   LiveRegUnits LRU(getRegisterInfo());
5154 
5155   std::for_each(MBB.rbegin(), MBB.rend(),
5156                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5157 
5158   // Check if each of the unsafe registers is available...
5159   bool W16AvailableInBlock = LRU.available(AArch64::W16);
5160   bool W17AvailableInBlock = LRU.available(AArch64::W17);
5161   bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
5162 
5163   // If all of these are dead (and not live out), we know we don't have to check
5164   // them later.
5165   if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
5166     Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
5167 
5168   // Now, add the live outs to the set.
5169   LRU.addLiveOuts(MBB);
5170 
5171   // If any of these registers is available in the MBB, but also live out of
5172   // the block, then we know outlining is unsafe.
5173   if (W16AvailableInBlock && !LRU.available(AArch64::W16))
5174     return false;
5175   if (W17AvailableInBlock && !LRU.available(AArch64::W17))
5176     return false;
5177   if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
5178     return false;
5179 
5180   // Check if there's a call inside this MachineBasicBlock. If there is, then
5181   // set a flag.
5182   if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
5183     Flags |= MachineOutlinerMBBFlags::HasCalls;
5184 
5185   MachineFunction *MF = MBB.getParent();
5186 
5187   // In the event that we outline, we may have to save LR. If there is an
5188   // available register in the MBB, then we'll always save LR there. Check if
5189   // this is true.
5190   bool CanSaveLR = false;
5191   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5192       MF->getSubtarget().getRegisterInfo());
5193 
5194   // Check if there is an available register across the sequence that we can
5195   // use.
5196   for (unsigned Reg : AArch64::GPR64RegClass) {
5197     if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
5198         Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
5199       CanSaveLR = true;
5200       break;
5201     }
5202   }
5203 
5204   // Check if we have a register we can save LR to, and if LR was used
5205   // somewhere. If both of those things are true, then we need to evaluate the
5206   // safety of outlining stack instructions later.
5207   if (!CanSaveLR && !LRU.available(AArch64::LR))
5208     Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
5209 
5210   return true;
5211 }
5212 
5213 outliner::InstrType
5214 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5215                                    unsigned Flags) const {
5216   MachineInstr &MI = *MIT;
5217   MachineBasicBlock *MBB = MI.getParent();
5218   MachineFunction *MF = MBB->getParent();
5219   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5220 
5221   // Don't outline LOHs.
5222   if (FuncInfo->getLOHRelated().count(&MI))
5223     return outliner::InstrType::Illegal;
5224 
5225   // Don't allow debug values to impact outlining type.
5226   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
5227     return outliner::InstrType::Invisible;
5228 
5229   // At this point, KILL instructions don't really tell us much so we can go
5230   // ahead and skip over them.
5231   if (MI.isKill())
5232     return outliner::InstrType::Invisible;
5233 
5234   // Is this a terminator for a basic block?
5235   if (MI.isTerminator()) {
5236 
5237     // Is this the end of a function?
5238     if (MI.getParent()->succ_empty())
5239       return outliner::InstrType::Legal;
5240 
5241     // It's not, so don't outline it.
5242     return outliner::InstrType::Illegal;
5243   }
5244 
5245   // Make sure none of the operands are un-outlinable.
5246   for (const MachineOperand &MOP : MI.operands()) {
5247     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5248         MOP.isTargetIndex())
5249       return outliner::InstrType::Illegal;
5250 
5251     // If it uses LR or W30 explicitly, then don't touch it.
5252     if (MOP.isReg() && !MOP.isImplicit() &&
5253         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
5254       return outliner::InstrType::Illegal;
5255   }
5256 
5257   // Special cases for instructions that can always be outlined, but will fail
5258   // the later tests. e.g. ADRPs, which are PC-relative, can always be
5259   // outlined because they don't require a *specific* value to be in LR.
5260   if (MI.getOpcode() == AArch64::ADRP)
5261     return outliner::InstrType::Legal;
5262 
5263   // If MI is a call we might be able to outline it. We don't want to outline
5264   // any calls that rely on the position of items on the stack. When we outline
5265   // something containing a call, we have to emit a save and restore of LR in
5266   // the outlined function. Currently, this always happens by saving LR to the
5267   // stack. Thus, if we outline, say, half the parameters for a function call
5268   // plus the call, then we'll break the callee's expectations for the layout
5269   // of the stack.
5270   //
5271   // FIXME: Allow calls to functions which construct a stack frame, as long
5272   // as they don't access arguments on the stack.
5273   // FIXME: Figure out some way to analyze functions defined in other modules.
5274   // We should be able to compute the memory usage based on the IR calling
5275   // convention, even if we can't see the definition.
5276   if (MI.isCall()) {
5277     // Get the function associated with the call. Look at each operand and find
5278     // the one that represents the callee and get its name.
5279     const Function *Callee = nullptr;
5280     for (const MachineOperand &MOP : MI.operands()) {
5281       if (MOP.isGlobal()) {
5282         Callee = dyn_cast<Function>(MOP.getGlobal());
5283         break;
5284       }
5285     }
5286 
5287     // Never outline calls to mcount.  There isn't any rule that would require
5288     // this, but the Linux kernel's "ftrace" feature depends on it.
5289     if (Callee && Callee->getName() == "\01_mcount")
5290       return outliner::InstrType::Illegal;
5291 
5292     // If we don't know anything about the callee, assume it depends on the
5293     // stack layout of the caller. In that case, it's only legal to outline
5294     // as a tail-call.  Whitelist the call instructions we know about so we
5295     // don't get unexpected results with call pseudo-instructions.
5296     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
5297     if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
5298       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
5299 
5300     if (!Callee)
5301       return UnknownCallOutlineType;
5302 
5303     // We have a function we have information about. Check if it's something
5304     // we can safely outline.
5305     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
5306 
5307     // We don't know what's going on with the callee at all. Don't touch it.
5308     if (!CalleeMF)
5309       return UnknownCallOutlineType;
5310 
5311     // Check if we know anything about the callee saves on the function. If we
5312     // don't, then don't touch it, since that implies that we haven't
5313     // computed anything about its stack frame yet.
5314     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
5315     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
5316         MFI.getNumObjects() > 0)
5317       return UnknownCallOutlineType;
5318 
5319     // At this point, we can say that CalleeMF ought to not pass anything on the
5320     // stack. Therefore, we can outline it.
5321     return outliner::InstrType::Legal;
5322   }
5323 
5324   // Don't outline positions.
5325   if (MI.isPosition())
5326     return outliner::InstrType::Illegal;
5327 
5328   // Don't touch the link register or W30.
5329   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
5330       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
5331     return outliner::InstrType::Illegal;
5332 
5333   // Don't outline BTI instructions, because that will prevent the outlining
5334   // site from being indirectly callable.
5335   if (MI.getOpcode() == AArch64::HINT) {
5336     int64_t Imm = MI.getOperand(0).getImm();
5337     if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
5338       return outliner::InstrType::Illegal;
5339   }
5340 
5341   return outliner::InstrType::Legal;
5342 }
5343 
5344 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5345   for (MachineInstr &MI : MBB) {
5346     const MachineOperand *Base;
5347     unsigned Width;
5348     int64_t Offset;
5349 
5350     // Is this a load or store with an immediate offset with SP as the base?
5351     if (!MI.mayLoadOrStore() ||
5352         !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
5353         (Base->isReg() && Base->getReg() != AArch64::SP))
5354       continue;
5355 
5356     // It is, so we have to fix it up.
5357     unsigned Scale;
5358     int64_t Dummy1, Dummy2;
5359 
5360     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5361     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5362     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5363     assert(Scale != 0 && "Unexpected opcode!");
5364 
5365     // We've pushed the return address to the stack, so add 16 to the offset.
5366     // This is safe, since we already checked if it would overflow when we
5367     // checked if this instruction was legal to outline.
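    // Added illustration (assumes an LDRXui, Scale = 8): "ldr x0, [sp, #8]"
    // has Offset = 8, so NewImm = (8 + 16) / 8 = 3 and the instruction is
    // rewritten as "ldr x0, [sp, #24]".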
5368     int64_t NewImm = (Offset + 16) / Scale;
5369     StackOffsetOperand.setImm(NewImm);
5370   }
5371 }
5372 
5373 void AArch64InstrInfo::buildOutlinedFrame(
5374     MachineBasicBlock &MBB, MachineFunction &MF,
5375     const outliner::OutlinedFunction &OF) const {
5376   // For thunk outlining, rewrite the last instruction from a call to a
5377   // tail-call.
5378   if (OF.FrameConstructionID == MachineOutlinerThunk) {
5379     MachineInstr *Call = &*--MBB.instr_end();
5380     unsigned TailOpcode;
5381     if (Call->getOpcode() == AArch64::BL) {
5382       TailOpcode = AArch64::TCRETURNdi;
5383     } else {
5384       assert(Call->getOpcode() == AArch64::BLR);
5385       TailOpcode = AArch64::TCRETURNriALL;
5386     }
5387     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
5388                             .add(Call->getOperand(0))
5389                             .addImm(0);
5390     MBB.insert(MBB.end(), TC);
5391     Call->eraseFromParent();
5392   }
5393 
5394   // Is there a call in the outlined range?
5395   auto IsNonTailCall = [](MachineInstr &MI) {
5396     return MI.isCall() && !MI.isReturn();
5397   };
5398   if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
5399     // Fix up the instructions in the range, since we're going to modify the
5400     // stack.
5401     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
5402            "Can only fix up stack references once");
5403     fixupPostOutline(MBB);
5404 
5405     // LR has to be a live in so that we can save it.
5406     MBB.addLiveIn(AArch64::LR);
5407 
5408     MachineBasicBlock::iterator It = MBB.begin();
5409     MachineBasicBlock::iterator Et = MBB.end();
5410 
5411     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5412         OF.FrameConstructionID == MachineOutlinerThunk)
5413       Et = std::prev(MBB.end());
5414 
5415     // Insert a save before the outlined region
5416     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5417                                 .addReg(AArch64::SP, RegState::Define)
5418                                 .addReg(AArch64::LR)
5419                                 .addReg(AArch64::SP)
5420                                 .addImm(-16);
5421     It = MBB.insert(It, STRXpre);
5422 
5423     const TargetSubtargetInfo &STI = MF.getSubtarget();
5424     const MCRegisterInfo *MRI = STI.getRegisterInfo();
5425     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
5426 
5427     // Add a CFI saying the stack was moved 16 B down.
5428     int64_t StackPosEntry =
5429         MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
5430     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5431         .addCFIIndex(StackPosEntry)
5432         .setMIFlags(MachineInstr::FrameSetup);
5433 
5434     // Add a CFI saying that the LR that we want to find is now 16 B higher than
5435     // before.
5436     int64_t LRPosEntry =
5437         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
5438     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5439         .addCFIIndex(LRPosEntry)
5440         .setMIFlags(MachineInstr::FrameSetup);
5441 
5442     // Insert a restore before the terminator for the function.
5443     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5444                                  .addReg(AArch64::SP, RegState::Define)
5445                                  .addReg(AArch64::LR, RegState::Define)
5446                                  .addReg(AArch64::SP)
5447                                  .addImm(16);
5448     Et = MBB.insert(Et, LDRXpost);
5449   }
5450 
5451   // If this is a tail call outlined function, then there's already a return.
5452   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5453       OF.FrameConstructionID == MachineOutlinerThunk)
5454     return;
5455 
5456   // It's not a tail call, so we have to insert the return ourselves.
5457   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5458                           .addReg(AArch64::LR, RegState::Undef);
5459   MBB.insert(MBB.end(), ret);
5460 
5461   // Did we have to modify the stack by saving the link register?
5462   if (OF.FrameConstructionID != MachineOutlinerDefault)
5463     return;
5464 
5465   // We modified the stack.
5466   // Walk over the basic block and fix up all the stack accesses.
5467   fixupPostOutline(MBB);
5468 }
5469 
5470 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5471     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5472     MachineFunction &MF, const outliner::Candidate &C) const {
5473 
5474   // Are we tail calling?
5475   if (C.CallConstructionID == MachineOutlinerTailCall) {
5476     // If yes, then we can just branch to the label.
5477     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
5478                             .addGlobalAddress(M.getNamedValue(MF.getName()))
5479                             .addImm(0));
5480     return It;
5481   }
5482 
5483   // Are we saving the link register?
5484   if (C.CallConstructionID == MachineOutlinerNoLRSave ||
5485       C.CallConstructionID == MachineOutlinerThunk) {
5486     // No, so just insert the call.
5487     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5488                             .addGlobalAddress(M.getNamedValue(MF.getName())));
5489     return It;
5490   }
5491 
5492   // We want to return the spot where we inserted the call.
5493   MachineBasicBlock::iterator CallPt;
5494 
5495   // Instructions for saving and restoring LR around the call instruction we're
5496   // going to insert.
5497   MachineInstr *Save;
5498   MachineInstr *Restore;
5499   // Can we save to a register?
5500   if (C.CallConstructionID == MachineOutlinerRegSave) {
5501     // FIXME: This logic should be sunk into a target-specific interface so that
5502     // we don't have to recompute the register.
5503     unsigned Reg = findRegisterToSaveLRTo(C);
5504     assert(Reg != 0 && "No callee-saved register available?");
5505 
5506     // Save and restore LR from that register.
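    // (Added note: ORRXrs with XZR and shift #0 is the canonical MOV alias,
    // so these expand to "mov x<reg>, x30" and "mov x30, x<reg>" around the
    // call, where <reg> is the register chosen above.)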
5507     Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
5508                .addReg(AArch64::XZR)
5509                .addReg(AArch64::LR)
5510                .addImm(0);
5511     Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
5512                 .addReg(AArch64::XZR)
5513                 .addReg(Reg)
5514                 .addImm(0);
5515   } else {
5516     // We have the default case. Save and restore from SP.
5517     Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5518                .addReg(AArch64::SP, RegState::Define)
5519                .addReg(AArch64::LR)
5520                .addReg(AArch64::SP)
5521                .addImm(-16);
5522     Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5523                   .addReg(AArch64::SP, RegState::Define)
5524                   .addReg(AArch64::LR, RegState::Define)
5525                   .addReg(AArch64::SP)
5526                   .addImm(16);
5527   }
5528 
5529   It = MBB.insert(It, Save);
5530   It++;
5531 
5532   // Insert the call.
5533   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5534                           .addGlobalAddress(M.getNamedValue(MF.getName())));
5535   CallPt = It;
5536   It++;
5537 
5538   It = MBB.insert(It, Restore);
5539   return CallPt;
5540 }
5541 
5542 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
5543   MachineFunction &MF) const {
5544   return MF.getFunction().hasMinSize();
5545 }
5546 
5547 bool AArch64InstrInfo::isCopyInstrImpl(
5548     const MachineInstr &MI, const MachineOperand *&Source,
5549     const MachineOperand *&Destination) const {
5550 
5551   // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
5552   // and zero immediate operands used as an alias for mov instruction.
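  // Added illustration: "ORRWrs w0, wzr, w1, 0" is the canonical
  // "mov w0, w1", so operand 0 is the copy destination and operand 2 is the
  // source.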
5553   if (MI.getOpcode() == AArch64::ORRWrs &&
5554       MI.getOperand(1).getReg() == AArch64::WZR &&
5555       MI.getOperand(3).getImm() == 0x0) {
5556     Destination = &MI.getOperand(0);
5557     Source = &MI.getOperand(2);
5558     return true;
5559   }
5560 
5561   if (MI.getOpcode() == AArch64::ORRXrs &&
5562       MI.getOperand(1).getReg() == AArch64::XZR &&
5563       MI.getOperand(3).getImm() == 0x0) {
5564     Destination = &MI.getOperand(0);
5565     Source = &MI.getOperand(2);
5566     return true;
5567   }
5568 
5569   return false;
5570 }
5571 
5572 #define GET_INSTRINFO_HELPERS
5573 #include "AArch64GenInstrInfo.inc"
5574