1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "Utils/AArch64BaseInfo.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/StackMaps.h"
32 #include "llvm/CodeGen/TargetRegisterInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCInst.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CodeGen.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Target/TargetMachine.h"
45 #include "llvm/Target/TargetOptions.h"
46 #include <cassert>
47 #include <cstdint>
48 #include <iterator>
49 #include <utility>
50 
51 using namespace llvm;
52 
53 #define GET_INSTRINFO_CTOR_DTOR
54 #include "AArch64GenInstrInfo.inc"
55 
56 static cl::opt<unsigned> TBZDisplacementBits(
57     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
58     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
59 
60 static cl::opt<unsigned> CBZDisplacementBits(
61     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
62     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
63 
64 static cl::opt<unsigned>
65     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
66                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
67 
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
69     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
70       RI(STI.getTargetTriple()), Subtarget(STI) {}
71 
/// getInstSizeInBytes - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
75   const MachineBasicBlock &MBB = *MI.getParent();
76   const MachineFunction *MF = MBB.getParent();
77   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
78 
79   if (MI.getOpcode() == AArch64::INLINEASM)
80     return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
81 
82   // FIXME: We currently only handle pseudoinstructions that don't get expanded
83   //        before the assembly printer.
84   unsigned NumBytes = 0;
85   const MCInstrDesc &Desc = MI.getDesc();
86   switch (Desc.getOpcode()) {
87   default:
88     // Anything not explicitly designated otherwise is a normal 4-byte insn.
89     NumBytes = 4;
90     break;
91   case TargetOpcode::DBG_VALUE:
92   case TargetOpcode::EH_LABEL:
93   case TargetOpcode::IMPLICIT_DEF:
94   case TargetOpcode::KILL:
95     NumBytes = 0;
96     break;
97   case TargetOpcode::STACKMAP:
98     // The upper bound for a stackmap intrinsic is the full length of its shadow
99     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
100     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
101     break;
102   case TargetOpcode::PATCHPOINT:
103     // The size of the patchpoint intrinsic is the number of bytes requested
104     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
105     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
106     break;
107   case AArch64::TLSDESC_CALLSEQ:
108     // This gets lowered to an instruction sequence which takes 16 bytes
109     NumBytes = 16;
110     break;
111   }
112 
113   return NumBytes;
114 }
115 
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
117                             SmallVectorImpl<MachineOperand> &Cond) {
118   // Block ends with fall-through condbranch.
119   switch (LastInst->getOpcode()) {
120   default:
121     llvm_unreachable("Unknown branch instruction?");
122   case AArch64::Bcc:
123     Target = LastInst->getOperand(1).getMBB();
124     Cond.push_back(LastInst->getOperand(0));
125     break;
126   case AArch64::CBZW:
127   case AArch64::CBZX:
128   case AArch64::CBNZW:
129   case AArch64::CBNZX:
130     Target = LastInst->getOperand(1).getMBB();
131     Cond.push_back(MachineOperand::CreateImm(-1));
132     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
133     Cond.push_back(LastInst->getOperand(0));
134     break;
135   case AArch64::TBZW:
136   case AArch64::TBZX:
137   case AArch64::TBNZW:
138   case AArch64::TBNZX:
139     Target = LastInst->getOperand(2).getMBB();
140     Cond.push_back(MachineOperand::CreateImm(-1));
141     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
142     Cond.push_back(LastInst->getOperand(0));
143     Cond.push_back(LastInst->getOperand(1));
144   }
145 }
146 
static unsigned getBranchDisplacementBits(unsigned Opc) {
148   switch (Opc) {
149   default:
150     llvm_unreachable("unexpected opcode!");
151   case AArch64::B:
152     return 64;
153   case AArch64::TBNZW:
154   case AArch64::TBZW:
155   case AArch64::TBNZX:
156   case AArch64::TBZX:
157     return TBZDisplacementBits;
158   case AArch64::CBNZW:
159   case AArch64::CBZW:
160   case AArch64::CBNZX:
161   case AArch64::CBZX:
162     return CBZDisplacementBits;
163   case AArch64::Bcc:
164     return BCCDisplacementBits;
165   }
166 }
167 
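/// Returns true if the signed byte offset BrOffset fits in the displacement
/// field of the branch opcode BranchOp. Displacements are encoded in units of
/// 4-byte instruction words.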
bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
169                                              int64_t BrOffset) const {
170   unsigned Bits = getBranchDisplacementBits(BranchOp);
  assert(Bits >= 3 && "max branch displacement must be enough to jump "
                      "over conditional branch expansion");
173   return isIntN(Bits, BrOffset / 4);
174 }
175 
MachineBasicBlock *
AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
178   switch (MI.getOpcode()) {
179   default:
180     llvm_unreachable("unexpected opcode!");
181   case AArch64::B:
182     return MI.getOperand(0).getMBB();
183   case AArch64::TBZW:
184   case AArch64::TBNZW:
185   case AArch64::TBZX:
186   case AArch64::TBNZX:
187     return MI.getOperand(2).getMBB();
188   case AArch64::CBZW:
189   case AArch64::CBNZW:
190   case AArch64::CBZX:
191   case AArch64::CBNZX:
192   case AArch64::Bcc:
193     return MI.getOperand(1).getMBB();
194   }
195 }
196 
197 // Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
199                                      MachineBasicBlock *&TBB,
200                                      MachineBasicBlock *&FBB,
201                                      SmallVectorImpl<MachineOperand> &Cond,
202                                      bool AllowModify) const {
203   // If the block has no terminators, it just falls into the block after it.
204   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
205   if (I == MBB.end())
206     return false;
207 
208   if (!isUnpredicatedTerminator(*I))
209     return false;
210 
211   // Get the last instruction in the block.
212   MachineInstr *LastInst = &*I;
213 
214   // If there is only one terminator instruction, process it.
215   unsigned LastOpc = LastInst->getOpcode();
216   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
217     if (isUncondBranchOpcode(LastOpc)) {
218       TBB = LastInst->getOperand(0).getMBB();
219       return false;
220     }
221     if (isCondBranchOpcode(LastOpc)) {
222       // Block ends with fall-through condbranch.
223       parseCondBranch(LastInst, TBB, Cond);
224       return false;
225     }
226     return true; // Can't handle indirect branch.
227   }
228 
229   // Get the instruction before it if it is a terminator.
230   MachineInstr *SecondLastInst = &*I;
231   unsigned SecondLastOpc = SecondLastInst->getOpcode();
232 
233   // If AllowModify is true and the block ends with two or more unconditional
234   // branches, delete all but the first unconditional branch.
235   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
236     while (isUncondBranchOpcode(SecondLastOpc)) {
237       LastInst->eraseFromParent();
238       LastInst = SecondLastInst;
239       LastOpc = LastInst->getOpcode();
240       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now that the only terminator is an unconditional branch.
242         TBB = LastInst->getOperand(0).getMBB();
243         return false;
244       } else {
245         SecondLastInst = &*I;
246         SecondLastOpc = SecondLastInst->getOpcode();
247       }
248     }
249   }
250 
251   // If there are three terminators, we don't know what sort of block this is.
252   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
253     return true;
254 
255   // If the block ends with a B and a Bcc, handle it.
256   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
257     parseCondBranch(SecondLastInst, TBB, Cond);
258     FBB = LastInst->getOperand(0).getMBB();
259     return false;
260   }
261 
262   // If the block ends with two unconditional branches, handle it.  The second
263   // one is not executed, so remove it.
264   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
265     TBB = SecondLastInst->getOperand(0).getMBB();
266     I = LastInst;
267     if (AllowModify)
268       I->eraseFromParent();
269     return false;
270   }
271 
272   // ...likewise if it ends with an indirect branch followed by an unconditional
273   // branch.
274   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
275     I = LastInst;
276     if (AllowModify)
277       I->eraseFromParent();
278     return true;
279   }
280 
281   // Otherwise, can't handle this.
282   return true;
283 }
284 
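/// Invert the condition described by Cond: flip the condition code of a plain
/// Bcc, or switch a folded compare-and-branch between its zero and non-zero
/// forms (CBZ <-> CBNZ, TBZ <-> TBNZ).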
bool AArch64InstrInfo::reverseBranchCondition(
286     SmallVectorImpl<MachineOperand> &Cond) const {
287   if (Cond[0].getImm() != -1) {
288     // Regular Bcc
289     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
290     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
291   } else {
292     // Folded compare-and-branch
293     switch (Cond[1].getImm()) {
294     default:
295       llvm_unreachable("Unknown conditional branch!");
296     case AArch64::CBZW:
297       Cond[1].setImm(AArch64::CBNZW);
298       break;
299     case AArch64::CBNZW:
300       Cond[1].setImm(AArch64::CBZW);
301       break;
302     case AArch64::CBZX:
303       Cond[1].setImm(AArch64::CBNZX);
304       break;
305     case AArch64::CBNZX:
306       Cond[1].setImm(AArch64::CBZX);
307       break;
308     case AArch64::TBZW:
309       Cond[1].setImm(AArch64::TBNZW);
310       break;
311     case AArch64::TBNZW:
312       Cond[1].setImm(AArch64::TBZW);
313       break;
314     case AArch64::TBZX:
315       Cond[1].setImm(AArch64::TBNZX);
316       break;
317     case AArch64::TBNZX:
318       Cond[1].setImm(AArch64::TBZX);
319       break;
320     }
321   }
322 
323   return false;
324 }
325 
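/// Remove up to two branch terminators (a trailing unconditional branch and/or
/// a preceding conditional branch) from the end of MBB. Returns the number of
/// instructions removed and optionally reports the bytes removed.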
unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
327                                         int *BytesRemoved) const {
328   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
329   if (I == MBB.end())
330     return 0;
331 
332   if (!isUncondBranchOpcode(I->getOpcode()) &&
333       !isCondBranchOpcode(I->getOpcode()))
334     return 0;
335 
336   // Remove the branch.
337   I->eraseFromParent();
338 
339   I = MBB.end();
340 
341   if (I == MBB.begin()) {
342     if (BytesRemoved)
343       *BytesRemoved = 4;
344     return 1;
345   }
346   --I;
347   if (!isCondBranchOpcode(I->getOpcode())) {
348     if (BytesRemoved)
349       *BytesRemoved = 4;
350     return 1;
351   }
352 
353   // Remove the branch.
354   I->eraseFromParent();
355   if (BytesRemoved)
356     *BytesRemoved = 8;
357 
358   return 2;
359 }
360 
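/// Emit the conditional branch described by Cond at the end of MBB, targeting
/// TBB: either a Bcc or a reconstructed compare-and-branch (CB[N]Z/TB[N]Z).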
void AArch64InstrInfo::instantiateCondBranch(
362     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
363     ArrayRef<MachineOperand> Cond) const {
364   if (Cond[0].getImm() != -1) {
365     // Regular Bcc
366     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
367   } else {
368     // Folded compare-and-branch
369     // Note that we use addOperand instead of addReg to keep the flags.
370     const MachineInstrBuilder MIB =
371         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
372     if (Cond.size() > 3)
373       MIB.addImm(Cond[3].getImm());
374     MIB.addMBB(TBB);
375   }
376 }
377 
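/// Insert an unconditional, conditional, or two-way (conditional plus
/// unconditional) branch sequence at the end of MBB. Returns the number of
/// instructions inserted and optionally reports the bytes added.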
unsigned AArch64InstrInfo::insertBranch(
379     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
380     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
381   // Shouldn't be a fall through.
382   assert(TBB && "insertBranch must not be told to insert a fallthrough");
383 
384   if (!FBB) {
385     if (Cond.empty()) // Unconditional branch?
386       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
387     else
388       instantiateCondBranch(MBB, DL, TBB, Cond);
389 
390     if (BytesAdded)
391       *BytesAdded = 4;
392 
393     return 1;
394   }
395 
396   // Two-way conditional branch.
397   instantiateCondBranch(MBB, DL, TBB, Cond);
398   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
399 
400   if (BytesAdded)
401     *BytesAdded = 8;
402 
403   return 2;
404 }
405 
406 // Find the original register that VReg is copied from.
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
408   while (TargetRegisterInfo::isVirtualRegister(VReg)) {
409     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
410     if (!DefMI->isFullCopy())
411       return VReg;
412     VReg = DefMI->getOperand(1).getReg();
413   }
414   return VReg;
415 }
416 
417 // Determine if VReg is defined by an instruction that can be folded into a
418 // csel instruction. If so, return the folded opcode, and the replacement
419 // register.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
421                                 unsigned *NewVReg = nullptr) {
422   VReg = removeCopies(MRI, VReg);
423   if (!TargetRegisterInfo::isVirtualRegister(VReg))
424     return 0;
425 
426   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
427   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
428   unsigned Opc = 0;
429   unsigned SrcOpNum = 0;
430   switch (DefMI->getOpcode()) {
431   case AArch64::ADDSXri:
432   case AArch64::ADDSWri:
433     // if NZCV is used, do not fold.
434     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
435       return 0;
436     // fall-through to ADDXri and ADDWri.
437     LLVM_FALLTHROUGH;
438   case AArch64::ADDXri:
439   case AArch64::ADDWri:
440     // add x, 1 -> csinc.
441     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
442         DefMI->getOperand(3).getImm() != 0)
443       return 0;
444     SrcOpNum = 1;
445     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
446     break;
447 
448   case AArch64::ORNXrr:
449   case AArch64::ORNWrr: {
450     // not x -> csinv, represented as orn dst, xzr, src.
451     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
452     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
453       return 0;
454     SrcOpNum = 2;
455     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
456     break;
457   }
458 
459   case AArch64::SUBSXrr:
460   case AArch64::SUBSWrr:
461     // if NZCV is used, do not fold.
462     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
463       return 0;
464     // fall-through to SUBXrr and SUBWrr.
465     LLVM_FALLTHROUGH;
466   case AArch64::SUBXrr:
467   case AArch64::SUBWrr: {
468     // neg x -> csneg, represented as sub dst, xzr, src.
469     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
470     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
471       return 0;
472     SrcOpNum = 2;
473     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
474     break;
475   }
476   default:
477     return 0;
478   }
479   assert(Opc && SrcOpNum && "Missing parameters");
480 
481   if (NewVReg)
482     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
483   return Opc;
484 }
485 
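/// Return true if a select of TrueReg/FalseReg under Cond can be lowered to a
/// csel or fcsel, and report latency estimates for the condition and for each
/// operand (an operand foldable into csinc/csinv/csneg costs zero cycles).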
bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
487                                        ArrayRef<MachineOperand> Cond,
488                                        unsigned TrueReg, unsigned FalseReg,
489                                        int &CondCycles, int &TrueCycles,
490                                        int &FalseCycles) const {
491   // Check register classes.
492   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
493   const TargetRegisterClass *RC =
494       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
495   if (!RC)
496     return false;
497 
498   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
499   unsigned ExtraCondLat = Cond.size() != 1;
500 
501   // GPRs are handled by csel.
502   // FIXME: Fold in x+1, -x, and ~x when applicable.
503   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
504       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
505     // Single-cycle csel, csinc, csinv, and csneg.
506     CondCycles = 1 + ExtraCondLat;
507     TrueCycles = FalseCycles = 1;
508     if (canFoldIntoCSel(MRI, TrueReg))
509       TrueCycles = 0;
510     else if (canFoldIntoCSel(MRI, FalseReg))
511       FalseCycles = 0;
512     return true;
513   }
514 
515   // Scalar floating point is handled by fcsel.
516   // FIXME: Form fabs, fmin, and fmax when applicable.
517   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
518       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
519     CondCycles = 5 + ExtraCondLat;
520     TrueCycles = FalseCycles = 2;
521     return true;
522   }
523 
524   // Can't do vectors.
525   return false;
526 }
527 
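/// Lower a select into a compare that materializes the condition encoded in
/// Cond (see parseCondBranch above), followed by a csel or fcsel, folding a
/// simple defining instruction into csinc/csinv/csneg when possible.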
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
529                                     MachineBasicBlock::iterator I,
530                                     const DebugLoc &DL, unsigned DstReg,
531                                     ArrayRef<MachineOperand> Cond,
532                                     unsigned TrueReg, unsigned FalseReg) const {
533   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
534 
535   // Parse the condition code, see parseCondBranch() above.
536   AArch64CC::CondCode CC;
537   switch (Cond.size()) {
538   default:
539     llvm_unreachable("Unknown condition opcode in Cond");
540   case 1: // b.cc
541     CC = AArch64CC::CondCode(Cond[0].getImm());
542     break;
543   case 3: { // cbz/cbnz
544     // We must insert a compare against 0.
545     bool Is64Bit;
546     switch (Cond[1].getImm()) {
547     default:
548       llvm_unreachable("Unknown branch opcode in Cond");
549     case AArch64::CBZW:
550       Is64Bit = false;
551       CC = AArch64CC::EQ;
552       break;
553     case AArch64::CBZX:
554       Is64Bit = true;
555       CC = AArch64CC::EQ;
556       break;
557     case AArch64::CBNZW:
558       Is64Bit = false;
559       CC = AArch64CC::NE;
560       break;
561     case AArch64::CBNZX:
562       Is64Bit = true;
563       CC = AArch64CC::NE;
564       break;
565     }
566     unsigned SrcReg = Cond[2].getReg();
567     if (Is64Bit) {
568       // cmp reg, #0 is actually subs xzr, reg, #0.
569       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
570       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
571           .addReg(SrcReg)
572           .addImm(0)
573           .addImm(0);
574     } else {
575       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
576       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
577           .addReg(SrcReg)
578           .addImm(0)
579           .addImm(0);
580     }
581     break;
582   }
583   case 4: { // tbz/tbnz
584     // We must insert a tst instruction.
585     switch (Cond[1].getImm()) {
586     default:
587       llvm_unreachable("Unknown branch opcode in Cond");
588     case AArch64::TBZW:
589     case AArch64::TBZX:
590       CC = AArch64CC::EQ;
591       break;
592     case AArch64::TBNZW:
593     case AArch64::TBNZX:
594       CC = AArch64CC::NE;
595       break;
596     }
597     // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
598     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
599       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
600           .addReg(Cond[2].getReg())
601           .addImm(
602               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
603     else
604       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
605           .addReg(Cond[2].getReg())
606           .addImm(
607               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
608     break;
609   }
610   }
611 
612   unsigned Opc = 0;
613   const TargetRegisterClass *RC = nullptr;
614   bool TryFold = false;
615   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
616     RC = &AArch64::GPR64RegClass;
617     Opc = AArch64::CSELXr;
618     TryFold = true;
619   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
620     RC = &AArch64::GPR32RegClass;
621     Opc = AArch64::CSELWr;
622     TryFold = true;
623   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
624     RC = &AArch64::FPR64RegClass;
625     Opc = AArch64::FCSELDrrr;
626   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
627     RC = &AArch64::FPR32RegClass;
628     Opc = AArch64::FCSELSrrr;
629   }
630   assert(RC && "Unsupported regclass");
631 
632   // Try folding simple instructions into the csel.
633   if (TryFold) {
634     unsigned NewVReg = 0;
635     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
636     if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
638       // FalseReg, so we need to invert the condition.
639       CC = AArch64CC::getInvertedCondCode(CC);
640       TrueReg = FalseReg;
641     } else
642       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
643 
644     // Fold the operation. Leave any dead instructions for DCE to clean up.
645     if (FoldedOpc) {
646       FalseReg = NewVReg;
647       Opc = FoldedOpc;
      // This extends the live range of NewVReg.
649       MRI.clearKillFlags(NewVReg);
650     }
651   }
652 
  // Pull all virtual registers into the appropriate class.
654   MRI.constrainRegClass(TrueReg, RC);
655   MRI.constrainRegClass(FalseReg, RC);
656 
657   // Insert the csel.
658   BuildMI(MBB, I, DL, get(Opc), DstReg)
659       .addReg(TrueReg)
660       .addReg(FalseReg)
661       .addImm(CC);
662 }
663 
/// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
666   uint64_t Imm = MI.getOperand(1).getImm();
667   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
668   uint64_t Encoding;
669   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
670 }
671 
672 // FIXME: this implementation should be micro-architecture dependent, so a
673 // micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
675   if (!Subtarget.hasCustomCheapAsMoveHandling())
676     return MI.isAsCheapAsAMove();
677 
678   if (Subtarget.hasExynosCheapAsMoveHandling()) {
679     if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
680       return true;
681     else
682       return MI.isAsCheapAsAMove();
683   }
684 
685   switch (MI.getOpcode()) {
686   default:
687     return false;
688 
689   // add/sub on register without shift
690   case AArch64::ADDWri:
691   case AArch64::ADDXri:
692   case AArch64::SUBWri:
693   case AArch64::SUBXri:
694     return (MI.getOperand(3).getImm() == 0);
695 
696   // logical ops on immediate
697   case AArch64::ANDWri:
698   case AArch64::ANDXri:
699   case AArch64::EORWri:
700   case AArch64::EORXri:
701   case AArch64::ORRWri:
702   case AArch64::ORRXri:
703     return true;
704 
705   // logical ops on register without shift
706   case AArch64::ANDWrr:
707   case AArch64::ANDXrr:
708   case AArch64::BICWrr:
709   case AArch64::BICXrr:
710   case AArch64::EONWrr:
711   case AArch64::EONXrr:
712   case AArch64::EORWrr:
713   case AArch64::EORXrr:
714   case AArch64::ORNWrr:
715   case AArch64::ORNXrr:
716   case AArch64::ORRWrr:
717   case AArch64::ORRXrr:
718     return true;
719 
720   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
721   // ORRXri, it is as cheap as MOV
722   case AArch64::MOVi32imm:
723     return canBeExpandedToORR(MI, 32);
724   case AArch64::MOVi64imm:
725     return canBeExpandedToORR(MI, 64);
726 
727   // It is cheap to zero out registers if the subtarget has ZeroCycleZeroing
728   // feature.
729   case AArch64::FMOVH0:
730   case AArch64::FMOVS0:
731   case AArch64::FMOVD0:
732     return Subtarget.hasZeroCycleZeroing();
733   case TargetOpcode::COPY:
734     return (Subtarget.hasZeroCycleZeroing() &&
735             (MI.getOperand(1).getReg() == AArch64::WZR ||
736              MI.getOperand(1).getReg() == AArch64::XZR));
737   }
738 
739   llvm_unreachable("Unknown opcode to check as cheap as a move!");
740 }
741 
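/// Returns true if MI is a register-reset or immediate-materializing idiom
/// (MOV from SP, ADR/ADRP, MOVI #0, MOVN/MOVZ, or MOV from the zero register)
/// that is treated as fast on Exynos cores.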
bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
743   unsigned Reg, Imm, Shift;
744 
745   switch (MI.getOpcode()) {
746   default:
747     return false;
748 
749   // MOV Rd, SP
750   case AArch64::ADDWri:
751   case AArch64::ADDXri:
752     if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
753       return false;
754 
755     Reg = MI.getOperand(1).getReg();
756     Imm = MI.getOperand(2).getImm();
757     return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
758 
759   // Literal
760   case AArch64::ADR:
761   case AArch64::ADRP:
762     return true;
763 
764   // MOVI Vd, #0
765   case AArch64::MOVID:
766   case AArch64::MOVIv8b_ns:
767   case AArch64::MOVIv2d_ns:
768   case AArch64::MOVIv16b_ns:
769     Imm = MI.getOperand(1).getImm();
770     return (Imm == 0);
771 
772   // MOVI Vd, #0
773   case AArch64::MOVIv2i32:
774   case AArch64::MOVIv4i16:
775   case AArch64::MOVIv4i32:
776   case AArch64::MOVIv8i16:
777     Imm = MI.getOperand(1).getImm();
778     Shift = MI.getOperand(2).getImm();
779     return (Imm == 0 && Shift == 0);
780 
781   // MOV Rd, Imm
782   case AArch64::MOVNWi:
783   case AArch64::MOVNXi:
784 
785   // MOV Rd, Imm
786   case AArch64::MOVZWi:
787   case AArch64::MOVZXi:
788     return true;
789 
790   // MOV Rd, Imm
791   case AArch64::ORRWri:
792   case AArch64::ORRXri:
793     if (!MI.getOperand(1).isReg())
794       return false;
795 
796     Reg = MI.getOperand(1).getReg();
797     Imm = MI.getOperand(2).getImm();
798     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
799 
800   // MOV Rd, Rm
801   case AArch64::ORRWrs:
802   case AArch64::ORRXrs:
803     if (!MI.getOperand(1).isReg())
804       return false;
805 
806     Reg = MI.getOperand(1).getReg();
807     Imm = MI.getOperand(3).getImm();
808     Shift = AArch64_AM::getShiftValue(Imm);
809     return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
810   }
811 }
812 
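/// Returns true if the shifted or extended operand of MI is cheap on Exynos
/// cores: no shift, a left shift by at most 3 on ALU forms, or a load/store
/// index that requires no 32-to-64-bit extension (UXTX/SXTX).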
bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
814   unsigned Imm, Shift;
815   AArch64_AM::ShiftExtendType Ext;
816 
817   switch (MI.getOpcode()) {
818   default:
819     return false;
820 
821   // WriteI
822   case AArch64::ADDSWri:
823   case AArch64::ADDSXri:
824   case AArch64::ADDWri:
825   case AArch64::ADDXri:
826   case AArch64::SUBSWri:
827   case AArch64::SUBSXri:
828   case AArch64::SUBWri:
829   case AArch64::SUBXri:
830     return true;
831 
832   // WriteISReg
833   case AArch64::ADDSWrs:
834   case AArch64::ADDSXrs:
835   case AArch64::ADDWrs:
836   case AArch64::ADDXrs:
837   case AArch64::ANDSWrs:
838   case AArch64::ANDSXrs:
839   case AArch64::ANDWrs:
840   case AArch64::ANDXrs:
841   case AArch64::BICSWrs:
842   case AArch64::BICSXrs:
843   case AArch64::BICWrs:
844   case AArch64::BICXrs:
845   case AArch64::EONWrs:
846   case AArch64::EONXrs:
847   case AArch64::EORWrs:
848   case AArch64::EORXrs:
849   case AArch64::ORNWrs:
850   case AArch64::ORNXrs:
851   case AArch64::ORRWrs:
852   case AArch64::ORRXrs:
853   case AArch64::SUBSWrs:
854   case AArch64::SUBSXrs:
855   case AArch64::SUBWrs:
856   case AArch64::SUBXrs:
857     Imm = MI.getOperand(3).getImm();
858     Shift = AArch64_AM::getShiftValue(Imm);
859     Ext = AArch64_AM::getShiftType(Imm);
860     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
861 
862   // WriteIEReg
863   case AArch64::ADDSWrx:
864   case AArch64::ADDSXrx:
865   case AArch64::ADDSXrx64:
866   case AArch64::ADDWrx:
867   case AArch64::ADDXrx:
868   case AArch64::ADDXrx64:
869   case AArch64::SUBSWrx:
870   case AArch64::SUBSXrx:
871   case AArch64::SUBSXrx64:
872   case AArch64::SUBWrx:
873   case AArch64::SUBXrx:
874   case AArch64::SUBXrx64:
875     Imm = MI.getOperand(3).getImm();
876     Shift = AArch64_AM::getArithShiftValue(Imm);
877     Ext = AArch64_AM::getArithExtendType(Imm);
878     return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
879 
880   case AArch64::PRFMroW:
881   case AArch64::PRFMroX:
882 
883   // WriteLDIdx
884   case AArch64::LDRBBroW:
885   case AArch64::LDRBBroX:
886   case AArch64::LDRHHroW:
887   case AArch64::LDRHHroX:
888   case AArch64::LDRSBWroW:
889   case AArch64::LDRSBWroX:
890   case AArch64::LDRSBXroW:
891   case AArch64::LDRSBXroX:
892   case AArch64::LDRSHWroW:
893   case AArch64::LDRSHWroX:
894   case AArch64::LDRSHXroW:
895   case AArch64::LDRSHXroX:
896   case AArch64::LDRSWroW:
897   case AArch64::LDRSWroX:
898   case AArch64::LDRWroW:
899   case AArch64::LDRWroX:
900   case AArch64::LDRXroW:
901   case AArch64::LDRXroX:
902 
903   case AArch64::LDRBroW:
904   case AArch64::LDRBroX:
905   case AArch64::LDRDroW:
906   case AArch64::LDRDroX:
907   case AArch64::LDRHroW:
908   case AArch64::LDRHroX:
909   case AArch64::LDRSroW:
910   case AArch64::LDRSroX:
911 
912   // WriteSTIdx
913   case AArch64::STRBBroW:
914   case AArch64::STRBBroX:
915   case AArch64::STRHHroW:
916   case AArch64::STRHHroX:
917   case AArch64::STRWroW:
918   case AArch64::STRWroX:
919   case AArch64::STRXroW:
920   case AArch64::STRXroX:
921 
922   case AArch64::STRBroW:
923   case AArch64::STRBroX:
924   case AArch64::STRDroW:
925   case AArch64::STRDroX:
926   case AArch64::STRHroW:
927   case AArch64::STRHroX:
928   case AArch64::STRSroW:
929   case AArch64::STRSroX:
930     Imm = MI.getOperand(3).getImm();
931     Ext = AArch64_AM::getMemExtendType(Imm);
932     return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
933   }
934 }
935 
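/// Returns true if the shift or extension on MI's register operand is one of
/// the forms that is fast on Falkor.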
bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
937   switch (MI.getOpcode()) {
938   default:
939     return false;
940 
941   case AArch64::ADDWrs:
942   case AArch64::ADDXrs:
943   case AArch64::ADDSWrs:
944   case AArch64::ADDSXrs: {
945     unsigned Imm = MI.getOperand(3).getImm();
946     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
947     if (ShiftVal == 0)
948       return true;
949     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
950   }
951 
952   case AArch64::ADDWrx:
953   case AArch64::ADDXrx:
954   case AArch64::ADDXrx64:
955   case AArch64::ADDSWrx:
956   case AArch64::ADDSXrx:
957   case AArch64::ADDSXrx64: {
958     unsigned Imm = MI.getOperand(3).getImm();
959     switch (AArch64_AM::getArithExtendType(Imm)) {
960     default:
961       return false;
962     case AArch64_AM::UXTB:
963     case AArch64_AM::UXTH:
964     case AArch64_AM::UXTW:
965     case AArch64_AM::UXTX:
966       return AArch64_AM::getArithShiftValue(Imm) <= 4;
967     }
968   }
969 
970   case AArch64::SUBWrs:
971   case AArch64::SUBSWrs: {
972     unsigned Imm = MI.getOperand(3).getImm();
973     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
974     return ShiftVal == 0 ||
975            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
976   }
977 
978   case AArch64::SUBXrs:
979   case AArch64::SUBSXrs: {
980     unsigned Imm = MI.getOperand(3).getImm();
981     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
982     return ShiftVal == 0 ||
983            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
984   }
985 
986   case AArch64::SUBWrx:
987   case AArch64::SUBXrx:
988   case AArch64::SUBXrx64:
989   case AArch64::SUBSWrx:
990   case AArch64::SUBSXrx:
991   case AArch64::SUBSXrx64: {
992     unsigned Imm = MI.getOperand(3).getImm();
993     switch (AArch64_AM::getArithExtendType(Imm)) {
994     default:
995       return false;
996     case AArch64_AM::UXTB:
997     case AArch64_AM::UXTH:
998     case AArch64_AM::UXTW:
999     case AArch64_AM::UXTX:
1000       return AArch64_AM::getArithShiftValue(Imm) == 0;
1001     }
1002   }
1003 
1004   case AArch64::LDRBBroW:
1005   case AArch64::LDRBBroX:
1006   case AArch64::LDRBroW:
1007   case AArch64::LDRBroX:
1008   case AArch64::LDRDroW:
1009   case AArch64::LDRDroX:
1010   case AArch64::LDRHHroW:
1011   case AArch64::LDRHHroX:
1012   case AArch64::LDRHroW:
1013   case AArch64::LDRHroX:
1014   case AArch64::LDRQroW:
1015   case AArch64::LDRQroX:
1016   case AArch64::LDRSBWroW:
1017   case AArch64::LDRSBWroX:
1018   case AArch64::LDRSBXroW:
1019   case AArch64::LDRSBXroX:
1020   case AArch64::LDRSHWroW:
1021   case AArch64::LDRSHWroX:
1022   case AArch64::LDRSHXroW:
1023   case AArch64::LDRSHXroX:
1024   case AArch64::LDRSWroW:
1025   case AArch64::LDRSWroX:
1026   case AArch64::LDRSroW:
1027   case AArch64::LDRSroX:
1028   case AArch64::LDRWroW:
1029   case AArch64::LDRWroX:
1030   case AArch64::LDRXroW:
1031   case AArch64::LDRXroX:
1032   case AArch64::PRFMroW:
1033   case AArch64::PRFMroX:
1034   case AArch64::STRBBroW:
1035   case AArch64::STRBBroX:
1036   case AArch64::STRBroW:
1037   case AArch64::STRBroX:
1038   case AArch64::STRDroW:
1039   case AArch64::STRDroX:
1040   case AArch64::STRHHroW:
1041   case AArch64::STRHHroX:
1042   case AArch64::STRHroW:
1043   case AArch64::STRHroX:
1044   case AArch64::STRQroW:
1045   case AArch64::STRQroX:
1046   case AArch64::STRSroW:
1047   case AArch64::STRSroX:
1048   case AArch64::STRWroW:
1049   case AArch64::STRWroX:
1050   case AArch64::STRXroW:
1051   case AArch64::STRXroX: {
1052     unsigned IsSigned = MI.getOperand(3).getImm();
1053     return !IsSigned;
1054   }
1055   }
1056 }
1057 
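/// Returns true if MI is a 32-to-64-bit sign or zero extension (SBFMXri or
/// UBFMXri with the sxtw/uxtw immediates) that the coalescer may treat as a
/// sub_32 copy; reports the source register, destination register and
/// subregister index.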
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1059                                              unsigned &SrcReg, unsigned &DstReg,
1060                                              unsigned &SubIdx) const {
1061   switch (MI.getOpcode()) {
1062   default:
1063     return false;
1064   case AArch64::SBFMXri: // aka sxtw
1065   case AArch64::UBFMXri: // aka uxtw
1066     // Check for the 32 -> 64 bit extension case, these instructions can do
1067     // much more.
1068     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1069       return false;
1070     // This is a signed or unsigned 32 -> 64 bit extension.
1071     SrcReg = MI.getOperand(1).getReg();
1072     DstReg = MI.getOperand(0).getReg();
1073     SubIdx = AArch64::sub_32;
1074     return true;
1075   }
1076 }
1077 
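/// Returns true if the two memory accesses provably do not overlap: both have
/// a known base register, offset and width, the base registers match, and the
/// lower access ends at or before the start of the higher access.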
bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1079     MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
1080   const TargetRegisterInfo *TRI = &getRegisterInfo();
1081   unsigned BaseRegA = 0, BaseRegB = 0;
1082   int64_t OffsetA = 0, OffsetB = 0;
1083   unsigned WidthA = 0, WidthB = 0;
1084 
1085   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1086   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1087 
1088   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1089       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1090     return false;
1091 
  // Retrieve the base register, the offset from the base register, and the
  // width. The width is the size of memory that is being loaded/stored
  // (e.g. 1, 2, 4, 8). If the base registers are identical, and the offset of
  // the lower access plus its width does not reach the offset of the higher
  // access, then the two accesses do not overlap.
1097   if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
1098       getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
1099     if (BaseRegA == BaseRegB) {
1100       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1101       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1102       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1103       if (LowOffset + LowWidth <= HighOffset)
1104         return true;
1105     }
1106   }
1107   return false;
1108 }
1109 
1110 /// analyzeCompare - For a comparison instruction, return the source registers
1111 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1112 /// Return true if the comparison instruction can be analyzed.
bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
1114                                       unsigned &SrcReg2, int &CmpMask,
1115                                       int &CmpValue) const {
1116   // The first operand can be a frame index where we'd normally expect a
1117   // register.
1118   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1119   if (!MI.getOperand(1).isReg())
1120     return false;
1121 
1122   switch (MI.getOpcode()) {
1123   default:
1124     break;
1125   case AArch64::SUBSWrr:
1126   case AArch64::SUBSWrs:
1127   case AArch64::SUBSWrx:
1128   case AArch64::SUBSXrr:
1129   case AArch64::SUBSXrs:
1130   case AArch64::SUBSXrx:
1131   case AArch64::ADDSWrr:
1132   case AArch64::ADDSWrs:
1133   case AArch64::ADDSWrx:
1134   case AArch64::ADDSXrr:
1135   case AArch64::ADDSXrs:
1136   case AArch64::ADDSXrx:
1137     // Replace SUBSWrr with SUBWrr if NZCV is not used.
1138     SrcReg = MI.getOperand(1).getReg();
1139     SrcReg2 = MI.getOperand(2).getReg();
1140     CmpMask = ~0;
1141     CmpValue = 0;
1142     return true;
1143   case AArch64::SUBSWri:
1144   case AArch64::ADDSWri:
1145   case AArch64::SUBSXri:
1146   case AArch64::ADDSXri:
1147     SrcReg = MI.getOperand(1).getReg();
1148     SrcReg2 = 0;
1149     CmpMask = ~0;
1150     // FIXME: In order to convert CmpValue to 0 or 1
1151     CmpValue = MI.getOperand(2).getImm() != 0;
1152     return true;
1153   case AArch64::ANDSWri:
1154   case AArch64::ANDSXri:
    // ANDS does not use the same encoding scheme as the other xxxS
    // instructions.
1157     SrcReg = MI.getOperand(1).getReg();
1158     SrcReg2 = 0;
1159     CmpMask = ~0;
    // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
    // while the type of CmpValue is int. When converting uint64_t to int,
    // the high 32 bits of uint64_t will be lost.
    // In fact it causes a bug in spec2006-483.xalancbmk.
    // CmpValue is only used to compare with zero in optimizeCompareInstr.
1165     CmpValue = AArch64_AM::decodeLogicalImmediate(
1166                    MI.getOperand(2).getImm(),
1167                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1168     return true;
1169   }
1170 
1171   return false;
1172 }
1173 
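/// Constrain the register operands of Instr to the register classes its
/// (possibly just rewritten) opcode requires. Returns false if some operand
/// cannot satisfy the new constraints.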
static bool UpdateOperandRegClass(MachineInstr &Instr) {
1175   MachineBasicBlock *MBB = Instr.getParent();
1176   assert(MBB && "Can't get MachineBasicBlock here");
1177   MachineFunction *MF = MBB->getParent();
1178   assert(MF && "Can't get MachineFunction here");
1179   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1180   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1181   MachineRegisterInfo *MRI = &MF->getRegInfo();
1182 
1183   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1184        ++OpIdx) {
1185     MachineOperand &MO = Instr.getOperand(OpIdx);
1186     const TargetRegisterClass *OpRegCstraints =
1187         Instr.getRegClassConstraint(OpIdx, TII, TRI);
1188 
1189     // If there's no constraint, there's nothing to do.
1190     if (!OpRegCstraints)
1191       continue;
1192     // If the operand is a frame index, there's nothing to do here.
1193     // A frame index operand will resolve correctly during PEI.
1194     if (MO.isFI())
1195       continue;
1196 
1197     assert(MO.isReg() &&
1198            "Operand has register constraints without being a register!");
1199 
1200     unsigned Reg = MO.getReg();
1201     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1202       if (!OpRegCstraints->contains(Reg))
1203         return false;
1204     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1205                !MRI->constrainRegClass(Reg, OpRegCstraints))
1206       return false;
1207   }
1208 
1209   return true;
1210 }
1211 
/// Return the opcode that does not set flags when possible; otherwise
/// return the original opcode. The caller is responsible for doing the actual
/// substitution and legality checking.
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1216   // Don't convert all compare instructions, because for some the zero register
1217   // encoding becomes the sp register.
1218   bool MIDefinesZeroReg = false;
1219   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1220     MIDefinesZeroReg = true;
1221 
1222   switch (MI.getOpcode()) {
1223   default:
1224     return MI.getOpcode();
1225   case AArch64::ADDSWrr:
1226     return AArch64::ADDWrr;
1227   case AArch64::ADDSWri:
1228     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1229   case AArch64::ADDSWrs:
1230     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1231   case AArch64::ADDSWrx:
1232     return AArch64::ADDWrx;
1233   case AArch64::ADDSXrr:
1234     return AArch64::ADDXrr;
1235   case AArch64::ADDSXri:
1236     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1237   case AArch64::ADDSXrs:
1238     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1239   case AArch64::ADDSXrx:
1240     return AArch64::ADDXrx;
1241   case AArch64::SUBSWrr:
1242     return AArch64::SUBWrr;
1243   case AArch64::SUBSWri:
1244     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1245   case AArch64::SUBSWrs:
1246     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1247   case AArch64::SUBSWrx:
1248     return AArch64::SUBWrx;
1249   case AArch64::SUBSXrr:
1250     return AArch64::SUBXrr;
1251   case AArch64::SUBSXri:
1252     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1253   case AArch64::SUBSXrs:
1254     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1255   case AArch64::SUBSXrx:
1256     return AArch64::SUBXrx;
1257   }
1258 }
1259 
1260 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1261 
1262 /// True when condition flags are accessed (either by writing or reading)
1263 /// on the instruction trace starting at From and ending at To.
1264 ///
/// Note: If From and To are from different blocks it is assumed the condition
///       flags are accessed on the path.
static bool areCFlagsAccessedBetweenInstrs(
1268     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1269     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1270   // Early exit if To is at the beginning of the BB.
1271   if (To == To->getParent()->begin())
1272     return true;
1273 
1274   // Check whether the instructions are in the same basic block
1275   // If not, assume the condition flags might get modified somewhere.
1276   if (To->getParent() != From->getParent())
1277     return true;
1278 
1279   // From must be above To.
1280   assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1281                       [From](MachineInstr &MI) {
1282                         return MI.getIterator() == From;
1283                       }) != To->getParent()->rend());
1284 
  // We iterate backward starting at \p To until we hit \p From.
1286   for (--To; To != From; --To) {
1287     const MachineInstr &Instr = *To;
1288 
1289     if (((AccessToCheck & AK_Write) &&
1290          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1291         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1292       return true;
1293   }
1294   return false;
1295 }
1296 
/// Try to optimize a compare instruction. A compare instruction is an
/// instruction which produces AArch64::NZCV. It can be truly a compare
/// instruction when there are no uses of its destination register.
1301 ///
1302 /// The following steps are tried in order:
1303 /// 1. Convert CmpInstr into an unconditional version.
1304 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1305 ///    condition code or an instruction which can be converted into such an
1306 ///    instruction.
1307 ///    Only comparison with zero is supported.
bool AArch64InstrInfo::optimizeCompareInstr(
1309     MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1310     int CmpValue, const MachineRegisterInfo *MRI) const {
1311   assert(CmpInstr.getParent());
1312   assert(MRI);
1313 
1314   // Replace SUBSWrr with SUBWrr if NZCV is not used.
1315   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1316   if (DeadNZCVIdx != -1) {
1317     if (CmpInstr.definesRegister(AArch64::WZR) ||
1318         CmpInstr.definesRegister(AArch64::XZR)) {
1319       CmpInstr.eraseFromParent();
1320       return true;
1321     }
1322     unsigned Opc = CmpInstr.getOpcode();
1323     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1324     if (NewOpc == Opc)
1325       return false;
1326     const MCInstrDesc &MCID = get(NewOpc);
1327     CmpInstr.setDesc(MCID);
1328     CmpInstr.RemoveOperand(DeadNZCVIdx);
1329     bool succeeded = UpdateOperandRegClass(CmpInstr);
1330     (void)succeeded;
1331     assert(succeeded && "Some operands reg class are incompatible!");
1332     return true;
1333   }
1334 
1335   // Continue only if we have a "ri" where immediate is zero.
  // FIXME: CmpValue has already been converted to 0 or 1 in the
  // analyzeCompare function.
1338   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1339   if (CmpValue != 0 || SrcReg2 != 0)
1340     return false;
1341 
1342   // CmpInstr is a Compare instruction if destination register is not used.
1343   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1344     return false;
1345 
1346   return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1347 }
1348 
/// Get the opcode of the S (flag-setting) version of Instr.
/// If Instr is already an S version, its opcode is returned.
/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
/// version or we are not interested in it.
static unsigned sForm(MachineInstr &Instr) {
1354   switch (Instr.getOpcode()) {
1355   default:
1356     return AArch64::INSTRUCTION_LIST_END;
1357 
1358   case AArch64::ADDSWrr:
1359   case AArch64::ADDSWri:
1360   case AArch64::ADDSXrr:
1361   case AArch64::ADDSXri:
1362   case AArch64::SUBSWrr:
1363   case AArch64::SUBSWri:
1364   case AArch64::SUBSXrr:
1365   case AArch64::SUBSXri:
1366     return Instr.getOpcode();
1367 
1368   case AArch64::ADDWrr:
1369     return AArch64::ADDSWrr;
1370   case AArch64::ADDWri:
1371     return AArch64::ADDSWri;
1372   case AArch64::ADDXrr:
1373     return AArch64::ADDSXrr;
1374   case AArch64::ADDXri:
1375     return AArch64::ADDSXri;
1376   case AArch64::ADCWr:
1377     return AArch64::ADCSWr;
1378   case AArch64::ADCXr:
1379     return AArch64::ADCSXr;
1380   case AArch64::SUBWrr:
1381     return AArch64::SUBSWrr;
1382   case AArch64::SUBWri:
1383     return AArch64::SUBSWri;
1384   case AArch64::SUBXrr:
1385     return AArch64::SUBSXrr;
1386   case AArch64::SUBXri:
1387     return AArch64::SUBSXri;
1388   case AArch64::SBCWr:
1389     return AArch64::SBCSWr;
1390   case AArch64::SBCXr:
1391     return AArch64::SBCSXr;
1392   case AArch64::ANDWri:
1393     return AArch64::ANDSWri;
1394   case AArch64::ANDXri:
1395     return AArch64::ANDSXri;
1396   }
1397 }
1398 
1399 /// Check if AArch64::NZCV should be alive in successors of MBB.
static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1401   for (auto *BB : MBB->successors())
1402     if (BB->isLiveIn(AArch64::NZCV))
1403       return true;
1404   return false;
1405 }
1406 
1407 namespace {
1408 
1409 struct UsedNZCV {
1410   bool N = false;
1411   bool Z = false;
1412   bool C = false;
1413   bool V = false;
1414 
1415   UsedNZCV() = default;
1416 
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1418     this->N |= UsedFlags.N;
1419     this->Z |= UsedFlags.Z;
1420     this->C |= UsedFlags.C;
1421     this->V |= UsedFlags.V;
1422     return *this;
1423   }
1424 };
1425 
1426 } // end anonymous namespace
1427 
1428 /// Find a condition code used by the instruction.
1429 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1430 /// codes or we don't optimize CmpInstr in the presence of such instructions.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1432   switch (Instr.getOpcode()) {
1433   default:
1434     return AArch64CC::Invalid;
1435 
1436   case AArch64::Bcc: {
1437     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1438     assert(Idx >= 2);
1439     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1440   }
1441 
1442   case AArch64::CSINVWr:
1443   case AArch64::CSINVXr:
1444   case AArch64::CSINCWr:
1445   case AArch64::CSINCXr:
1446   case AArch64::CSELWr:
1447   case AArch64::CSELXr:
1448   case AArch64::CSNEGWr:
1449   case AArch64::CSNEGXr:
1450   case AArch64::FCSELSrrr:
1451   case AArch64::FCSELDrrr: {
1452     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1453     assert(Idx >= 1);
1454     return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1455   }
1456   }
1457 }
1458 
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1460   assert(CC != AArch64CC::Invalid);
1461   UsedNZCV UsedFlags;
1462   switch (CC) {
1463   default:
1464     break;
1465 
1466   case AArch64CC::EQ: // Z set
1467   case AArch64CC::NE: // Z clear
1468     UsedFlags.Z = true;
1469     break;
1470 
1471   case AArch64CC::HI: // Z clear and C set
1472   case AArch64CC::LS: // Z set   or  C clear
1473     UsedFlags.Z = true;
1474     LLVM_FALLTHROUGH;
1475   case AArch64CC::HS: // C set
1476   case AArch64CC::LO: // C clear
1477     UsedFlags.C = true;
1478     break;
1479 
1480   case AArch64CC::MI: // N set
1481   case AArch64CC::PL: // N clear
1482     UsedFlags.N = true;
1483     break;
1484 
1485   case AArch64CC::VS: // V set
1486   case AArch64CC::VC: // V clear
1487     UsedFlags.V = true;
1488     break;
1489 
1490   case AArch64CC::GT: // Z clear, N and V the same
1491   case AArch64CC::LE: // Z set,   N and V differ
1492     UsedFlags.Z = true;
1493     LLVM_FALLTHROUGH;
1494   case AArch64CC::GE: // N and V the same
1495   case AArch64CC::LT: // N and V differ
1496     UsedFlags.N = true;
1497     UsedFlags.V = true;
1498     break;
1499   }
1500   return UsedFlags;
1501 }
1502 
static bool isADDSRegImm(unsigned Opcode) {
1504   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1505 }
1506 
static bool isSUBSRegImm(unsigned Opcode) {
1508   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1509 }
1510 
1511 /// Check if CmpInstr can be substituted by MI.
1512 ///
1513 /// CmpInstr can be substituted:
1514 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1515 /// - and, MI and CmpInstr are from the same MachineBB
1516 /// - and, condition flags are not alive in successors of the CmpInstr parent
1517 /// - and, if MI opcode is the S form there must be no defs of flags between
1518 ///        MI and CmpInstr
1519 ///        or if MI opcode is not the S form there must be neither defs of flags
1520 ///        nor uses of flags between MI and CmpInstr.
1521 /// - and  C/V flags are not used after CmpInstr
static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1523                                        const TargetRegisterInfo *TRI) {
1524   assert(MI);
1525   assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1526   assert(CmpInstr);
1527 
1528   const unsigned CmpOpcode = CmpInstr->getOpcode();
1529   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1530     return false;
1531 
1532   if (MI->getParent() != CmpInstr->getParent())
1533     return false;
1534 
1535   if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1536     return false;
1537 
1538   AccessKind AccessToCheck = AK_Write;
1539   if (sForm(*MI) != MI->getOpcode())
1540     AccessToCheck = AK_All;
1541   if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1542     return false;
1543 
1544   UsedNZCV NZCVUsedAfterCmp;
1545   for (auto I = std::next(CmpInstr->getIterator()),
1546             E = CmpInstr->getParent()->instr_end();
1547        I != E; ++I) {
1548     const MachineInstr &Instr = *I;
1549     if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1550       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1551       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1552         return false;
1553       NZCVUsedAfterCmp |= getUsedNZCV(CC);
1554     }
1555 
1556     if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1557       break;
1558   }
1559 
1560   return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1561 }
1562 
1563 /// Substitute an instruction comparing to zero with another instruction
1564 /// which produces the needed condition flags.
1565 ///
1566 /// Return true on success.
1567 bool AArch64InstrInfo::substituteCmpToZero(
1568     MachineInstr &CmpInstr, unsigned SrcReg,
1569     const MachineRegisterInfo *MRI) const {
1570   assert(MRI);
1571   // Get the unique definition of SrcReg.
1572   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1573   if (!MI)
1574     return false;
1575 
1576   const TargetRegisterInfo *TRI = &getRegisterInfo();
1577 
1578   unsigned NewOpc = sForm(*MI);
1579   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1580     return false;
1581 
1582   if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1583     return false;
1584 
1585   // Update the instruction to set NZCV.
1586   MI->setDesc(get(NewOpc));
1587   CmpInstr.eraseFromParent();
1588   bool succeeded = UpdateOperandRegClass(*MI);
1589   (void)succeeded;
1590   assert(succeeded && "Some operands reg class are incompatible!");
1591   MI->addRegisterDefined(AArch64::NZCV, TRI);
1592   return true;
1593 }
1594 
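// Expand the LOAD_STACK_GUARD pseudo into an address computation plus an
// LDRXui of the stack-guard global: via the GOT, a MOVZ/MOVK literal under
// the large code model, or an ADRP/LDR pair otherwise.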
1595 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1596   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
1597     return false;
1598 
1599   MachineBasicBlock &MBB = *MI.getParent();
1600   DebugLoc DL = MI.getDebugLoc();
1601   unsigned Reg = MI.getOperand(0).getReg();
1602   const GlobalValue *GV =
1603       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1604   const TargetMachine &TM = MBB.getParent()->getTarget();
1605   unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1606   const unsigned char MO_NC = AArch64II::MO_NC;
1607 
1608   if ((OpFlags & AArch64II::MO_GOT) != 0) {
1609     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1610         .addGlobalAddress(GV, 0, AArch64II::MO_GOT);
1611     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1612         .addReg(Reg, RegState::Kill)
1613         .addImm(0)
1614         .addMemOperand(*MI.memoperands_begin());
1615   } else if (TM.getCodeModel() == CodeModel::Large) {
1616     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1617         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1618         .addImm(0);
1619     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1620         .addReg(Reg, RegState::Kill)
1621         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1622         .addImm(16);
1623     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1624         .addReg(Reg, RegState::Kill)
1625         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1626         .addImm(32);
1627     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1628         .addReg(Reg, RegState::Kill)
1629         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1630         .addImm(48);
1631     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1632         .addReg(Reg, RegState::Kill)
1633         .addImm(0)
1634         .addMemOperand(*MI.memoperands_begin());
1635   } else {
1636     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1637         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1638     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1639     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1640         .addReg(Reg, RegState::Kill)
1641         .addGlobalAddress(GV, 0, LoFlags)
1642         .addMemOperand(*MI.memoperands_begin());
1643   }
1644 
1645   MBB.erase(MI);
1646 
1647   return true;
1648 }
1649 
1650 /// Return true if this instruction has a non-zero shift amount on its
1651 /// shifted-register operand.
1651 bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
1652   switch (MI.getOpcode()) {
1653   default:
1654     break;
1655   case AArch64::ADDSWrs:
1656   case AArch64::ADDSXrs:
1657   case AArch64::ADDWrs:
1658   case AArch64::ADDXrs:
1659   case AArch64::ANDSWrs:
1660   case AArch64::ANDSXrs:
1661   case AArch64::ANDWrs:
1662   case AArch64::ANDXrs:
1663   case AArch64::BICSWrs:
1664   case AArch64::BICSXrs:
1665   case AArch64::BICWrs:
1666   case AArch64::BICXrs:
1667   case AArch64::EONWrs:
1668   case AArch64::EONXrs:
1669   case AArch64::EORWrs:
1670   case AArch64::EORXrs:
1671   case AArch64::ORNWrs:
1672   case AArch64::ORNXrs:
1673   case AArch64::ORRWrs:
1674   case AArch64::ORRXrs:
1675   case AArch64::SUBSWrs:
1676   case AArch64::SUBSXrs:
1677   case AArch64::SUBWrs:
1678   case AArch64::SUBXrs:
1679     if (MI.getOperand(3).isImm()) {
1680       unsigned val = MI.getOperand(3).getImm();
1681       return (val != 0);
1682     }
1683     break;
1684   }
1685   return false;
1686 }
1687 
1688 /// Return true if this instruction has a non-zero extend/shift amount on its
1689 /// extended-register operand.
1689 bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
1690   switch (MI.getOpcode()) {
1691   default:
1692     break;
1693   case AArch64::ADDSWrx:
1694   case AArch64::ADDSXrx:
1695   case AArch64::ADDSXrx64:
1696   case AArch64::ADDWrx:
1697   case AArch64::ADDXrx:
1698   case AArch64::ADDXrx64:
1699   case AArch64::SUBSWrx:
1700   case AArch64::SUBSXrx:
1701   case AArch64::SUBSXrx64:
1702   case AArch64::SUBWrx:
1703   case AArch64::SUBXrx:
1704   case AArch64::SUBXrx64:
1705     if (MI.getOperand(3).isImm()) {
1706       unsigned val = MI.getOperand(3).getImm();
1707       return (val != 0);
1708     }
1709     break;
1710   }
1711 
1712   return false;
1713 }
1714 
1715 // Return true if this instruction simply sets its single destination register
1716 // to zero. This is equivalent to a register rename of the zero-register.
1717 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1718   switch (MI.getOpcode()) {
1719   default:
1720     break;
1721   case AArch64::MOVZWi:
1722   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1723     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1724       assert(MI.getDesc().getNumOperands() == 3 &&
1725              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1726       return true;
1727     }
1728     break;
1729   case AArch64::ANDWri: // and Rd, Rzr, #imm
1730     return MI.getOperand(1).getReg() == AArch64::WZR;
1731   case AArch64::ANDXri:
1732     return MI.getOperand(1).getReg() == AArch64::XZR;
1733   case TargetOpcode::COPY:
1734     return MI.getOperand(1).getReg() == AArch64::WZR;
1735   }
1736   return false;
1737 }
1738 
1739 // Return true if this instruction simply renames a general register without
1740 // modifying bits.
1741 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1742   switch (MI.getOpcode()) {
1743   default:
1744     break;
1745   case TargetOpcode::COPY: {
1746     // GPR32 copies will be lowered to ORRXrs
1747     unsigned DstReg = MI.getOperand(0).getReg();
1748     return (AArch64::GPR32RegClass.contains(DstReg) ||
1749             AArch64::GPR64RegClass.contains(DstReg));
1750   }
1751   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1752     if (MI.getOperand(1).getReg() == AArch64::XZR) {
1753       assert(MI.getDesc().getNumOperands() == 4 &&
1754              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1755       return true;
1756     }
1757     break;
1758   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1759     if (MI.getOperand(2).getImm() == 0) {
1760       assert(MI.getDesc().getNumOperands() == 4 &&
1761              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1762       return true;
1763     }
1764     break;
1765   }
1766   return false;
1767 }
1768 
1769 // Return true if this instruction simply renames a general register without
1770 // modifying bits.
1771 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1772   switch (MI.getOpcode()) {
1773   default:
1774     break;
1775   case TargetOpcode::COPY: {
1776     // FPR64 copies will be lowered to ORR.16b
1777     unsigned DstReg = MI.getOperand(0).getReg();
1778     return (AArch64::FPR64RegClass.contains(DstReg) ||
1779             AArch64::FPR128RegClass.contains(DstReg));
1780   }
1781   case AArch64::ORRv16i8:
1782     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1783       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1784              "invalid ORRv16i8 operands");
1785       return true;
1786     }
1787     break;
1788   }
1789   return false;
1790 }
1791 
1792 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1793                                                int &FrameIndex) const {
1794   switch (MI.getOpcode()) {
1795   default:
1796     break;
1797   case AArch64::LDRWui:
1798   case AArch64::LDRXui:
1799   case AArch64::LDRBui:
1800   case AArch64::LDRHui:
1801   case AArch64::LDRSui:
1802   case AArch64::LDRDui:
1803   case AArch64::LDRQui:
1804     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1805         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1806       FrameIndex = MI.getOperand(1).getIndex();
1807       return MI.getOperand(0).getReg();
1808     }
1809     break;
1810   }
1811 
1812   return 0;
1813 }
1814 
1815 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1816                                               int &FrameIndex) const {
1817   switch (MI.getOpcode()) {
1818   default:
1819     break;
1820   case AArch64::STRWui:
1821   case AArch64::STRXui:
1822   case AArch64::STRBui:
1823   case AArch64::STRHui:
1824   case AArch64::STRSui:
1825   case AArch64::STRDui:
1826   case AArch64::STRQui:
1827     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1828         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1829       FrameIndex = MI.getOperand(1).getIndex();
1830       return MI.getOperand(0).getReg();
1831     }
1832     break;
1833   }
1834   return 0;
1835 }
1836 
1837 /// Return true if this load/store scales or extends its register offset.
1838 /// This refers to scaling a dynamic index as opposed to scaled immediates.
1839 /// MI should be a memory op that allows scaled addressing.
1840 bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
1841   switch (MI.getOpcode()) {
1842   default:
1843     break;
1844   case AArch64::LDRBBroW:
1845   case AArch64::LDRBroW:
1846   case AArch64::LDRDroW:
1847   case AArch64::LDRHHroW:
1848   case AArch64::LDRHroW:
1849   case AArch64::LDRQroW:
1850   case AArch64::LDRSBWroW:
1851   case AArch64::LDRSBXroW:
1852   case AArch64::LDRSHWroW:
1853   case AArch64::LDRSHXroW:
1854   case AArch64::LDRSWroW:
1855   case AArch64::LDRSroW:
1856   case AArch64::LDRWroW:
1857   case AArch64::LDRXroW:
1858   case AArch64::STRBBroW:
1859   case AArch64::STRBroW:
1860   case AArch64::STRDroW:
1861   case AArch64::STRHHroW:
1862   case AArch64::STRHroW:
1863   case AArch64::STRQroW:
1864   case AArch64::STRSroW:
1865   case AArch64::STRWroW:
1866   case AArch64::STRXroW:
1867   case AArch64::LDRBBroX:
1868   case AArch64::LDRBroX:
1869   case AArch64::LDRDroX:
1870   case AArch64::LDRHHroX:
1871   case AArch64::LDRHroX:
1872   case AArch64::LDRQroX:
1873   case AArch64::LDRSBWroX:
1874   case AArch64::LDRSBXroX:
1875   case AArch64::LDRSHWroX:
1876   case AArch64::LDRSHXroX:
1877   case AArch64::LDRSWroX:
1878   case AArch64::LDRSroX:
1879   case AArch64::LDRWroX:
1880   case AArch64::LDRXroX:
1881   case AArch64::STRBBroX:
1882   case AArch64::STRBroX:
1883   case AArch64::STRDroX:
1884   case AArch64::STRHHroX:
1885   case AArch64::STRHroX:
1886   case AArch64::STRQroX:
1887   case AArch64::STRSroX:
1888   case AArch64::STRWroX:
1889   case AArch64::STRXroX:
1890 
1891     unsigned Val = MI.getOperand(3).getImm();
1892     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1893     return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1894   }
1895   return false;
1896 }
1897 
1898 /// Check all MachineMemOperands for a hint to suppress pairing.
1899 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1900   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1901     return MMO->getFlags() & MOSuppressPair;
1902   });
1903 }
1904 
1905 /// Set a flag on the first MachineMemOperand to suppress pairing.
1906 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1907   if (MI.memoperands_empty())
1908     return;
1909   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1910 }
1911 
1912 /// Check all MachineMemOperands for a hint that the load/store is strided.
1913 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1914   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1915     return MMO->getFlags() & MOStridedAccess;
1916   });
1917 }
1918 
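/// Return true if \p Opc is an unscaled (LDUR/STUR) load or store opcode.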
1919 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1920   switch (Opc) {
1921   default:
1922     return false;
1923   case AArch64::STURSi:
1924   case AArch64::STURDi:
1925   case AArch64::STURQi:
1926   case AArch64::STURBBi:
1927   case AArch64::STURHHi:
1928   case AArch64::STURWi:
1929   case AArch64::STURXi:
1930   case AArch64::LDURSi:
1931   case AArch64::LDURDi:
1932   case AArch64::LDURQi:
1933   case AArch64::LDURWi:
1934   case AArch64::LDURXi:
1935   case AArch64::LDURSWi:
1936   case AArch64::LDURHHi:
1937   case AArch64::LDURBBi:
1938   case AArch64::LDURSBWi:
1939   case AArch64::LDURSHWi:
1940     return true;
1941   }
1942 }
1943 
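/// Return true if this is a load or store opcode that may be combined into
/// an LDP/STP pair: the scaled (LDR/STR ui) and unscaled (LDUR/STUR) reg+imm
/// forms of W, X, S, D and Q accesses, plus LDRSW.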
1944 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1945   switch (MI.getOpcode()) {
1946   default:
1947     return false;
1948   // Scaled instructions.
1949   case AArch64::STRSui:
1950   case AArch64::STRDui:
1951   case AArch64::STRQui:
1952   case AArch64::STRXui:
1953   case AArch64::STRWui:
1954   case AArch64::LDRSui:
1955   case AArch64::LDRDui:
1956   case AArch64::LDRQui:
1957   case AArch64::LDRXui:
1958   case AArch64::LDRWui:
1959   case AArch64::LDRSWui:
1960   // Unscaled instructions.
1961   case AArch64::STURSi:
1962   case AArch64::STURDi:
1963   case AArch64::STURQi:
1964   case AArch64::STURWi:
1965   case AArch64::STURXi:
1966   case AArch64::LDURSi:
1967   case AArch64::LDURDi:
1968   case AArch64::LDURQi:
1969   case AArch64::LDURWi:
1970   case AArch64::LDURXi:
1971   case AArch64::LDURSWi:
1972     return true;
1973   }
1974 }
1975 
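/// Return the flag-setting (S-suffixed) equivalent of \p Opc and set
/// \p Is64Bit according to the register width of the instruction.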
1976 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1977                                                    bool &Is64Bit) {
1978   switch (Opc) {
1979   default:
1980     llvm_unreachable("Opcode has no flag setting equivalent!");
1981   // 32-bit cases:
1982   case AArch64::ADDWri:
1983     Is64Bit = false;
1984     return AArch64::ADDSWri;
1985   case AArch64::ADDWrr:
1986     Is64Bit = false;
1987     return AArch64::ADDSWrr;
1988   case AArch64::ADDWrs:
1989     Is64Bit = false;
1990     return AArch64::ADDSWrs;
1991   case AArch64::ADDWrx:
1992     Is64Bit = false;
1993     return AArch64::ADDSWrx;
1994   case AArch64::ANDWri:
1995     Is64Bit = false;
1996     return AArch64::ANDSWri;
1997   case AArch64::ANDWrr:
1998     Is64Bit = false;
1999     return AArch64::ANDSWrr;
2000   case AArch64::ANDWrs:
2001     Is64Bit = false;
2002     return AArch64::ANDSWrs;
2003   case AArch64::BICWrr:
2004     Is64Bit = false;
2005     return AArch64::BICSWrr;
2006   case AArch64::BICWrs:
2007     Is64Bit = false;
2008     return AArch64::BICSWrs;
2009   case AArch64::SUBWri:
2010     Is64Bit = false;
2011     return AArch64::SUBSWri;
2012   case AArch64::SUBWrr:
2013     Is64Bit = false;
2014     return AArch64::SUBSWrr;
2015   case AArch64::SUBWrs:
2016     Is64Bit = false;
2017     return AArch64::SUBSWrs;
2018   case AArch64::SUBWrx:
2019     Is64Bit = false;
2020     return AArch64::SUBSWrx;
2021   // 64-bit cases:
2022   case AArch64::ADDXri:
2023     Is64Bit = true;
2024     return AArch64::ADDSXri;
2025   case AArch64::ADDXrr:
2026     Is64Bit = true;
2027     return AArch64::ADDSXrr;
2028   case AArch64::ADDXrs:
2029     Is64Bit = true;
2030     return AArch64::ADDSXrs;
2031   case AArch64::ADDXrx:
2032     Is64Bit = true;
2033     return AArch64::ADDSXrx;
2034   case AArch64::ANDXri:
2035     Is64Bit = true;
2036     return AArch64::ANDSXri;
2037   case AArch64::ANDXrr:
2038     Is64Bit = true;
2039     return AArch64::ANDSXrr;
2040   case AArch64::ANDXrs:
2041     Is64Bit = true;
2042     return AArch64::ANDSXrs;
2043   case AArch64::BICXrr:
2044     Is64Bit = true;
2045     return AArch64::BICSXrr;
2046   case AArch64::BICXrs:
2047     Is64Bit = true;
2048     return AArch64::BICSXrs;
2049   case AArch64::SUBXri:
2050     Is64Bit = true;
2051     return AArch64::SUBSXri;
2052   case AArch64::SUBXrr:
2053     Is64Bit = true;
2054     return AArch64::SUBSXrr;
2055   case AArch64::SUBXrs:
2056     Is64Bit = true;
2057     return AArch64::SUBSXrs;
2058   case AArch64::SUBXrx:
2059     Is64Bit = true;
2060     return AArch64::SUBSXrx;
2061   }
2062 }
2063 
2064 // Is this a candidate for ld/st merging or pairing?  For example, we don't
2065 // touch volatiles or load/stores that have a hint to avoid pair formation.
2066 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
2067   // If this is a volatile load/store, don't mess with it.
2068   if (MI.hasOrderedMemoryRef())
2069     return false;
2070 
2071   // Make sure this is a reg+imm (as opposed to an address reloc).
2072   assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
2073   if (!MI.getOperand(2).isImm())
2074     return false;
2075 
2076   // Can't merge/pair if the instruction modifies the base register.
2077   // e.g., ldr x0, [x0]
2078   unsigned BaseReg = MI.getOperand(1).getReg();
2079   const TargetRegisterInfo *TRI = &getRegisterInfo();
2080   if (MI.modifiesRegister(BaseReg, TRI))
2081     return false;
2082 
2083   // Check if this load/store has a hint to avoid pair formation.
2084   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2085   if (isLdStPairSuppressed(MI))
2086     return false;
2087 
2088   // On some CPUs quad load/store pairs are slower than two single load/stores.
2089   if (Subtarget.isPaired128Slow()) {
2090     switch (MI.getOpcode()) {
2091     default:
2092       break;
2093     case AArch64::LDURQi:
2094     case AArch64::STURQi:
2095     case AArch64::LDRQui:
2096     case AArch64::STRQui:
2097       return false;
2098     }
2099   }
2100 
2101   return true;
2102 }
2103 
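// Thin wrapper around getMemOpBaseRegImmOfsWidth that discards the width.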
2104 bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
2105     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
2106     const TargetRegisterInfo *TRI) const {
2107   unsigned Width;
2108   return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
2109 }
2110 
2111 bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
2112     MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
2113     const TargetRegisterInfo *TRI) const {
2114   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2115   // Handle only loads/stores with base register followed by immediate offset.
2116   if (LdSt.getNumExplicitOperands() == 3) {
2117     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2118     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
2119       return false;
2120   } else if (LdSt.getNumExplicitOperands() == 4) {
2121     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2122     if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
2123         !LdSt.getOperand(3).isImm())
2124       return false;
2125   } else
2126     return false;
2127 
2128   // Get the scaling factor for the instruction and set the width for the
2129   // instruction.
2130   unsigned Scale = 0;
2131   int64_t Dummy1, Dummy2;
2132 
2133   // If this returns false, then it's an instruction we don't want to handle.
2134   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2135     return false;
2136 
2137   // Compute the offset. Offset is calculated as the immediate operand
2138   // multiplied by the scaling factor. Unscaled instructions have scaling factor
2139   // set to 1.
2140   if (LdSt.getNumExplicitOperands() == 3) {
2141     BaseReg = LdSt.getOperand(1).getReg();
2142     Offset = LdSt.getOperand(2).getImm() * Scale;
2143   } else {
2144     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2145     BaseReg = LdSt.getOperand(2).getReg();
2146     Offset = LdSt.getOperand(3).getImm() * Scale;
2147   }
2148   return true;
2149 }
2150 
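/// Return the immediate offset operand (the last explicit operand) of a
/// reg+imm load or store.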
2151 MachineOperand &
2152 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2153   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2154   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2155   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2156   return OfsOp;
2157 }
2158 
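/// Fill in the scale, access width in bytes and legal immediate offset range
/// for a load/store opcode.  Returns false for opcodes that are not modelled
/// here.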
2159 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
2160                                     unsigned &Width, int64_t &MinOffset,
2161                                     int64_t &MaxOffset) const {
2162   switch (Opcode) {
2163   // Not a memory operation or something we want to handle.
2164   default:
2165     Scale = Width = 0;
2166     MinOffset = MaxOffset = 0;
2167     return false;
2168   case AArch64::STRWpost:
2169   case AArch64::LDRWpost:
2170     Width = 32;
2171     Scale = 4;
2172     MinOffset = -256;
2173     MaxOffset = 255;
2174     break;
2175   case AArch64::LDURQi:
2176   case AArch64::STURQi:
2177     Width = 16;
2178     Scale = 1;
2179     MinOffset = -256;
2180     MaxOffset = 255;
2181     break;
2182   case AArch64::LDURXi:
2183   case AArch64::LDURDi:
2184   case AArch64::STURXi:
2185   case AArch64::STURDi:
2186     Width = 8;
2187     Scale = 1;
2188     MinOffset = -256;
2189     MaxOffset = 255;
2190     break;
2191   case AArch64::LDURWi:
2192   case AArch64::LDURSi:
2193   case AArch64::LDURSWi:
2194   case AArch64::STURWi:
2195   case AArch64::STURSi:
2196     Width = 4;
2197     Scale = 1;
2198     MinOffset = -256;
2199     MaxOffset = 255;
2200     break;
2201   case AArch64::LDURHi:
2202   case AArch64::LDURHHi:
2203   case AArch64::LDURSHXi:
2204   case AArch64::LDURSHWi:
2205   case AArch64::STURHi:
2206   case AArch64::STURHHi:
2207     Width = 2;
2208     Scale = 1;
2209     MinOffset = -256;
2210     MaxOffset = 255;
2211     break;
2212   case AArch64::LDURBi:
2213   case AArch64::LDURBBi:
2214   case AArch64::LDURSBXi:
2215   case AArch64::LDURSBWi:
2216   case AArch64::STURBi:
2217   case AArch64::STURBBi:
2218     Width = 1;
2219     Scale = 1;
2220     MinOffset = -256;
2221     MaxOffset = 255;
2222     break;
2223   case AArch64::LDPQi:
2224   case AArch64::LDNPQi:
2225   case AArch64::STPQi:
2226   case AArch64::STNPQi:
2227     Scale = 16;
2228     Width = 32;
2229     MinOffset = -64;
2230     MaxOffset = 63;
2231     break;
2232   case AArch64::LDRQui:
2233   case AArch64::STRQui:
2234     Scale = Width = 16;
2235     MinOffset = 0;
2236     MaxOffset = 4095;
2237     break;
2238   case AArch64::LDPXi:
2239   case AArch64::LDPDi:
2240   case AArch64::LDNPXi:
2241   case AArch64::LDNPDi:
2242   case AArch64::STPXi:
2243   case AArch64::STPDi:
2244   case AArch64::STNPXi:
2245   case AArch64::STNPDi:
2246     Scale = 8;
2247     Width = 16;
2248     MinOffset = -64;
2249     MaxOffset = 63;
2250     break;
2251   case AArch64::LDRXui:
2252   case AArch64::LDRDui:
2253   case AArch64::STRXui:
2254   case AArch64::STRDui:
2255     Scale = Width = 8;
2256     MinOffset = 0;
2257     MaxOffset = 4095;
2258     break;
2259   case AArch64::LDPWi:
2260   case AArch64::LDPSi:
2261   case AArch64::LDNPWi:
2262   case AArch64::LDNPSi:
2263   case AArch64::STPWi:
2264   case AArch64::STPSi:
2265   case AArch64::STNPWi:
2266   case AArch64::STNPSi:
2267     Scale = 4;
2268     Width = 8;
2269     MinOffset = -64;
2270     MaxOffset = 63;
2271     break;
2272   case AArch64::LDRWui:
2273   case AArch64::LDRSui:
2274   case AArch64::LDRSWui:
2275   case AArch64::STRWui:
2276   case AArch64::STRSui:
2277     Scale = Width = 4;
2278     MinOffset = 0;
2279     MaxOffset = 4095;
2280     break;
2281   case AArch64::LDRHui:
2282   case AArch64::LDRHHui:
2283   case AArch64::STRHui:
2284   case AArch64::STRHHui:
2285     Scale = Width = 2;
2286     MinOffset = 0;
2287     MaxOffset = 4095;
2288     break;
2289   case AArch64::LDRBui:
2290   case AArch64::LDRBBui:
2291   case AArch64::STRBui:
2292   case AArch64::STRBBui:
2293     Scale = Width = 1;
2294     MinOffset = 0;
2295     MaxOffset = 4095;
2296     break;
2297   }
2298 
2299   return true;
2300 }
2301 
2302 // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
2303 // scaled.
2304 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2305   unsigned OffsetStride = 1;
2306   switch (Opc) {
2307   default:
2308     return false;
2309   case AArch64::LDURQi:
2310   case AArch64::STURQi:
2311     OffsetStride = 16;
2312     break;
2313   case AArch64::LDURXi:
2314   case AArch64::LDURDi:
2315   case AArch64::STURXi:
2316   case AArch64::STURDi:
2317     OffsetStride = 8;
2318     break;
2319   case AArch64::LDURWi:
2320   case AArch64::LDURSi:
2321   case AArch64::LDURSWi:
2322   case AArch64::STURWi:
2323   case AArch64::STURSi:
2324     OffsetStride = 4;
2325     break;
2326   }
2327   // If the byte-offset isn't a multiple of the stride, we can't scale this
2328   // offset.
2329   if (Offset % OffsetStride != 0)
2330     return false;
2331 
2332   // Convert the byte-offset used by unscaled into an "element" offset used
2333   // by the scaled pair load/store instructions.
2334   Offset /= OffsetStride;
2335   return true;
2336 }
2337 
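// Return true if two load/store opcodes may be combined into a single pair
// instruction: either identical opcodes, or a mix of the zero-extending and
// sign-extending 32-bit load forms.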
2338 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2339   if (FirstOpc == SecondOpc)
2340     return true;
2341   // We can also pair sign-ext and zero-ext instructions.
2342   switch (FirstOpc) {
2343   default:
2344     return false;
2345   case AArch64::LDRWui:
2346   case AArch64::LDURWi:
2347     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2348   case AArch64::LDRSWui:
2349   case AArch64::LDURSWi:
2350     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2351   }
2352   // These instructions can't be paired based on their opcodes.
2353   return false;
2354 }
2355 
2356 /// Detect opportunities for ldp/stp formation.
2357 ///
2358 /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
2359 bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
2360                                            unsigned BaseReg1,
2361                                            MachineInstr &SecondLdSt,
2362                                            unsigned BaseReg2,
2363                                            unsigned NumLoads) const {
2364   if (BaseReg1 != BaseReg2)
2365     return false;
2366 
2367   // Only cluster up to a single pair.
2368   if (NumLoads > 1)
2369     return false;
2370 
2371   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2372     return false;
2373 
2374   // Can we pair these instructions based on their opcodes?
2375   unsigned FirstOpc = FirstLdSt.getOpcode();
2376   unsigned SecondOpc = SecondLdSt.getOpcode();
2377   if (!canPairLdStOpc(FirstOpc, SecondOpc))
2378     return false;
2379 
2380   // Can't merge volatiles or load/stores that have a hint to avoid pair
2381   // formation, for example.
2382   if (!isCandidateToMergeOrPair(FirstLdSt) ||
2383       !isCandidateToMergeOrPair(SecondLdSt))
2384     return false;
2385 
2386   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2387   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2388   if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2389     return false;
2390 
2391   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2392   if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2393     return false;
2394 
2395   // Pairwise instructions have a 7-bit signed offset field.
2396   if (Offset1 > 63 || Offset1 < -64)
2397     return false;
2398 
2399   // The caller should already have ordered First/SecondLdSt by offset.
2400   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2401   return Offset1 + 1 == Offset2;
2402 }
2403 
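// Add a register operand to \p MIB, resolving \p SubIdx: for a physical
// register the sub-register is looked up through the TRI, for a virtual
// register the sub-register index is attached to the operand.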
2404 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2405                                             unsigned Reg, unsigned SubIdx,
2406                                             unsigned State,
2407                                             const TargetRegisterInfo *TRI) {
2408   if (!SubIdx)
2409     return MIB.addReg(Reg, State);
2410 
2411   if (TargetRegisterInfo::isPhysicalRegister(Reg))
2412     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2413   return MIB.addReg(Reg, State, SubIdx);
2414 }
2415 
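// Return true if copying an N-register tuple from low to high sub-register
// would overwrite parts of the source before they have been read, i.e. the
// destination encoding falls within \p NumRegs registers above the source.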
2416 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2417                                         unsigned NumRegs) {
2418   // We really want the positive remainder mod 32 here; that happens to be
2419   // easily obtainable with a mask.
2420   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2421 }
2422 
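// Copy a register tuple (e.g. DD..DDDD or QQ..QQQQ) by emitting one vector
// ORR per sub-register, iterating in reverse when a forward copy would
// clobber source sub-registers that are still to be read.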
2423 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2424                                         MachineBasicBlock::iterator I,
2425                                         const DebugLoc &DL, unsigned DestReg,
2426                                         unsigned SrcReg, bool KillSrc,
2427                                         unsigned Opcode,
2428                                         ArrayRef<unsigned> Indices) const {
2429   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2430   const TargetRegisterInfo *TRI = &getRegisterInfo();
2431   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2432   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2433   unsigned NumRegs = Indices.size();
2434 
2435   int SubReg = 0, End = NumRegs, Incr = 1;
2436   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2437     SubReg = NumRegs - 1;
2438     End = -1;
2439     Incr = -1;
2440   }
2441 
2442   for (; SubReg != End; SubReg += Incr) {
2443     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2444     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2445     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2446     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2447   }
2448 }
2449 
2450 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2451                                    MachineBasicBlock::iterator I,
2452                                    const DebugLoc &DL, unsigned DestReg,
2453                                    unsigned SrcReg, bool KillSrc) const {
2454   if (AArch64::GPR32spRegClass.contains(DestReg) &&
2455       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2456     const TargetRegisterInfo *TRI = &getRegisterInfo();
2457 
2458     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2459       // If either operand is WSP, expand to ADD #0.
2460       if (Subtarget.hasZeroCycleRegMove()) {
2461         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2462         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2463                                                      &AArch64::GPR64spRegClass);
2464         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2465                                                     &AArch64::GPR64spRegClass);
2466         // This instruction is reading and writing X registers.  This may upset
2467         // the register scavenger and machine verifier, so we need to indicate
2468         // that we are reading an undefined value from SrcRegX, but a proper
2469         // value from SrcReg.
2470         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2471             .addReg(SrcRegX, RegState::Undef)
2472             .addImm(0)
2473             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2474             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2475       } else {
2476         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2477             .addReg(SrcReg, getKillRegState(KillSrc))
2478             .addImm(0)
2479             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2480       }
2481     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
2482       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2483           .addImm(0)
2484           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2485     } else {
2486       if (Subtarget.hasZeroCycleRegMove()) {
2487         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2488         unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2489                                                      &AArch64::GPR64spRegClass);
2490         unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2491                                                     &AArch64::GPR64spRegClass);
2492         // This instruction is reading and writing X registers.  This may upset
2493         // the register scavenger and machine verifier, so we need to indicate
2494         // that we are reading an undefined value from SrcRegX, but a proper
2495         // value from SrcReg.
2496         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2497             .addReg(AArch64::XZR)
2498             .addReg(SrcRegX, RegState::Undef)
2499             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2500       } else {
2501         // Otherwise, expand to ORR WZR.
2502         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2503             .addReg(AArch64::WZR)
2504             .addReg(SrcReg, getKillRegState(KillSrc));
2505       }
2506     }
2507     return;
2508   }
2509 
2510   if (AArch64::GPR64spRegClass.contains(DestReg) &&
2511       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2512     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2513       // If either operand is SP, expand to ADD #0.
2514       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2515           .addReg(SrcReg, getKillRegState(KillSrc))
2516           .addImm(0)
2517           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2518     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
2519       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2520           .addImm(0)
2521           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2522     } else {
2523       // Otherwise, expand to ORR XZR.
2524       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2525           .addReg(AArch64::XZR)
2526           .addReg(SrcReg, getKillRegState(KillSrc));
2527     }
2528     return;
2529   }
2530 
2531   // Copy a DDDD register quad by copying the individual sub-registers.
2532   if (AArch64::DDDDRegClass.contains(DestReg) &&
2533       AArch64::DDDDRegClass.contains(SrcReg)) {
2534     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2535                                        AArch64::dsub2, AArch64::dsub3};
2536     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2537                      Indices);
2538     return;
2539   }
2540 
2541   // Copy a DDD register triple by copying the individual sub-registers.
2542   if (AArch64::DDDRegClass.contains(DestReg) &&
2543       AArch64::DDDRegClass.contains(SrcReg)) {
2544     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2545                                        AArch64::dsub2};
2546     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2547                      Indices);
2548     return;
2549   }
2550 
2551   // Copy a DD register pair by copying the individual sub-registers.
2552   if (AArch64::DDRegClass.contains(DestReg) &&
2553       AArch64::DDRegClass.contains(SrcReg)) {
2554     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2555     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2556                      Indices);
2557     return;
2558   }
2559 
2560   // Copy a QQQQ register quad by copying the individual sub-registers.
2561   if (AArch64::QQQQRegClass.contains(DestReg) &&
2562       AArch64::QQQQRegClass.contains(SrcReg)) {
2563     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2564                                        AArch64::qsub2, AArch64::qsub3};
2565     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2566                      Indices);
2567     return;
2568   }
2569 
2570   // Copy a QQQ register triple by copying the individual sub-registers.
2571   if (AArch64::QQQRegClass.contains(DestReg) &&
2572       AArch64::QQQRegClass.contains(SrcReg)) {
2573     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2574                                        AArch64::qsub2};
2575     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2576                      Indices);
2577     return;
2578   }
2579 
2580   // Copy a QQ register pair by copying the individual sub-registers.
2581   if (AArch64::QQRegClass.contains(DestReg) &&
2582       AArch64::QQRegClass.contains(SrcReg)) {
2583     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2584     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2585                      Indices);
2586     return;
2587   }
2588 
2589   if (AArch64::FPR128RegClass.contains(DestReg) &&
2590       AArch64::FPR128RegClass.contains(SrcReg)) {
2591     if (Subtarget.hasNEON()) {
2592       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2593           .addReg(SrcReg)
2594           .addReg(SrcReg, getKillRegState(KillSrc));
2595     } else {
2596       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2597           .addReg(AArch64::SP, RegState::Define)
2598           .addReg(SrcReg, getKillRegState(KillSrc))
2599           .addReg(AArch64::SP)
2600           .addImm(-16);
2601       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2602           .addReg(AArch64::SP, RegState::Define)
2603           .addReg(DestReg, RegState::Define)
2604           .addReg(AArch64::SP)
2605           .addImm(16);
2606     }
2607     return;
2608   }
2609 
2610   if (AArch64::FPR64RegClass.contains(DestReg) &&
2611       AArch64::FPR64RegClass.contains(SrcReg)) {
2612     if (Subtarget.hasNEON()) {
2613       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2614                                        &AArch64::FPR128RegClass);
2615       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2616                                       &AArch64::FPR128RegClass);
2617       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2618           .addReg(SrcReg)
2619           .addReg(SrcReg, getKillRegState(KillSrc));
2620     } else {
2621       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2622           .addReg(SrcReg, getKillRegState(KillSrc));
2623     }
2624     return;
2625   }
2626 
2627   if (AArch64::FPR32RegClass.contains(DestReg) &&
2628       AArch64::FPR32RegClass.contains(SrcReg)) {
2629     if (Subtarget.hasNEON()) {
2630       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2631                                        &AArch64::FPR128RegClass);
2632       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2633                                       &AArch64::FPR128RegClass);
2634       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2635           .addReg(SrcReg)
2636           .addReg(SrcReg, getKillRegState(KillSrc));
2637     } else {
2638       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2639           .addReg(SrcReg, getKillRegState(KillSrc));
2640     }
2641     return;
2642   }
2643 
2644   if (AArch64::FPR16RegClass.contains(DestReg) &&
2645       AArch64::FPR16RegClass.contains(SrcReg)) {
2646     if (Subtarget.hasNEON()) {
2647       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2648                                        &AArch64::FPR128RegClass);
2649       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2650                                       &AArch64::FPR128RegClass);
2651       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2652           .addReg(SrcReg)
2653           .addReg(SrcReg, getKillRegState(KillSrc));
2654     } else {
2655       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2656                                        &AArch64::FPR32RegClass);
2657       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2658                                       &AArch64::FPR32RegClass);
2659       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2660           .addReg(SrcReg, getKillRegState(KillSrc));
2661     }
2662     return;
2663   }
2664 
2665   if (AArch64::FPR8RegClass.contains(DestReg) &&
2666       AArch64::FPR8RegClass.contains(SrcReg)) {
2667     if (Subtarget.hasNEON()) {
2668       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2669                                        &AArch64::FPR128RegClass);
2670       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2671                                       &AArch64::FPR128RegClass);
2672       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2673           .addReg(SrcReg)
2674           .addReg(SrcReg, getKillRegState(KillSrc));
2675     } else {
2676       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2677                                        &AArch64::FPR32RegClass);
2678       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2679                                       &AArch64::FPR32RegClass);
2680       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2681           .addReg(SrcReg, getKillRegState(KillSrc));
2682     }
2683     return;
2684   }
2685 
2686   // Copies between GPR64 and FPR64.
2687   if (AArch64::FPR64RegClass.contains(DestReg) &&
2688       AArch64::GPR64RegClass.contains(SrcReg)) {
2689     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2690         .addReg(SrcReg, getKillRegState(KillSrc));
2691     return;
2692   }
2693   if (AArch64::GPR64RegClass.contains(DestReg) &&
2694       AArch64::FPR64RegClass.contains(SrcReg)) {
2695     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2696         .addReg(SrcReg, getKillRegState(KillSrc));
2697     return;
2698   }
2699   // Copies between GPR32 and FPR32.
2700   if (AArch64::FPR32RegClass.contains(DestReg) &&
2701       AArch64::GPR32RegClass.contains(SrcReg)) {
2702     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2703         .addReg(SrcReg, getKillRegState(KillSrc));
2704     return;
2705   }
2706   if (AArch64::GPR32RegClass.contains(DestReg) &&
2707       AArch64::FPR32RegClass.contains(SrcReg)) {
2708     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2709         .addReg(SrcReg, getKillRegState(KillSrc));
2710     return;
2711   }
2712 
2713   if (DestReg == AArch64::NZCV) {
2714     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2715     BuildMI(MBB, I, DL, get(AArch64::MSR))
2716         .addImm(AArch64SysReg::NZCV)
2717         .addReg(SrcReg, getKillRegState(KillSrc))
2718         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2719     return;
2720   }
2721 
2722   if (SrcReg == AArch64::NZCV) {
2723     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2724     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2725         .addImm(AArch64SysReg::NZCV)
2726         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2727     return;
2728   }
2729 
2730   llvm_unreachable("unimplemented reg-to-reg copy");
2731 }
2732 
2733 void AArch64InstrInfo::storeRegToStackSlot(
2734     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2735     bool isKill, int FI, const TargetRegisterClass *RC,
2736     const TargetRegisterInfo *TRI) const {
2737   DebugLoc DL;
2738   if (MBBI != MBB.end())
2739     DL = MBBI->getDebugLoc();
2740   MachineFunction &MF = *MBB.getParent();
2741   MachineFrameInfo &MFI = MF.getFrameInfo();
2742   unsigned Align = MFI.getObjectAlignment(FI);
2743 
2744   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2745   MachineMemOperand *MMO = MF.getMachineMemOperand(
2746       PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2747   unsigned Opc = 0;
2748   bool Offset = true;
2749   switch (TRI->getSpillSize(*RC)) {
2750   case 1:
2751     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2752       Opc = AArch64::STRBui;
2753     break;
2754   case 2:
2755     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2756       Opc = AArch64::STRHui;
2757     break;
2758   case 4:
2759     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2760       Opc = AArch64::STRWui;
2761       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2762         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2763       else
2764         assert(SrcReg != AArch64::WSP);
2765     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2766       Opc = AArch64::STRSui;
2767     break;
2768   case 8:
2769     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2770       Opc = AArch64::STRXui;
2771       if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2772         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2773       else
2774         assert(SrcReg != AArch64::SP);
2775     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2776       Opc = AArch64::STRDui;
2777     break;
2778   case 16:
2779     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2780       Opc = AArch64::STRQui;
2781     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2782       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2783       Opc = AArch64::ST1Twov1d;
2784       Offset = false;
2785     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2786       BuildMI(MBB, MBBI, DL, get(AArch64::STPXi))
2787           .addReg(TRI->getSubReg(SrcReg, AArch64::sube64),
2788                   getKillRegState(isKill))
2789           .addReg(TRI->getSubReg(SrcReg, AArch64::subo64),
2790                   getKillRegState(isKill))
2791           .addFrameIndex(FI)
2792           .addImm(0)
2793           .addMemOperand(MMO);
2794       return;
2795     }
2796     break;
2797   case 24:
2798     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2799       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2800       Opc = AArch64::ST1Threev1d;
2801       Offset = false;
2802     }
2803     break;
2804   case 32:
2805     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2806       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2807       Opc = AArch64::ST1Fourv1d;
2808       Offset = false;
2809     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2810       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2811       Opc = AArch64::ST1Twov2d;
2812       Offset = false;
2813     }
2814     break;
2815   case 48:
2816     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2817       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2818       Opc = AArch64::ST1Threev2d;
2819       Offset = false;
2820     }
2821     break;
2822   case 64:
2823     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2824       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2825       Opc = AArch64::ST1Fourv2d;
2826       Offset = false;
2827     }
2828     break;
2829   }
2830   assert(Opc && "Unknown register class");
2831 
2832   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2833                                      .addReg(SrcReg, getKillRegState(isKill))
2834                                      .addFrameIndex(FI);
2835 
2836   if (Offset)
2837     MI.addImm(0);
2838   MI.addMemOperand(MMO);
2839 }
2840 
2841 void AArch64InstrInfo::loadRegFromStackSlot(
2842     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2843     int FI, const TargetRegisterClass *RC,
2844     const TargetRegisterInfo *TRI) const {
2845   DebugLoc DL;
2846   if (MBBI != MBB.end())
2847     DL = MBBI->getDebugLoc();
2848   MachineFunction &MF = *MBB.getParent();
2849   MachineFrameInfo &MFI = MF.getFrameInfo();
2850   unsigned Align = MFI.getObjectAlignment(FI);
2851   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2852   MachineMemOperand *MMO = MF.getMachineMemOperand(
2853       PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2854 
2855   unsigned Opc = 0;
2856   bool Offset = true;
2857   switch (TRI->getSpillSize(*RC)) {
2858   case 1:
2859     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2860       Opc = AArch64::LDRBui;
2861     break;
2862   case 2:
2863     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2864       Opc = AArch64::LDRHui;
2865     break;
2866   case 4:
2867     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2868       Opc = AArch64::LDRWui;
2869       if (TargetRegisterInfo::isVirtualRegister(DestReg))
2870         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2871       else
2872         assert(DestReg != AArch64::WSP);
2873     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2874       Opc = AArch64::LDRSui;
2875     break;
2876   case 8:
2877     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2878       Opc = AArch64::LDRXui;
2879       if (TargetRegisterInfo::isVirtualRegister(DestReg))
2880         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2881       else
2882         assert(DestReg != AArch64::SP);
2883     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
2884       Opc = AArch64::LDRDui;
2885     break;
2886   case 16:
2887     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2888       Opc = AArch64::LDRQui;
2889     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2890       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2891       Opc = AArch64::LD1Twov1d;
2892       Offset = false;
2893     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2894       BuildMI(MBB, MBBI, DL, get(AArch64::LDPXi))
2895           .addReg(TRI->getSubReg(DestReg, AArch64::sube64),
2896                   getDefRegState(true))
2897           .addReg(TRI->getSubReg(DestReg, AArch64::subo64),
2898                   getDefRegState(true))
2899           .addFrameIndex(FI)
2900           .addImm(0)
2901           .addMemOperand(MMO);
2902       return;
2903     }
2904     break;
2905   case 24:
2906     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2907       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2908       Opc = AArch64::LD1Threev1d;
2909       Offset = false;
2910     }
2911     break;
2912   case 32:
2913     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2914       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2915       Opc = AArch64::LD1Fourv1d;
2916       Offset = false;
2917     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2918       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2919       Opc = AArch64::LD1Twov2d;
2920       Offset = false;
2921     }
2922     break;
2923   case 48:
2924     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2925       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2926       Opc = AArch64::LD1Threev2d;
2927       Offset = false;
2928     }
2929     break;
2930   case 64:
2931     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2932       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2933       Opc = AArch64::LD1Fourv2d;
2934       Offset = false;
2935     }
2936     break;
2937   }
2938   assert(Opc && "Unknown register class");
2939 
2940   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
2941                                      .addReg(DestReg, getDefRegState(true))
2942                                      .addFrameIndex(FI);
2943   if (Offset)
2944     MI.addImm(0);
2945   MI.addMemOperand(MMO);
2946 }
2947 
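// Materialize DestReg = SrcReg +/- Offset with ADD/SUB immediates (optionally
// flag-setting when SetNZCV is true), splitting offsets that do not fit a
// 12-bit immediate into a chain of LSL #12 shifted chunks followed by the
// remainder.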
2948 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2949                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2950                            unsigned DestReg, unsigned SrcReg, int Offset,
2951                            const TargetInstrInfo *TII,
2952                            MachineInstr::MIFlag Flag, bool SetNZCV) {
2953   if (DestReg == SrcReg && Offset == 0)
2954     return;
2955 
2956   assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2957          "SP increment/decrement not 16-byte aligned");
2958 
2959   bool isSub = Offset < 0;
2960   if (isSub)
2961     Offset = -Offset;
2962 
2963   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2964   // scratch register.  If DestReg is a virtual register, use it as the
2965   // scratch register; otherwise, create a new virtual register (to be
2966   // replaced by the scavenger at the end of PEI).  That case can be optimized
2967   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2968   // register can be loaded with offset%8 and the add/sub can use an extending
2969   // instruction with LSL#3.
2970   // Currently the function handles any offsets but generates a poor sequence
2971   // of code.
2972   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2973 
2974   unsigned Opc;
2975   if (SetNZCV)
2976     Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2977   else
2978     Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2979   const unsigned MaxEncoding = 0xfff;
2980   const unsigned ShiftSize = 12;
2981   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
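  // Large offsets are materialized as a chain of 12-bit, LSL #12 shifted
  // adds/subs followed by a final unshifted add/sub of the low 12 bits.
  // For example (sketch), Offset = 0x1001234 with ADDXri emits roughly:
  //   add Dest, Src,  #0xfff, lsl #12   ; consumes 0xfff000
  //   add Dest, Dest, #0x2,   lsl #12   ; consumes 0x2000
  //   add Dest, Dest, #0x234            ; remaining low 12 bits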
2982   while (((unsigned)Offset) >= (1 << ShiftSize)) {
2983     unsigned ThisVal;
2984     if (((unsigned)Offset) > MaxEncodableValue) {
2985       ThisVal = MaxEncodableValue;
2986     } else {
2987       ThisVal = Offset & MaxEncodableValue;
2988     }
2989     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2990            "Encoding cannot handle value that big");
2991     BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2992         .addReg(SrcReg)
2993         .addImm(ThisVal >> ShiftSize)
2994         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2995         .setMIFlag(Flag);
2996 
2997     SrcReg = DestReg;
2998     Offset -= ThisVal;
2999     if (Offset == 0)
3000       return;
3001   }
3002   BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3003       .addReg(SrcReg)
3004       .addImm(Offset)
3005       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
3006       .setMIFlag(Flag);
3007 }
3008 
3009 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3010     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3011     MachineBasicBlock::iterator InsertPt, int FrameIndex,
3012     LiveIntervals *LIS) const {
3013   // This is a bit of a hack. Consider this instruction:
3014   //
3015   //   %0 = COPY %sp; GPR64all:%0
3016   //
3017   // We explicitly chose GPR64all for the virtual register so such a copy might
3018   // be eliminated by RegisterCoalescer. However, that may not be possible, and
3019   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3020   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3021   //
3022   // To prevent that, we are going to constrain the %0 register class here.
3023   //
3024   // <rdar://problem/11522048>
3025   //
3026   if (MI.isFullCopy()) {
3027     unsigned DstReg = MI.getOperand(0).getReg();
3028     unsigned SrcReg = MI.getOperand(1).getReg();
3029     if (SrcReg == AArch64::SP &&
3030         TargetRegisterInfo::isVirtualRegister(DstReg)) {
3031       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3032       return nullptr;
3033     }
3034     if (DstReg == AArch64::SP &&
3035         TargetRegisterInfo::isVirtualRegister(SrcReg)) {
3036       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3037       return nullptr;
3038     }
3039   }
3040 
3041   // Handle the case where a copy is being spilled or filled but the source
3042   // and destination register class don't match.  For example:
3043   //
3044   //   %0 = COPY %xzr; GPR64common:%0
3045   //
3046   // In this case we can still safely fold away the COPY and generate the
3047   // following spill code:
3048   //
3049   //   STRXui %xzr, %stack.0
3050   //
3051   // This also eliminates spilled cross register class COPYs (e.g. between x and
3052   // d regs) of the same size.  For example:
3053   //
3054   //   %0 = COPY %1; GPR64:%0, FPR64:%1
3055   //
3056   // will be filled as
3057   //
3058   //   LDRDui %0, fi<#0>
3059   //
3060   // instead of
3061   //
3062   //   LDRXui %Temp, fi<#0>
3063   //   %0 = FMOV %Temp
3064   //
3065   if (MI.isCopy() && Ops.size() == 1 &&
3066       // Make sure we're only folding the explicit COPY defs/uses.
3067       (Ops[0] == 0 || Ops[0] == 1)) {
3068     bool IsSpill = Ops[0] == 0;
3069     bool IsFill = !IsSpill;
3070     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3071     const MachineRegisterInfo &MRI = MF.getRegInfo();
3072     MachineBasicBlock &MBB = *MI.getParent();
3073     const MachineOperand &DstMO = MI.getOperand(0);
3074     const MachineOperand &SrcMO = MI.getOperand(1);
3075     unsigned DstReg = DstMO.getReg();
3076     unsigned SrcReg = SrcMO.getReg();
3077     // This is slightly expensive to compute for physical regs since
3078     // getMinimalPhysRegClass is slow.
3079     auto getRegClass = [&](unsigned Reg) {
3080       return TargetRegisterInfo::isVirtualRegister(Reg)
3081                  ? MRI.getRegClass(Reg)
3082                  : TRI.getMinimalPhysRegClass(Reg);
3083     };
3084 
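    // Plain (no-subreg) COPY: spill the source or fill the destination
    // directly, reusing the generic spill/fill code for its register class.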
3085     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3086       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3087                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3088              "Mismatched register size in non subreg COPY");
3089       if (IsSpill)
3090         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3091                             getRegClass(SrcReg), &TRI);
3092       else
3093         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3094                              getRegClass(DstReg), &TRI);
3095       return &*--InsertPt;
3096     }
3097 
3098     // Handle cases like spilling def of:
3099     //
3100     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3101     //
3102     // where the physical register source can be widened and stored to the full
3103     // virtual reg destination stack slot, in this case producing:
3104     //
3105     //   STRXui %xzr, %stack.0
3106     //
3107     if (IsSpill && DstMO.isUndef() &&
3108         TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
3109       assert(SrcMO.getSubReg() == 0 &&
3110              "Unexpected subreg on physical register");
3111       const TargetRegisterClass *SpillRC;
3112       unsigned SpillSubreg;
3113       switch (DstMO.getSubReg()) {
3114       default:
3115         SpillRC = nullptr;
3116         break;
3117       case AArch64::sub_32:
3118       case AArch64::ssub:
3119         if (AArch64::GPR32RegClass.contains(SrcReg)) {
3120           SpillRC = &AArch64::GPR64RegClass;
3121           SpillSubreg = AArch64::sub_32;
3122         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3123           SpillRC = &AArch64::FPR64RegClass;
3124           SpillSubreg = AArch64::ssub;
3125         } else
3126           SpillRC = nullptr;
3127         break;
3128       case AArch64::dsub:
3129         if (AArch64::FPR64RegClass.contains(SrcReg)) {
3130           SpillRC = &AArch64::FPR128RegClass;
3131           SpillSubreg = AArch64::dsub;
3132         } else
3133           SpillRC = nullptr;
3134         break;
3135       }
3136 
3137       if (SpillRC)
3138         if (unsigned WidenedSrcReg =
3139                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3140           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3141                               FrameIndex, SpillRC, &TRI);
3142           return &*--InsertPt;
3143         }
3144     }
3145 
3146     // Handle cases like filling use of:
3147     //
3148     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3149     //
3150     // where we can load the full virtual reg source stack slot, into the subreg
3151     // destination, in this case producing:
3152     //
3153     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
3154     //
3155     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3156       const TargetRegisterClass *FillRC;
3157       switch (DstMO.getSubReg()) {
3158       default:
3159         FillRC = nullptr;
3160         break;
3161       case AArch64::sub_32:
3162         FillRC = &AArch64::GPR32RegClass;
3163         break;
3164       case AArch64::ssub:
3165         FillRC = &AArch64::FPR32RegClass;
3166         break;
3167       case AArch64::dsub:
3168         FillRC = &AArch64::FPR64RegClass;
3169         break;
3170       }
3171 
3172       if (FillRC) {
3173         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3174                    TRI.getRegSizeInBits(*FillRC) &&
3175                "Mismatched regclass size on folded subreg COPY");
3176         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3177         MachineInstr &LoadMI = *--InsertPt;
3178         MachineOperand &LoadDst = LoadMI.getOperand(0);
3179         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3180         LoadDst.setSubReg(DstMO.getSubReg());
3181         LoadDst.setIsUndef();
3182         return &LoadMI;
3183       }
3184     }
3185   }
3186 
3187   // Cannot fold.
3188   return nullptr;
3189 }
3190 
3191 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
3192                                     bool *OutUseUnscaledOp,
3193                                     unsigned *OutUnscaledOp,
3194                                     int *EmittableOffset) {
3195   int Scale = 1;
3196   bool IsSigned = false;
3197   // The ImmIdx should be changed case by case if it is not 2.
3198   unsigned ImmIdx = 2;
3199   unsigned UnscaledOp = 0;
3200   // Set output values in case of early exit.
3201   if (EmittableOffset)
3202     *EmittableOffset = 0;
3203   if (OutUseUnscaledOp)
3204     *OutUseUnscaledOp = false;
3205   if (OutUnscaledOp)
3206     *OutUnscaledOp = 0;
3207   switch (MI.getOpcode()) {
3208   default:
3209     llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3210   // Vector spills/fills can't take an immediate offset.
3211   case AArch64::LD1Twov2d:
3212   case AArch64::LD1Threev2d:
3213   case AArch64::LD1Fourv2d:
3214   case AArch64::LD1Twov1d:
3215   case AArch64::LD1Threev1d:
3216   case AArch64::LD1Fourv1d:
3217   case AArch64::ST1Twov2d:
3218   case AArch64::ST1Threev2d:
3219   case AArch64::ST1Fourv2d:
3220   case AArch64::ST1Twov1d:
3221   case AArch64::ST1Threev1d:
3222   case AArch64::ST1Fourv1d:
3223     return AArch64FrameOffsetCannotUpdate;
3224   case AArch64::PRFMui:
3225     Scale = 8;
3226     UnscaledOp = AArch64::PRFUMi;
3227     break;
3228   case AArch64::LDRXui:
3229     Scale = 8;
3230     UnscaledOp = AArch64::LDURXi;
3231     break;
3232   case AArch64::LDRWui:
3233     Scale = 4;
3234     UnscaledOp = AArch64::LDURWi;
3235     break;
3236   case AArch64::LDRBui:
3237     Scale = 1;
3238     UnscaledOp = AArch64::LDURBi;
3239     break;
3240   case AArch64::LDRHui:
3241     Scale = 2;
3242     UnscaledOp = AArch64::LDURHi;
3243     break;
3244   case AArch64::LDRSui:
3245     Scale = 4;
3246     UnscaledOp = AArch64::LDURSi;
3247     break;
3248   case AArch64::LDRDui:
3249     Scale = 8;
3250     UnscaledOp = AArch64::LDURDi;
3251     break;
3252   case AArch64::LDRQui:
3253     Scale = 16;
3254     UnscaledOp = AArch64::LDURQi;
3255     break;
3256   case AArch64::LDRBBui:
3257     Scale = 1;
3258     UnscaledOp = AArch64::LDURBBi;
3259     break;
3260   case AArch64::LDRHHui:
3261     Scale = 2;
3262     UnscaledOp = AArch64::LDURHHi;
3263     break;
3264   case AArch64::LDRSBXui:
3265     Scale = 1;
3266     UnscaledOp = AArch64::LDURSBXi;
3267     break;
3268   case AArch64::LDRSBWui:
3269     Scale = 1;
3270     UnscaledOp = AArch64::LDURSBWi;
3271     break;
3272   case AArch64::LDRSHXui:
3273     Scale = 2;
3274     UnscaledOp = AArch64::LDURSHXi;
3275     break;
3276   case AArch64::LDRSHWui:
3277     Scale = 2;
3278     UnscaledOp = AArch64::LDURSHWi;
3279     break;
3280   case AArch64::LDRSWui:
3281     Scale = 4;
3282     UnscaledOp = AArch64::LDURSWi;
3283     break;
3284 
3285   case AArch64::STRXui:
3286     Scale = 8;
3287     UnscaledOp = AArch64::STURXi;
3288     break;
3289   case AArch64::STRWui:
3290     Scale = 4;
3291     UnscaledOp = AArch64::STURWi;
3292     break;
3293   case AArch64::STRBui:
3294     Scale = 1;
3295     UnscaledOp = AArch64::STURBi;
3296     break;
3297   case AArch64::STRHui:
3298     Scale = 2;
3299     UnscaledOp = AArch64::STURHi;
3300     break;
3301   case AArch64::STRSui:
3302     Scale = 4;
3303     UnscaledOp = AArch64::STURSi;
3304     break;
3305   case AArch64::STRDui:
3306     Scale = 8;
3307     UnscaledOp = AArch64::STURDi;
3308     break;
3309   case AArch64::STRQui:
3310     Scale = 16;
3311     UnscaledOp = AArch64::STURQi;
3312     break;
3313   case AArch64::STRBBui:
3314     Scale = 1;
3315     UnscaledOp = AArch64::STURBBi;
3316     break;
3317   case AArch64::STRHHui:
3318     Scale = 2;
3319     UnscaledOp = AArch64::STURHHi;
3320     break;
3321 
3322   case AArch64::LDPXi:
3323   case AArch64::LDPDi:
3324   case AArch64::STPXi:
3325   case AArch64::STPDi:
3326   case AArch64::LDNPXi:
3327   case AArch64::LDNPDi:
3328   case AArch64::STNPXi:
3329   case AArch64::STNPDi:
3330     ImmIdx = 3;
3331     IsSigned = true;
3332     Scale = 8;
3333     break;
3334   case AArch64::LDPQi:
3335   case AArch64::STPQi:
3336   case AArch64::LDNPQi:
3337   case AArch64::STNPQi:
3338     ImmIdx = 3;
3339     IsSigned = true;
3340     Scale = 16;
3341     break;
3342   case AArch64::LDPWi:
3343   case AArch64::LDPSi:
3344   case AArch64::STPWi:
3345   case AArch64::STPSi:
3346   case AArch64::LDNPWi:
3347   case AArch64::LDNPSi:
3348   case AArch64::STNPWi:
3349   case AArch64::STNPSi:
3350     ImmIdx = 3;
3351     IsSigned = true;
3352     Scale = 4;
3353     break;
3354 
3355   case AArch64::LDURXi:
3356   case AArch64::LDURWi:
3357   case AArch64::LDURBi:
3358   case AArch64::LDURHi:
3359   case AArch64::LDURSi:
3360   case AArch64::LDURDi:
3361   case AArch64::LDURQi:
3362   case AArch64::LDURHHi:
3363   case AArch64::LDURBBi:
3364   case AArch64::LDURSBXi:
3365   case AArch64::LDURSBWi:
3366   case AArch64::LDURSHXi:
3367   case AArch64::LDURSHWi:
3368   case AArch64::LDURSWi:
3369   case AArch64::STURXi:
3370   case AArch64::STURWi:
3371   case AArch64::STURBi:
3372   case AArch64::STURHi:
3373   case AArch64::STURSi:
3374   case AArch64::STURDi:
3375   case AArch64::STURQi:
3376   case AArch64::STURBBi:
3377   case AArch64::STURHHi:
3378     Scale = 1;
3379     break;
3380   }
3381 
3382   Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3383 
3384   bool useUnscaledOp = false;
3385   // If the offset doesn't match the scale, we rewrite the instruction to
3386   // use the unscaled instruction instead. Likewise, if we have a negative
3387   // offset (and have an unscaled op to use).
3388   if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3389     useUnscaledOp = true;
3390 
3391   // Use an unscaled addressing mode if the instruction has a negative offset
3392   // (or if the instruction is already using an unscaled addressing mode).
3393   unsigned MaskBits;
3394   if (IsSigned) {
3395     // ldp/stp instructions.
3396     MaskBits = 7;
3397     Offset /= Scale;
3398   } else if (UnscaledOp == 0 || useUnscaledOp) {
3399     MaskBits = 9;
3400     IsSigned = true;
3401     Scale = 1;
3402   } else {
3403     MaskBits = 12;
3404     IsSigned = false;
3405     Offset /= Scale;
3406   }
3407 
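  // At this point MaskBits gives the width of the immediate field: 7-bit
  // signed for ldp/stp, 9-bit signed (byte offsets) for the unscaled
  // LDUR/STUR forms, and 12-bit unsigned (in units of Scale) for the scaled
  // "ui" forms.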
3408   // Attempt to fold address computation.
3409   int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3410   int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3411   if (Offset >= MinOff && Offset <= MaxOff) {
3412     if (EmittableOffset)
3413       *EmittableOffset = Offset;
3414     Offset = 0;
3415   } else {
3416     int NewOff = Offset < 0 ? MinOff : MaxOff;
3417     if (EmittableOffset)
3418       *EmittableOffset = NewOff;
3419     Offset = (Offset - NewOff) * Scale;
3420   }
3421   if (OutUseUnscaledOp)
3422     *OutUseUnscaledOp = useUnscaledOp;
3423   if (OutUnscaledOp)
3424     *OutUnscaledOp = UnscaledOp;
3425   return AArch64FrameOffsetCanUpdate |
3426          (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3427 }
3428 
3429 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3430                                     unsigned FrameReg, int &Offset,
3431                                     const AArch64InstrInfo *TII) {
3432   unsigned Opcode = MI.getOpcode();
3433   unsigned ImmIdx = FrameRegIdx + 1;
3434 
3435   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3436     Offset += MI.getOperand(ImmIdx).getImm();
3437     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3438                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3439                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3440     MI.eraseFromParent();
3441     Offset = 0;
3442     return true;
3443   }
3444 
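  // For loads and stores, ask isAArch64FrameOffsetLegal how much of the
  // offset can be folded into the instruction's immediate field; whatever
  // cannot be folded is left in Offset for the caller to materialize.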
3445   int NewOffset;
3446   unsigned UnscaledOp;
3447   bool UseUnscaledOp;
3448   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3449                                          &UnscaledOp, &NewOffset);
3450   if (Status & AArch64FrameOffsetCanUpdate) {
3451     if (Status & AArch64FrameOffsetIsLegal)
3452       // Replace the FrameIndex with FrameReg.
3453       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3454     if (UseUnscaledOp)
3455       MI.setDesc(TII->get(UnscaledOp));
3456 
3457     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3458     return Offset == 0;
3459   }
3460 
3461   return false;
3462 }
3463 
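// On AArch64 the canonical NOP is encoded as HINT #0.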
3464 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3465   NopInst.setOpcode(AArch64::HINT);
3466   NopInst.addOperand(MCOperand::createImm(0));
3467 }
3468 
3469 // AArch64 supports MachineCombiner.
3470 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3471 
3472 // True when Opc sets flag
3473 static bool isCombineInstrSettingFlag(unsigned Opc) {
3474   switch (Opc) {
3475   case AArch64::ADDSWrr:
3476   case AArch64::ADDSWri:
3477   case AArch64::ADDSXrr:
3478   case AArch64::ADDSXri:
3479   case AArch64::SUBSWrr:
3480   case AArch64::SUBSXrr:
3481   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3482   case AArch64::SUBSWri:
3483   case AArch64::SUBSXri:
3484     return true;
3485   default:
3486     break;
3487   }
3488   return false;
3489 }
3490 
3491 // 32b Opcodes that can be combined with a MUL
3492 static bool isCombineInstrCandidate32(unsigned Opc) {
3493   switch (Opc) {
3494   case AArch64::ADDWrr:
3495   case AArch64::ADDWri:
3496   case AArch64::SUBWrr:
3497   case AArch64::ADDSWrr:
3498   case AArch64::ADDSWri:
3499   case AArch64::SUBSWrr:
3500   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3501   case AArch64::SUBWri:
3502   case AArch64::SUBSWri:
3503     return true;
3504   default:
3505     break;
3506   }
3507   return false;
3508 }
3509 
3510 // 64b Opcodes that can be combined with a MUL
3511 static bool isCombineInstrCandidate64(unsigned Opc) {
3512   switch (Opc) {
3513   case AArch64::ADDXrr:
3514   case AArch64::ADDXri:
3515   case AArch64::SUBXrr:
3516   case AArch64::ADDSXrr:
3517   case AArch64::ADDSXri:
3518   case AArch64::SUBSXrr:
3519   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3520   case AArch64::SUBXri:
3521   case AArch64::SUBSXri:
3522     return true;
3523   default:
3524     break;
3525   }
3526   return false;
3527 }
3528 
3529 // FP Opcodes that can be combined with a FMUL
3530 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3531   switch (Inst.getOpcode()) {
3532   default:
3533     break;
3534   case AArch64::FADDSrr:
3535   case AArch64::FADDDrr:
3536   case AArch64::FADDv2f32:
3537   case AArch64::FADDv2f64:
3538   case AArch64::FADDv4f32:
3539   case AArch64::FSUBSrr:
3540   case AArch64::FSUBDrr:
3541   case AArch64::FSUBv2f32:
3542   case AArch64::FSUBv2f64:
3543   case AArch64::FSUBv4f32:
3544     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3545     return (Options.UnsafeFPMath ||
3546             Options.AllowFPOpFusion == FPOpFusion::Fast);
3547   }
3548   return false;
3549 }
3550 
3551 // Opcodes that can be combined with a MUL
3552 static bool isCombineInstrCandidate(unsigned Opc) {
3553   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3554 }
3555 
3556 //
3557 // Utility routine that checks if \param MO is defined by an
3558 // \param CombineOpc instruction in the basic block \param MBB
3559 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3560                        unsigned CombineOpc, unsigned ZeroReg = 0,
3561                        bool CheckZeroReg = false) {
3562   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3563   MachineInstr *MI = nullptr;
3564 
3565   if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3566     MI = MRI.getUniqueVRegDef(MO.getReg());
3567   // And it needs to be in the trace (otherwise, it won't have a depth).
3568   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3569     return false;
3570   // It must only be used by the user we combine with.
3571   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3572     return false;
3573 
3574   if (CheckZeroReg) {
3575     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3576            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3577            MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3578     // The third input reg must be zero.
3579     if (MI->getOperand(3).getReg() != ZeroReg)
3580       return false;
3581   }
3582 
3583   return true;
3584 }
3585 
3586 //
3587 // Is \param MO defined by an integer multiply, and can it be combined?
3588 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3589                               unsigned MulOpc, unsigned ZeroReg) {
3590   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3591 }
3592 
3593 //
3594 // Is \param MO defined by a floating-point multiply, and can it be combined?
3595 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3596                                unsigned MulOpc) {
3597   return canCombine(MBB, MO, MulOpc);
3598 }
3599 
3600 // TODO: There are many more machine instruction opcodes to match:
3601 //       1. Other data types (integer, vectors)
3602 //       2. Other math / logic operations (xor, or)
3603 //       3. Other forms of the same operation (intrinsics and other variants)
3604 bool AArch64InstrInfo::isAssociativeAndCommutative(
3605     const MachineInstr &Inst) const {
3606   switch (Inst.getOpcode()) {
3607   case AArch64::FADDDrr:
3608   case AArch64::FADDSrr:
3609   case AArch64::FADDv2f32:
3610   case AArch64::FADDv2f64:
3611   case AArch64::FADDv4f32:
3612   case AArch64::FMULDrr:
3613   case AArch64::FMULSrr:
3614   case AArch64::FMULX32:
3615   case AArch64::FMULX64:
3616   case AArch64::FMULXv2f32:
3617   case AArch64::FMULXv2f64:
3618   case AArch64::FMULXv4f32:
3619   case AArch64::FMULv2f32:
3620   case AArch64::FMULv2f64:
3621   case AArch64::FMULv4f32:
3622     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3623   default:
3624     return false;
3625   }
3626 }
3627 
3628 /// Find instructions that can be turned into madd.
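/// For example (sketch), given
///   %3 = MADDWrrr %1, %2, %wzr   ; i.e. a plain 32-bit multiply
///   %4 = ADDWrr %3, %0
/// the MULADDW_OP1 pattern is recorded so the combiner can later rewrite the
/// pair as a single MADDWrrr %1, %2, %0.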
3629 static bool getMaddPatterns(MachineInstr &Root,
3630                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3631   unsigned Opc = Root.getOpcode();
3632   MachineBasicBlock &MBB = *Root.getParent();
3633   bool Found = false;
3634 
3635   if (!isCombineInstrCandidate(Opc))
3636     return false;
3637   if (isCombineInstrSettingFlag(Opc)) {
3638     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3639     // When NZCV is live, bail out.
3640     if (Cmp_NZCV == -1)
3641       return false;
3642     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3643     // When the opcode can't change, bail out.
3644     // CHECKME: do we miss any cases for opcode conversion?
3645     if (NewOpc == Opc)
3646       return false;
3647     Opc = NewOpc;
3648   }
3649 
3650   switch (Opc) {
3651   default:
3652     break;
3653   case AArch64::ADDWrr:
3654     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3655            "ADDWrr does not have register operands");
3656     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3657                           AArch64::WZR)) {
3658       Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3659       Found = true;
3660     }
3661     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3662                           AArch64::WZR)) {
3663       Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3664       Found = true;
3665     }
3666     break;
3667   case AArch64::ADDXrr:
3668     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3669                           AArch64::XZR)) {
3670       Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3671       Found = true;
3672     }
3673     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3674                           AArch64::XZR)) {
3675       Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3676       Found = true;
3677     }
3678     break;
3679   case AArch64::SUBWrr:
3680     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3681                           AArch64::WZR)) {
3682       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
3683       Found = true;
3684     }
3685     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3686                           AArch64::WZR)) {
3687       Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
3688       Found = true;
3689     }
3690     break;
3691   case AArch64::SUBXrr:
3692     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3693                           AArch64::XZR)) {
3694       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
3695       Found = true;
3696     }
3697     if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3698                           AArch64::XZR)) {
3699       Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
3700       Found = true;
3701     }
3702     break;
3703   case AArch64::ADDWri:
3704     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3705                           AArch64::WZR)) {
3706       Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
3707       Found = true;
3708     }
3709     break;
3710   case AArch64::ADDXri:
3711     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3712                           AArch64::XZR)) {
3713       Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3714       Found = true;
3715     }
3716     break;
3717   case AArch64::SUBWri:
3718     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3719                           AArch64::WZR)) {
3720       Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3721       Found = true;
3722     }
3723     break;
3724   case AArch64::SUBXri:
3725     if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3726                           AArch64::XZR)) {
3727       Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3728       Found = true;
3729     }
3730     break;
3731   }
3732   return Found;
3733 }
3734 /// Floating-Point Support
3735 
3736 /// Find instructions that can be turned into fmadd (fused multiply-add).
3737 static bool getFMAPatterns(MachineInstr &Root,
3738                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3739 
3740   if (!isCombineInstrCandidateFP(Root))
3741     return false;
3742 
3743   MachineBasicBlock &MBB = *Root.getParent();
3744   bool Found = false;
3745 
3746   switch (Root.getOpcode()) {
3747   default:
3748     assert(false && "Unsupported FP instruction in combiner\n");
3749     break;
3750   case AArch64::FADDSrr:
3751     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3752            "FADDSrr does not have register operands");
3753     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3754       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3755       Found = true;
3756     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3757                                   AArch64::FMULv1i32_indexed)) {
3758       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3759       Found = true;
3760     }
3761     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3762       Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3763       Found = true;
3764     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3765                                   AArch64::FMULv1i32_indexed)) {
3766       Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3767       Found = true;
3768     }
3769     break;
3770   case AArch64::FADDDrr:
3771     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3772       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3773       Found = true;
3774     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3775                                   AArch64::FMULv1i64_indexed)) {
3776       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3777       Found = true;
3778     }
3779     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3780       Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3781       Found = true;
3782     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3783                                   AArch64::FMULv1i64_indexed)) {
3784       Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3785       Found = true;
3786     }
3787     break;
3788   case AArch64::FADDv2f32:
3789     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3790                            AArch64::FMULv2i32_indexed)) {
3791       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3792       Found = true;
3793     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3794                                   AArch64::FMULv2f32)) {
3795       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3796       Found = true;
3797     }
3798     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3799                            AArch64::FMULv2i32_indexed)) {
3800       Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3801       Found = true;
3802     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3803                                   AArch64::FMULv2f32)) {
3804       Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3805       Found = true;
3806     }
3807     break;
3808   case AArch64::FADDv2f64:
3809     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3810                            AArch64::FMULv2i64_indexed)) {
3811       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3812       Found = true;
3813     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3814                                   AArch64::FMULv2f64)) {
3815       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3816       Found = true;
3817     }
3818     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3819                            AArch64::FMULv2i64_indexed)) {
3820       Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3821       Found = true;
3822     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3823                                   AArch64::FMULv2f64)) {
3824       Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3825       Found = true;
3826     }
3827     break;
3828   case AArch64::FADDv4f32:
3829     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3830                            AArch64::FMULv4i32_indexed)) {
3831       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3832       Found = true;
3833     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3834                                   AArch64::FMULv4f32)) {
3835       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3836       Found = true;
3837     }
3838     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3839                            AArch64::FMULv4i32_indexed)) {
3840       Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3841       Found = true;
3842     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3843                                   AArch64::FMULv4f32)) {
3844       Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3845       Found = true;
3846     }
3847     break;
3848 
3849   case AArch64::FSUBSrr:
3850     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3851       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3852       Found = true;
3853     }
3854     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3855       Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3856       Found = true;
3857     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3858                                   AArch64::FMULv1i32_indexed)) {
3859       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3860       Found = true;
3861     }
3862     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3863       Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
3864       Found = true;
3865     }
3866     break;
3867   case AArch64::FSUBDrr:
3868     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3869       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3870       Found = true;
3871     }
3872     if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3873       Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3874       Found = true;
3875     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3876                                   AArch64::FMULv1i64_indexed)) {
3877       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3878       Found = true;
3879     }
3880     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3881       Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
3882       Found = true;
3883     }
3884     break;
3885   case AArch64::FSUBv2f32:
3886     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3887                            AArch64::FMULv2i32_indexed)) {
3888       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3889       Found = true;
3890     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3891                                   AArch64::FMULv2f32)) {
3892       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3893       Found = true;
3894     }
3895     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3896                            AArch64::FMULv2i32_indexed)) {
3897       Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
3898       Found = true;
3899     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3900                                   AArch64::FMULv2f32)) {
3901       Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
3902       Found = true;
3903     }
3904     break;
3905   case AArch64::FSUBv2f64:
3906     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3907                            AArch64::FMULv2i64_indexed)) {
3908       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3909       Found = true;
3910     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3911                                   AArch64::FMULv2f64)) {
3912       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3913       Found = true;
3914     }
3915     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3916                            AArch64::FMULv2i64_indexed)) {
3917       Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
3918       Found = true;
3919     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3920                                   AArch64::FMULv2f64)) {
3921       Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
3922       Found = true;
3923     }
3924     break;
3925   case AArch64::FSUBv4f32:
3926     if (canCombineWithFMUL(MBB, Root.getOperand(2),
3927                            AArch64::FMULv4i32_indexed)) {
3928       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3929       Found = true;
3930     } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3931                                   AArch64::FMULv4f32)) {
3932       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3933       Found = true;
3934     }
3935     if (canCombineWithFMUL(MBB, Root.getOperand(1),
3936                            AArch64::FMULv4i32_indexed)) {
3937       Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
3938       Found = true;
3939     } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3940                                   AArch64::FMULv4f32)) {
3941       Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
3942       Found = true;
3943     }
3944     break;
3945   }
3946   return Found;
3947 }
3948 
3949 /// Return true when a code sequence can improve throughput. It
3950 /// should be called only for instructions in loops.
3951 /// \param Pattern - combiner pattern
3952 bool AArch64InstrInfo::isThroughputPattern(
3953     MachineCombinerPattern Pattern) const {
3954   switch (Pattern) {
3955   default:
3956     break;
3957   case MachineCombinerPattern::FMULADDS_OP1:
3958   case MachineCombinerPattern::FMULADDS_OP2:
3959   case MachineCombinerPattern::FMULSUBS_OP1:
3960   case MachineCombinerPattern::FMULSUBS_OP2:
3961   case MachineCombinerPattern::FMULADDD_OP1:
3962   case MachineCombinerPattern::FMULADDD_OP2:
3963   case MachineCombinerPattern::FMULSUBD_OP1:
3964   case MachineCombinerPattern::FMULSUBD_OP2:
3965   case MachineCombinerPattern::FNMULSUBS_OP1:
3966   case MachineCombinerPattern::FNMULSUBD_OP1:
3967   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3968   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3969   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3970   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3971   case MachineCombinerPattern::FMLAv2f32_OP2:
3972   case MachineCombinerPattern::FMLAv2f32_OP1:
3973   case MachineCombinerPattern::FMLAv2f64_OP1:
3974   case MachineCombinerPattern::FMLAv2f64_OP2:
3975   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3976   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3977   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3978   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3979   case MachineCombinerPattern::FMLAv4f32_OP1:
3980   case MachineCombinerPattern::FMLAv4f32_OP2:
3981   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3982   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3983   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3984   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3985   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3986   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3987   case MachineCombinerPattern::FMLSv2f32_OP2:
3988   case MachineCombinerPattern::FMLSv2f64_OP2:
3989   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3990   case MachineCombinerPattern::FMLSv4f32_OP2:
3991     return true;
3992   } // end switch (Pattern)
3993   return false;
3994 }
3995 /// Return true when there is potentially a faster code sequence for an
3996 /// instruction chain ending in \p Root. All potential patterns are listed in
3997 /// the \p Pattern vector. Pattern should be sorted in priority order since the
3998 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3999 
4000 bool AArch64InstrInfo::getMachineCombinerPatterns(
4001     MachineInstr &Root,
4002     SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
4003   // Integer patterns
4004   if (getMaddPatterns(Root, Patterns))
4005     return true;
4006   // Floating point patterns
4007   if (getFMAPatterns(Root, Patterns))
4008     return true;
4009 
4010   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
4011 }
4012 
4013 enum class FMAInstKind { Default, Indexed, Accumulator };
4014 /// genFusedMultiply - Generate fused multiply instructions.
4015 /// This function supports both integer and floating point instructions.
4016 /// A typical example:
4017 ///  F|MUL I=A,B,0
4018 ///  F|ADD R,I,C
4019 ///  ==> F|MADD R,A,B,C
4020 /// \param MF Containing MachineFunction
4021 /// \param MRI Register information
4022 /// \param TII Target information
4023 /// \param Root is the F|ADD instruction
4024 /// \param [out] InsInstrs is a vector of machine instructions and will
4025 /// contain the generated madd instruction
4026 /// \param IdxMulOpd is index of operand in Root that is the result of
4027 /// the F|MUL. In the example above IdxMulOpd is 1.
4028 /// \param MaddOpc the opcode of the f|madd instruction
4029 /// \param RC Register class of operands
4030 /// \param kind The kind of FMA instruction (addressing mode) to be generated
4031 /// \param ReplacedAddend is the result register from the instruction
4032 /// replacing the non-combined operand, if any.
4033 static MachineInstr *
4034 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4035                  const TargetInstrInfo *TII, MachineInstr &Root,
4036                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4037                  unsigned MaddOpc, const TargetRegisterClass *RC,
4038                  FMAInstKind kind = FMAInstKind::Default,
4039                  const unsigned *ReplacedAddend = nullptr) {
4040   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4041 
4042   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4043   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4044   unsigned ResultReg = Root.getOperand(0).getReg();
4045   unsigned SrcReg0 = MUL->getOperand(1).getReg();
4046   bool Src0IsKill = MUL->getOperand(1).isKill();
4047   unsigned SrcReg1 = MUL->getOperand(2).getReg();
4048   bool Src1IsKill = MUL->getOperand(2).isKill();
4049 
4050   unsigned SrcReg2;
4051   bool Src2IsKill;
4052   if (ReplacedAddend) {
4053     // If we just generated a new addend, we must be its only use.
4054     SrcReg2 = *ReplacedAddend;
4055     Src2IsKill = true;
4056   } else {
4057     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4058     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4059   }
4060 
4061   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4062     MRI.constrainRegClass(ResultReg, RC);
4063   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4064     MRI.constrainRegClass(SrcReg0, RC);
4065   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4066     MRI.constrainRegClass(SrcReg1, RC);
4067   if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
4068     MRI.constrainRegClass(SrcReg2, RC);
4069 
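  // Note the operand order differs by kind: MADD-style opcodes take the
  // addend as the last source operand, while the Indexed/Accumulator
  // (FMLA/FMLS-style) forms take the accumulator first, followed by the
  // multiplicands and, for Indexed, the lane immediate copied from the FMUL.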
4070   MachineInstrBuilder MIB;
4071   if (kind == FMAInstKind::Default)
4072     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4073               .addReg(SrcReg0, getKillRegState(Src0IsKill))
4074               .addReg(SrcReg1, getKillRegState(Src1IsKill))
4075               .addReg(SrcReg2, getKillRegState(Src2IsKill));
4076   else if (kind == FMAInstKind::Indexed)
4077     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4078               .addReg(SrcReg2, getKillRegState(Src2IsKill))
4079               .addReg(SrcReg0, getKillRegState(Src0IsKill))
4080               .addReg(SrcReg1, getKillRegState(Src1IsKill))
4081               .addImm(MUL->getOperand(3).getImm());
4082   else if (kind == FMAInstKind::Accumulator)
4083     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4084               .addReg(SrcReg2, getKillRegState(Src2IsKill))
4085               .addReg(SrcReg0, getKillRegState(Src0IsKill))
4086               .addReg(SrcReg1, getKillRegState(Src1IsKill));
4087   else
4088     assert(false && "Invalid FMA instruction kind \n");
4089   // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS).
4090   InsInstrs.push_back(MIB);
4091   return MUL;
4092 }
4093 
4094 /// genMaddR - Generate madd instruction and combine mul and add using
4095 /// an extra virtual register
4096 /// Example - an ADD intermediate needs to be stored in a register:
4097 ///   MUL I=A,B,0
4098 ///   ADD R,I,Imm
4099 ///   ==> ORR  V, ZR, Imm
4100 ///   ==> MADD R,A,B,V
4101 /// \param MF Containing MachineFunction
4102 /// \param MRI Register information
4103 /// \param TII Target information
4104 /// \param Root is the ADD instruction
4105 /// \param [out] InsInstrs is a vector of machine instructions and will
4106 /// contain the generated madd instruction
4107 /// \param IdxMulOpd is index of operand in Root that is the result of
4108 /// the MUL. In the example above IdxMulOpd is 1.
4109 /// \param MaddOpc the opcode of the madd instruction
4110 /// \param VR is a virtual register that holds the value of an ADD operand
4111 /// (V in the example above).
4112 /// \param RC Register class of operands
4113 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4114                               const TargetInstrInfo *TII, MachineInstr &Root,
4115                               SmallVectorImpl<MachineInstr *> &InsInstrs,
4116                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4117                               const TargetRegisterClass *RC) {
4118   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4119 
4120   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4121   unsigned ResultReg = Root.getOperand(0).getReg();
4122   unsigned SrcReg0 = MUL->getOperand(1).getReg();
4123   bool Src0IsKill = MUL->getOperand(1).isKill();
4124   unsigned SrcReg1 = MUL->getOperand(2).getReg();
4125   bool Src1IsKill = MUL->getOperand(2).isKill();
4126 
4127   if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4128     MRI.constrainRegClass(ResultReg, RC);
4129   if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4130     MRI.constrainRegClass(SrcReg0, RC);
4131   if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4132     MRI.constrainRegClass(SrcReg1, RC);
4133   if (TargetRegisterInfo::isVirtualRegister(VR))
4134     MRI.constrainRegClass(VR, RC);
4135 
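  // MADD Rd, Rn, Rm, Ra: the value materialized into VR becomes the addend.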
4136   MachineInstrBuilder MIB =
4137       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4138           .addReg(SrcReg0, getKillRegState(Src0IsKill))
4139           .addReg(SrcReg1, getKillRegState(Src1IsKill))
4140           .addReg(VR);
4141   // Insert the MADD
4142   InsInstrs.push_back(MIB);
4143   return MUL;
4144 }
4145 
4146 /// When getMachineCombinerPatterns() finds potential patterns,
4147 /// this function generates the instructions that could replace the
4148 /// original code sequence
4149 void AArch64InstrInfo::genAlternativeCodeSequence(
4150     MachineInstr &Root, MachineCombinerPattern Pattern,
4151     SmallVectorImpl<MachineInstr *> &InsInstrs,
4152     SmallVectorImpl<MachineInstr *> &DelInstrs,
4153     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4154   MachineBasicBlock &MBB = *Root.getParent();
4155   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4156   MachineFunction &MF = *MBB.getParent();
4157   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4158 
4159   MachineInstr *MUL;
4160   const TargetRegisterClass *RC;
4161   unsigned Opc;
4162   switch (Pattern) {
4163   default:
4164     // Reassociate instructions.
4165     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4166                                                 DelInstrs, InstrIdxForVirtReg);
4167     return;
4168   case MachineCombinerPattern::MULADDW_OP1:
4169   case MachineCombinerPattern::MULADDX_OP1:
4170     // MUL I=A,B,0
4171     // ADD R,I,C
4172     // ==> MADD R,A,B,C
4173     // --- Create(MADD);
4174     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4175       Opc = AArch64::MADDWrrr;
4176       RC = &AArch64::GPR32RegClass;
4177     } else {
4178       Opc = AArch64::MADDXrrr;
4179       RC = &AArch64::GPR64RegClass;
4180     }
4181     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4182     break;
4183   case MachineCombinerPattern::MULADDW_OP2:
4184   case MachineCombinerPattern::MULADDX_OP2:
4185     // MUL I=A,B,0
4186     // ADD R,C,I
4187     // ==> MADD R,A,B,C
4188     // --- Create(MADD);
4189     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4190       Opc = AArch64::MADDWrrr;
4191       RC = &AArch64::GPR32RegClass;
4192     } else {
4193       Opc = AArch64::MADDXrrr;
4194       RC = &AArch64::GPR64RegClass;
4195     }
4196     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4197     break;
4198   case MachineCombinerPattern::MULADDWI_OP1:
4199   case MachineCombinerPattern::MULADDXI_OP1: {
4200     // MUL I=A,B,0
4201     // ADD R,I,Imm
4202     // ==> ORR  V, ZR, Imm
4203     // ==> MADD R,A,B,V
4204     // --- Create(MADD);
4205     const TargetRegisterClass *OrrRC;
4206     unsigned BitSize, OrrOpc, ZeroReg;
4207     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4208       OrrOpc = AArch64::ORRWri;
4209       OrrRC = &AArch64::GPR32spRegClass;
4210       BitSize = 32;
4211       ZeroReg = AArch64::WZR;
4212       Opc = AArch64::MADDWrrr;
4213       RC = &AArch64::GPR32RegClass;
4214     } else {
4215       OrrOpc = AArch64::ORRXri;
4216       OrrRC = &AArch64::GPR64spRegClass;
4217       BitSize = 64;
4218       ZeroReg = AArch64::XZR;
4219       Opc = AArch64::MADDXrrr;
4220       RC = &AArch64::GPR64RegClass;
4221     }
4222     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4223     uint64_t Imm = Root.getOperand(2).getImm();
4224 
4225     if (Root.getOperand(3).isImm()) {
4226       unsigned Val = Root.getOperand(3).getImm();
4227       Imm = Imm << Val;
4228     }
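    // The (possibly shifted) immediate can only be rematerialized with a
    // single ORR when it is encodable as a logical immediate; otherwise the
    // pattern is abandoned and nothing is inserted.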
4229     uint64_t UImm = SignExtend64(Imm, BitSize);
4230     uint64_t Encoding;
4231     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4232       MachineInstrBuilder MIB1 =
4233           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4234               .addReg(ZeroReg)
4235               .addImm(Encoding);
4236       InsInstrs.push_back(MIB1);
4237       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4238       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4239     }
4240     break;
4241   }
4242   case MachineCombinerPattern::MULSUBW_OP1:
4243   case MachineCombinerPattern::MULSUBX_OP1: {
4244     // MUL I=A,B,0
4245     // SUB R,I, C
4246     // ==> SUB  V, 0, C
4247     // ==> MADD R,A,B,V // = -C + A*B
4248     // --- Create(MADD);
4249     const TargetRegisterClass *SubRC;
4250     unsigned SubOpc, ZeroReg;
4251     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4252       SubOpc = AArch64::SUBWrr;
4253       SubRC = &AArch64::GPR32spRegClass;
4254       ZeroReg = AArch64::WZR;
4255       Opc = AArch64::MADDWrrr;
4256       RC = &AArch64::GPR32RegClass;
4257     } else {
4258       SubOpc = AArch64::SUBXrr;
4259       SubRC = &AArch64::GPR64spRegClass;
4260       ZeroReg = AArch64::XZR;
4261       Opc = AArch64::MADDXrrr;
4262       RC = &AArch64::GPR64RegClass;
4263     }
4264     unsigned NewVR = MRI.createVirtualRegister(SubRC);
4265     // SUB NewVR, 0, C
4266     MachineInstrBuilder MIB1 =
4267         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4268             .addReg(ZeroReg)
4269             .add(Root.getOperand(2));
4270     InsInstrs.push_back(MIB1);
4271     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4272     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4273     break;
4274   }
4275   case MachineCombinerPattern::MULSUBW_OP2:
4276   case MachineCombinerPattern::MULSUBX_OP2:
4277     // MUL I=A,B,0
4278     // SUB R,C,I
4279     // ==> MSUB R,A,B,C (computes C - A*B)
4280     // --- Create(MSUB);
4281     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4282       Opc = AArch64::MSUBWrrr;
4283       RC = &AArch64::GPR32RegClass;
4284     } else {
4285       Opc = AArch64::MSUBXrrr;
4286       RC = &AArch64::GPR64RegClass;
4287     }
4288     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4289     break;
4290   case MachineCombinerPattern::MULSUBWI_OP1:
4291   case MachineCombinerPattern::MULSUBXI_OP1: {
4292     // MUL I=A,B,0
4293     // SUB R,I, Imm
4294     // ==> ORR  V, ZR, -Imm
4295     // ==> MADD R,A,B,V // = -Imm + A*B
4296     // --- Create(MADD);
4297     const TargetRegisterClass *OrrRC;
4298     unsigned BitSize, OrrOpc, ZeroReg;
4299     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4300       OrrOpc = AArch64::ORRWri;
4301       OrrRC = &AArch64::GPR32spRegClass;
4302       BitSize = 32;
4303       ZeroReg = AArch64::WZR;
4304       Opc = AArch64::MADDWrrr;
4305       RC = &AArch64::GPR32RegClass;
4306     } else {
4307       OrrOpc = AArch64::ORRXri;
4308       OrrRC = &AArch64::GPR64spRegClass;
4309       BitSize = 64;
4310       ZeroReg = AArch64::XZR;
4311       Opc = AArch64::MADDXrrr;
4312       RC = &AArch64::GPR64RegClass;
4313     }
4314     unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4315     uint64_t Imm = Root.getOperand(2).getImm();
4316     if (Root.getOperand(3).isImm()) {
4317       unsigned Val = Root.getOperand(3).getImm();
4318       Imm = Imm << Val;
4319     }
4320     uint64_t UImm = SignExtend64(-Imm, BitSize);
4321     uint64_t Encoding;
4322     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4323       MachineInstrBuilder MIB1 =
4324           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4325               .addReg(ZeroReg)
4326               .addImm(Encoding);
4327       InsInstrs.push_back(MIB1);
4328       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4329       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4330     }
4331     break;
4332   }
4333   // Floating Point Support
4334   case MachineCombinerPattern::FMULADDS_OP1:
4335   case MachineCombinerPattern::FMULADDD_OP1:
4336     // MUL I=A,B,0
4337     // ADD R,I,C
4338     // ==> MADD R,A,B,C
4339     // --- Create(MADD);
4340     if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4341       Opc = AArch64::FMADDSrrr;
4342       RC = &AArch64::FPR32RegClass;
4343     } else {
4344       Opc = AArch64::FMADDDrrr;
4345       RC = &AArch64::FPR64RegClass;
4346     }
4347     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4348     break;
4349   case MachineCombinerPattern::FMULADDS_OP2:
4350   case MachineCombinerPattern::FMULADDD_OP2:
4351     // FMUL I=A,B,0
4352     // FADD R,C,I
4353     // ==> FMADD R,A,B,C
4354     // --- Create(FMADD);
4355     if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4356       Opc = AArch64::FMADDSrrr;
4357       RC = &AArch64::FPR32RegClass;
4358     } else {
4359       Opc = AArch64::FMADDDrrr;
4360       RC = &AArch64::FPR64RegClass;
4361     }
4362     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4363     break;
4364 
4365   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4366     Opc = AArch64::FMLAv1i32_indexed;
4367     RC = &AArch64::FPR32RegClass;
4368     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4369                            FMAInstKind::Indexed);
4370     break;
4371   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4372     Opc = AArch64::FMLAv1i32_indexed;
4373     RC = &AArch64::FPR32RegClass;
4374     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4375                            FMAInstKind::Indexed);
4376     break;
4377 
4378   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4379     Opc = AArch64::FMLAv1i64_indexed;
4380     RC = &AArch64::FPR64RegClass;
4381     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4382                            FMAInstKind::Indexed);
4383     break;
4384   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4385     Opc = AArch64::FMLAv1i64_indexed;
4386     RC = &AArch64::FPR64RegClass;
4387     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4388                            FMAInstKind::Indexed);
4389     break;
4390 
4391   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4392   case MachineCombinerPattern::FMLAv2f32_OP1:
4393     RC = &AArch64::FPR64RegClass;
4394     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4395       Opc = AArch64::FMLAv2i32_indexed;
4396       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4397                              FMAInstKind::Indexed);
4398     } else {
4399       Opc = AArch64::FMLAv2f32;
4400       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4401                              FMAInstKind::Accumulator);
4402     }
4403     break;
4404   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4405   case MachineCombinerPattern::FMLAv2f32_OP2:
4406     RC = &AArch64::FPR64RegClass;
4407     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4408       Opc = AArch64::FMLAv2i32_indexed;
4409       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4410                              FMAInstKind::Indexed);
4411     } else {
4412       Opc = AArch64::FMLAv2f32;
4413       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4414                              FMAInstKind::Accumulator);
4415     }
4416     break;
4417 
4418   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4419   case MachineCombinerPattern::FMLAv2f64_OP1:
4420     RC = &AArch64::FPR128RegClass;
4421     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4422       Opc = AArch64::FMLAv2i64_indexed;
4423       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4424                              FMAInstKind::Indexed);
4425     } else {
4426       Opc = AArch64::FMLAv2f64;
4427       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4428                              FMAInstKind::Accumulator);
4429     }
4430     break;
4431   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4432   case MachineCombinerPattern::FMLAv2f64_OP2:
4433     RC = &AArch64::FPR128RegClass;
4434     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4435       Opc = AArch64::FMLAv2i64_indexed;
4436       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4437                              FMAInstKind::Indexed);
4438     } else {
4439       Opc = AArch64::FMLAv2f64;
4440       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4441                              FMAInstKind::Accumulator);
4442     }
4443     break;
4444 
4445   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4446   case MachineCombinerPattern::FMLAv4f32_OP1:
4447     RC = &AArch64::FPR128RegClass;
4448     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4449       Opc = AArch64::FMLAv4i32_indexed;
4450       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4451                              FMAInstKind::Indexed);
4452     } else {
4453       Opc = AArch64::FMLAv4f32;
4454       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4455                              FMAInstKind::Accumulator);
4456     }
4457     break;
4458 
4459   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4460   case MachineCombinerPattern::FMLAv4f32_OP2:
4461     RC = &AArch64::FPR128RegClass;
4462     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4463       Opc = AArch64::FMLAv4i32_indexed;
4464       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4465                              FMAInstKind::Indexed);
4466     } else {
4467       Opc = AArch64::FMLAv4f32;
4468       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4469                              FMAInstKind::Accumulator);
4470     }
4471     break;
4472 
4473   case MachineCombinerPattern::FMULSUBS_OP1:
4474   case MachineCombinerPattern::FMULSUBD_OP1: {
4475     // FMUL I=A,B,0
4476     // FSUB R,I,C
4477     // ==> FNMSUB R,A,B,C // = -C + A*B
4478     // --- Create(FNMSUB);
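    // Note: FNMSUB Rd, Rn, Rm, Ra computes Rn*Rm - Ra (the negation of
    // FMSUB's Ra - Rn*Rm), so it matches FSUB R,I,C with I = A*B exactly.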
4479     if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4480       Opc = AArch64::FNMSUBSrrr;
4481       RC = &AArch64::FPR32RegClass;
4482     } else {
4483       Opc = AArch64::FNMSUBDrrr;
4484       RC = &AArch64::FPR64RegClass;
4485     }
4486     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4487     break;
4488   }
4489 
4490   case MachineCombinerPattern::FNMULSUBS_OP1:
4491   case MachineCombinerPattern::FNMULSUBD_OP1: {
4492     // FNMUL I=A,B,0
4493     // FSUB R,I,C
4494     // ==> FNMADD R,A,B,C // = -A*B - C
4495     // --- Create(FNMADD);
4496     if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4497       Opc = AArch64::FNMADDSrrr;
4498       RC = &AArch64::FPR32RegClass;
4499     } else {
4500       Opc = AArch64::FNMADDDrrr;
4501       RC = &AArch64::FPR64RegClass;
4502     }
4503     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4504     break;
4505   }
4506 
4507   case MachineCombinerPattern::FMULSUBS_OP2:
4508   case MachineCombinerPattern::FMULSUBD_OP2: {
4509     // FMUL I=A,B,0
4510     // FSUB R,C,I
4511     // ==> FMSUB R,A,B,C (computes C - A*B)
4512     // --- Create(FMSUB);
4513     if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4514       Opc = AArch64::FMSUBSrrr;
4515       RC = &AArch64::FPR32RegClass;
4516     } else {
4517       Opc = AArch64::FMSUBDrrr;
4518       RC = &AArch64::FPR64RegClass;
4519     }
4520     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4521     break;
4522   }
4523 
4524   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4525     Opc = AArch64::FMLSv1i32_indexed;
4526     RC = &AArch64::FPR32RegClass;
4527     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4528                            FMAInstKind::Indexed);
4529     break;
4530 
4531   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4532     Opc = AArch64::FMLSv1i64_indexed;
4533     RC = &AArch64::FPR64RegClass;
4534     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4535                            FMAInstKind::Indexed);
4536     break;
4537 
4538   case MachineCombinerPattern::FMLSv2f32_OP2:
4539   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4540     RC = &AArch64::FPR64RegClass;
4541     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4542       Opc = AArch64::FMLSv2i32_indexed;
4543       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4544                              FMAInstKind::Indexed);
4545     } else {
4546       Opc = AArch64::FMLSv2f32;
4547       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4548                              FMAInstKind::Accumulator);
4549     }
4550     break;
4551 
4552   case MachineCombinerPattern::FMLSv2f64_OP2:
4553   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4554     RC = &AArch64::FPR128RegClass;
4555     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4556       Opc = AArch64::FMLSv2i64_indexed;
4557       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4558                              FMAInstKind::Indexed);
4559     } else {
4560       Opc = AArch64::FMLSv2f64;
4561       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4562                              FMAInstKind::Accumulator);
4563     }
4564     break;
4565 
4566   case MachineCombinerPattern::FMLSv4f32_OP2:
4567   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4568     RC = &AArch64::FPR128RegClass;
4569     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4570       Opc = AArch64::FMLSv4i32_indexed;
4571       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4572                              FMAInstKind::Indexed);
4573     } else {
4574       Opc = AArch64::FMLSv4f32;
4575       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4576                              FMAInstKind::Accumulator);
4577     }
4578     break;
4579   case MachineCombinerPattern::FMLSv2f32_OP1:
4580   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
4581     RC = &AArch64::FPR64RegClass;
4582     unsigned NewVR = MRI.createVirtualRegister(RC);
4583     MachineInstrBuilder MIB1 =
4584         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4585             .add(Root.getOperand(2));
4586     InsInstrs.push_back(MIB1);
4587     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4588     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
4589       Opc = AArch64::FMLAv2i32_indexed;
4590       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4591                              FMAInstKind::Indexed, &NewVR);
4592     } else {
4593       Opc = AArch64::FMLAv2f32;
4594       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4595                              FMAInstKind::Accumulator, &NewVR);
4596     }
4597     break;
4598   }
4599   case MachineCombinerPattern::FMLSv4f32_OP1:
4600   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
4601     RC = &AArch64::FPR128RegClass;
4602     unsigned NewVR = MRI.createVirtualRegister(RC);
4603     MachineInstrBuilder MIB1 =
4604         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4605             .add(Root.getOperand(2));
4606     InsInstrs.push_back(MIB1);
4607     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4608     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
4609       Opc = AArch64::FMLAv4i32_indexed;
4610       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4611                              FMAInstKind::Indexed, &NewVR);
4612     } else {
4613       Opc = AArch64::FMLAv4f32;
4614       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4615                              FMAInstKind::Accumulator, &NewVR);
4616     }
4617     break;
4618   }
4619   case MachineCombinerPattern::FMLSv2f64_OP1:
4620   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
4621     RC = &AArch64::FPR128RegClass;
4622     unsigned NewVR = MRI.createVirtualRegister(RC);
4623     MachineInstrBuilder MIB1 =
4624         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4625             .add(Root.getOperand(2));
4626     InsInstrs.push_back(MIB1);
4627     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4628     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
4629       Opc = AArch64::FMLAv2i64_indexed;
4630       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4631                              FMAInstKind::Indexed, &NewVR);
4632     } else {
4633       Opc = AArch64::FMLAv2f64;
4634       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4635                              FMAInstKind::Accumulator, &NewVR);
4636     }
4637     break;
4638   }
4639   } // end switch (Pattern)
4640   // Record MUL and ADD/SUB for deletion
4641   DelInstrs.push_back(MUL);
4642   DelInstrs.push_back(&Root);
4643 }
4644 
4645 /// Replace a csinc-branch sequence with a simple conditional branch
4646 ///
4647 /// Examples:
4648 /// 1. \code
4649 ///   csinc  w9, wzr, wzr, <condition code>
4650 ///   tbnz   w9, #0, 0x44
4651 ///    \endcode
4652 /// to
4653 ///    \code
4654 ///   b.<inverted condition code>
4655 ///    \endcode
4656 ///
4657 /// 2. \code
4658 ///   csinc w9, wzr, wzr, <condition code>
4659 ///   tbz   w9, #0, 0x44
4660 ///    \endcode
4661 /// to
4662 ///    \code
4663 ///   b.<condition code>
4664 ///    \endcode
4665 ///
4666 /// Replace a compare-and-branch sequence with a TBZ/TBNZ instruction when
4667 /// the compare's constant operand is a power of 2.
4668 ///
4669 /// Examples:
4670 ///    \code
4671 ///   and  w8, w8, #0x400
4672 ///   cbnz w8, L1
4673 ///    \endcode
4674 /// to
4675 ///    \code
4676 ///   tbnz w8, #10, L1
4677 ///    \endcode
4678 ///
4679 /// \param  MI Conditional Branch
4680 /// \return True when the simple conditional branch is generated
4681 ///
4682 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4683   bool IsNegativeBranch = false;
4684   bool IsTestAndBranch = false;
4685   unsigned TargetBBInMI = 0;
4686   switch (MI.getOpcode()) {
4687   default:
4688     llvm_unreachable("Unknown branch instruction?");
4689   case AArch64::Bcc:
4690     return false;
4691   case AArch64::CBZW:
4692   case AArch64::CBZX:
4693     TargetBBInMI = 1;
4694     break;
4695   case AArch64::CBNZW:
4696   case AArch64::CBNZX:
4697     TargetBBInMI = 1;
4698     IsNegativeBranch = true;
4699     break;
4700   case AArch64::TBZW:
4701   case AArch64::TBZX:
4702     TargetBBInMI = 2;
4703     IsTestAndBranch = true;
4704     break;
4705   case AArch64::TBNZW:
4706   case AArch64::TBNZX:
4707     TargetBBInMI = 2;
4708     IsNegativeBranch = true;
4709     IsTestAndBranch = true;
4710     break;
4711   }
4712   // So we increment a zero register and test for bits other
4713   // than bit 0? Conservatively bail out in case the verifier
4714   // missed this case.
4715   if (IsTestAndBranch && MI.getOperand(1).getImm())
4716     return false;
4717 
4718   // Find Definition.
4719   assert(MI.getParent() && "Incomplete machine instruction\n");
4720   MachineBasicBlock *MBB = MI.getParent();
4721   MachineFunction *MF = MBB->getParent();
4722   MachineRegisterInfo *MRI = &MF->getRegInfo();
4723   unsigned VReg = MI.getOperand(0).getReg();
4724   if (!TargetRegisterInfo::isVirtualRegister(VReg))
4725     return false;
4726 
4727   MachineInstr *DefMI = MRI->getVRegDef(VReg);
4728 
4729   // Look through COPY instructions to find definition.
4730   while (DefMI->isCopy()) {
4731     unsigned CopyVReg = DefMI->getOperand(1).getReg();
4732     if (!MRI->hasOneNonDBGUse(CopyVReg))
4733       return false;
4734     if (!MRI->hasOneDef(CopyVReg))
4735       return false;
4736     DefMI = MRI->getVRegDef(CopyVReg);
4737   }
4738 
4739   switch (DefMI->getOpcode()) {
4740   default:
4741     return false;
4742   // Fold AND into a TBZ/TBNZ if the constant operand is a power of 2.
4743   case AArch64::ANDWri:
4744   case AArch64::ANDXri: {
4745     if (IsTestAndBranch)
4746       return false;
4747     if (DefMI->getParent() != MBB)
4748       return false;
4749     if (!MRI->hasOneNonDBGUse(VReg))
4750       return false;
4751 
4752     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4753     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4754         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4755     if (!isPowerOf2_64(Mask))
4756       return false;
4757 
4758     MachineOperand &MO = DefMI->getOperand(1);
4759     unsigned NewReg = MO.getReg();
4760     if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4761       return false;
4762 
4763     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4764 
4765     MachineBasicBlock &RefToMBB = *MBB;
4766     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4767     DebugLoc DL = MI.getDebugLoc();
4768     unsigned Imm = Log2_64(Mask);
4769     unsigned Opc = (Imm < 32)
4770                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4771                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4772     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4773                               .addReg(NewReg)
4774                               .addImm(Imm)
4775                               .addMBB(TBB);
4776     // The register is no longer killed; it lives on into the new TB(N)Z.
4777     MO.setIsKill(false);
4778 
4779     // For immediates smaller than 32, we need to use the 32-bit
4780     // variant (W) in all cases, because the 64-bit variant cannot
4781     // encode them.
4782     // Therefore, if the input register is 64-bit, we need to take
4783     // its 32-bit sub-register.
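    // For example (illustrative only), "and x8, x9, #0x10" followed by
    // "cbnz x8, L" becomes "tbnz w9, #4, L", using the 32-bit sub-register
    // of x9 because the tested bit index 4 is below 32.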
4784     if (!Is32Bit && Imm < 32)
4785       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4786     MI.eraseFromParent();
4787     return true;
4788   }
4789   // Look for CSINC
4790   case AArch64::CSINCWr:
4791   case AArch64::CSINCXr: {
4792     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4793           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4794         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4795           DefMI->getOperand(2).getReg() == AArch64::XZR))
4796       return false;
4797 
4798     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4799       return false;
4800 
4801     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4802     // Convert only when the condition code is not modified between
4803     // the CSINC and the branch. The CC may be used by other
4804     // instructions in between.
4805     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4806       return false;
4807     MachineBasicBlock &RefToMBB = *MBB;
4808     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4809     DebugLoc DL = MI.getDebugLoc();
4810     if (IsNegativeBranch)
4811       CC = AArch64CC::getInvertedCondCode(CC);
4812     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4813     MI.eraseFromParent();
4814     return true;
4815   }
4816   }
4817 }
4818 
4819 std::pair<unsigned, unsigned>
4820 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4821   const unsigned Mask = AArch64II::MO_FRAGMENT;
4822   return std::make_pair(TF & Mask, TF & ~Mask);
4823 }
4824 
4825 ArrayRef<std::pair<unsigned, const char *>>
4826 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4827   using namespace AArch64II;
4828 
4829   static const std::pair<unsigned, const char *> TargetFlags[] = {
4830       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4831       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
4832       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
4833       {MO_HI12, "aarch64-hi12"}};
4834   return makeArrayRef(TargetFlags);
4835 }
4836 
4837 ArrayRef<std::pair<unsigned, const char *>>
4838 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4839   using namespace AArch64II;
4840 
4841   static const std::pair<unsigned, const char *> TargetFlags[] = {
4842       {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"}, {MO_TLS, "aarch64-tls"}};
4843   return makeArrayRef(TargetFlags);
4844 }
4845 
4846 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4847 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4848   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4849       {{MOSuppressPair, "aarch64-suppress-pair"},
4850        {MOStridedAccess, "aarch64-strided-access"}};
4851   return makeArrayRef(TargetFlags);
4852 }
4853 
4854 /// Constants defining how certain sequences should be outlined.
4855 /// This encompasses how an outlined function should be called, and what kind of
4856 /// frame should be emitted for that outlined function.
4857 ///
4858 /// \p MachineOutlinerDefault implies that the function should be called with
4859 /// a save and restore of LR to the stack.
4860 ///
4861 /// That is,
4862 ///
4863 /// I1     Save LR                    OUTLINED_FUNCTION:
4864 /// I2 --> BL OUTLINED_FUNCTION       I1
4865 /// I3     Restore LR                 I2
4866 ///                                   I3
4867 ///                                   RET
4868 ///
4869 /// * Call construction overhead: 3 (save + BL + restore)
4870 /// * Frame construction overhead: 1 (ret)
4871 /// * Requires stack fixups? Yes
4872 ///
4873 /// \p MachineOutlinerTailCall implies that the function is being created from
4874 /// a sequence of instructions ending in a return.
4875 ///
4876 /// That is,
4877 ///
4878 /// I1                             OUTLINED_FUNCTION:
4879 /// I2 --> B OUTLINED_FUNCTION     I1
4880 /// RET                            I2
4881 ///                                RET
4882 ///
4883 /// * Call construction overhead: 1 (B)
4884 /// * Frame construction overhead: 0 (Return included in sequence)
4885 /// * Requires stack fixups? No
4886 ///
4887 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4888 /// a BL instruction, but doesn't require LR to be saved and restored. This
4889 /// happens when LR is known to be dead.
4890 ///
4891 /// That is,
4892 ///
4893 /// I1                                OUTLINED_FUNCTION:
4894 /// I2 --> BL OUTLINED_FUNCTION       I1
4895 /// I3                                I2
4896 ///                                   I3
4897 ///                                   RET
4898 ///
4899 /// * Call construction overhead: 1 (BL)
4900 /// * Frame construction overhead: 1 (RET)
4901 /// * Requires stack fixups? No
4902 ///
4903 /// \p MachineOutlinerThunk implies that the function is being created from
4904 /// a sequence of instructions ending in a call. The outlined function is
4905 /// called with a BL instruction, and the outlined function tail-calls the
4906 /// original call destination.
4907 ///
4908 /// That is,
4909 ///
4910 /// I1                                OUTLINED_FUNCTION:
4911 /// I2 --> BL OUTLINED_FUNCTION       I1
4912 /// BL f                              I2
4913 ///                                   B f
4914 /// * Call construction overhead: 1 (BL)
4915 /// * Frame construction overhead: 0
4916 /// * Requires stack fixups? No
4917 ///
4918 /// \p MachineOutlinerRegSave implies that the function should be called with a
4919 /// save and restore of LR to an available register. This allows us to avoid
4920 /// stack fixups. Note that this outlining variant is compatible with the
4921 /// NoLRSave case.
4922 ///
4923 /// That is,
4924 ///
4925 /// I1     Save LR                    OUTLINED_FUNCTION:
4926 /// I2 --> BL OUTLINED_FUNCTION       I1
4927 /// I3     Restore LR                 I2
4928 ///                                   I3
4929 ///                                   RET
4930 ///
4931 /// * Call construction overhead: 3 (save + BL + restore)
4932 /// * Frame construction overhead: 1 (ret)
4933 /// * Requires stack fixups? No
4934 enum MachineOutlinerClass {
4935   MachineOutlinerDefault,  ///< Emit a save, restore, call, and return.
4936   MachineOutlinerTailCall, ///< Only emit a branch.
4937   MachineOutlinerNoLRSave, ///< Emit a call and return.
4938   MachineOutlinerThunk,    ///< Emit a call and tail-call.
4939   MachineOutlinerRegSave   ///< Same as default, but save to a register.
4940 };
4941 
4942 enum MachineOutlinerMBBFlags {
4943   LRUnavailableSomewhere = 0x2,
4944   HasCalls = 0x4
4945 };
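// LRUnavailableSomewhere means LR is live (or otherwise in use) at some point
// in the basic block, so candidates taken from it may need LR saved around
// the outlined call; HasCalls means the block contains at least one call.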
4946 
4947 unsigned
4948 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
4949   MachineFunction *MF = C.getMF();
4950   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
4951       MF->getSubtarget().getRegisterInfo());
4952 
4953   // Check if there is an available register across the sequence that we can
4954   // use.
4955   for (unsigned Reg : AArch64::GPR64RegClass) {
4956     if (!ARI->isReservedReg(*MF, Reg) &&
4957         Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
4958         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
4959         Reg != AArch64::X17 && // Ditto for X17.
4960         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
4961       return Reg;
4962   }
4963 
4964   // No suitable register. Return 0.
4965   return 0u;
4966 }
4967 
4968 outliner::OutlinedFunction
4969 AArch64InstrInfo::getOutliningCandidateInfo(
4970     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
4971   unsigned SequenceSize = std::accumulate(
4972       RepeatedSequenceLocs[0].front(),
4973       std::next(RepeatedSequenceLocs[0].back()),
4974       0, [this](unsigned Sum, const MachineInstr &MI) {
4975         return Sum + getInstSizeInBytes(MI);
4976       });
4977 
4978   // Compute liveness information for each candidate.
4979   const TargetRegisterInfo &TRI = getRegisterInfo();
4980   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4981                 [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
4982 
4983   // According to the AArch64 Procedure Call Standard, the following are
4984   // undefined on entry/exit from a function call:
4985   //
4986   // * Registers x16, x17, (and thus w16, w17)
4987   // * Condition codes (and thus the NZCV register)
4988   //
4989   // Because of this, we can't outline any sequence of instructions where
4990   // one of these registers is live into/across it. Thus, we need to
4991   // delete those candidates.
4994   auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
4995     LiveRegUnits LRU = C.LRU;
4996     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
4997             !LRU.available(AArch64::NZCV));
4998   };
4999 
5000   // Erase every candidate that violates the restrictions above. (It could be
5001   // true that we have viable candidates, so it's not worth bailing out in
5002   // the case that, say, 1 out of 20 candidates violates the restrictions.)
5003   RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5004                                             RepeatedSequenceLocs.end(),
5005                                             CantGuaranteeValueAcrossCall),
5006                              RepeatedSequenceLocs.end());
5007 
5008   // If the sequence is empty, we're done.
5009   if (RepeatedSequenceLocs.empty())
5010     return outliner::OutlinedFunction();
5011 
5012   // At this point, we have only "safe" candidates to outline. Figure out
5013   // frame + call instruction information.
5014 
5015   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
5016 
5017   // Helper lambda which sets call information for every candidate.
5018   auto SetCandidateCallInfo =
5019       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5020         for (outliner::Candidate &C : RepeatedSequenceLocs)
5021           C.setCallInfo(CallID, NumBytesForCall);
5022       };
5023 
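  // Size bookkeeping below: NumBytesToCreateFrame is the extra code emitted
  // in the outlined function itself (e.g. its final RET), while the value
  // passed to SetCandidateCallInfo is the size in bytes of the call sequence
  // at each call site (4 for a lone B/BL, 12 for a save + BL + restore).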
5024   unsigned FrameID = MachineOutlinerDefault;
5025   unsigned NumBytesToCreateFrame = 4;
5026 
5027   // If the last instruction in any candidate is a terminator, then we should
5028   // tail call all of the candidates.
5029   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5030     FrameID = MachineOutlinerTailCall;
5031     NumBytesToCreateFrame = 0;
5032     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5033   }
5034 
5035   else if (LastInstrOpcode == AArch64::BL || LastInstrOpcode == AArch64::BLR) {
5036     // FIXME: Do we need to check if the code after this uses the value of LR?
5037     FrameID = MachineOutlinerThunk;
5038     NumBytesToCreateFrame = 0;
5039     SetCandidateCallInfo(MachineOutlinerThunk, 4);
5040   }
5041 
5042   // Make sure that LR isn't live on entry to this candidate. The only
5043   // instructions that use LR that could possibly appear in a repeated sequence
5044   // are calls. Therefore, we only have to check and see if LR is dead on entry
5045   // to (or exit from) some candidate.
5046   else if (std::all_of(RepeatedSequenceLocs.begin(),
5047                        RepeatedSequenceLocs.end(),
5048                        [](outliner::Candidate &C) {
5049                          return C.LRU.available(AArch64::LR);
5050                          })) {
5051     FrameID = MachineOutlinerNoLRSave;
5052     NumBytesToCreateFrame = 4;
5053     SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
5054   }
5055 
5056   // LR is live, so we need to save it. Decide whether it should be saved to
5057   // the stack, or if it can be saved to a register.
5058   else {
5059     if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5060                     [this](outliner::Candidate &C) {
5061                       return findRegisterToSaveLRTo(C);
5062                     })) {
5063       // Every candidate has an available callee-saved register for the save.
5064       // We can save LR to a register.
5065       FrameID = MachineOutlinerRegSave;
5066       NumBytesToCreateFrame = 4;
5067       SetCandidateCallInfo(MachineOutlinerRegSave, 12);
5068     }
5069 
5070     else {
5071       // At least one candidate does not have an available callee-saved
5072       // register. We must save LR to the stack.
5073       FrameID = MachineOutlinerDefault;
5074       NumBytesToCreateFrame = 4;
5075       SetCandidateCallInfo(MachineOutlinerDefault, 12);
5076     }
5077   }
5078 
5079   // Check if the range contains a call. These require a save + restore of the
5080   // link register.
5081   if (std::any_of(RepeatedSequenceLocs[0].front(),
5082                   RepeatedSequenceLocs[0].back(),
5083                   [](const MachineInstr &MI) { return MI.isCall(); }))
5084     NumBytesToCreateFrame += 8; // Save + restore the link register.
5085 
5086   // Handle the last instruction separately. If this is a tail call, then the
5087   // last instruction is a call. We don't want to save + restore in this case.
5088   // However, it could be possible that the last instruction is a call without
5089   // it being valid to tail call this sequence. We should consider this as well.
5090   else if (FrameID != MachineOutlinerThunk &&
5091            FrameID != MachineOutlinerTailCall &&
5092            RepeatedSequenceLocs[0].back()->isCall())
5093     NumBytesToCreateFrame += 8;
5094 
5095   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5096                                     NumBytesToCreateFrame, FrameID);
5097 }
5098 
5099 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5100     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5101   const Function &F = MF.getFunction();
5102 
5103   // Can F be deduplicated by the linker? If it can, don't outline from it.
5104   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5105     return false;
5106 
5107   // Don't outline from functions with section markings; the program could
5108   // expect that all the code is in the named section.
5109   // FIXME: Allow outlining from multiple functions with the same section
5110   // marking.
5111   if (F.hasSection())
5112     return false;
5113 
5114   // Outlining from functions with redzones is unsafe since the outliner may
5115   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5116   // outline from it.
5117   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5118   if (!AFI || AFI->hasRedZone().getValueOr(true))
5119     return false;
5120 
5121   // It's safe to outline from MF.
5122   return true;
5123 }
5124 
5125 unsigned
5126 AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
5127   unsigned Flags = 0x0;
5128   // Check if there's a call inside this MachineBasicBlock. If there is, then
5129   // set a flag.
5130   if (std::any_of(MBB.begin(), MBB.end(),
5131                   [](MachineInstr &MI) { return MI.isCall(); }))
5132     Flags |= MachineOutlinerMBBFlags::HasCalls;
5133 
5134   // Check if LR is available through all of the MBB. If it's not, then set
5135   // a flag.
5136   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5137          "Suitable Machine Function for outlining must track liveness");
5138   LiveRegUnits LRU(getRegisterInfo());
5139   LRU.addLiveOuts(MBB);
5140 
5141   std::for_each(MBB.rbegin(),
5142                 MBB.rend(),
5143                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
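  // After walking the block bottom-up, LRU holds every register unit that is
  // live out of the MBB or referenced anywhere within it, so LR being
  // unavailable below means it cannot be treated as free across this block.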
5144 
5145   if (!LRU.available(AArch64::LR))
5146       Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
5147 
5148   return Flags;
5149 }
5150 
5151 outliner::InstrType
5152 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5153                                    unsigned Flags) const {
5154   MachineInstr &MI = *MIT;
5155   MachineBasicBlock *MBB = MI.getParent();
5156   MachineFunction *MF = MBB->getParent();
5157   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5158 
5159   // Don't outline LOHs.
5160   if (FuncInfo->getLOHRelated().count(&MI))
5161     return outliner::InstrType::Illegal;
5162 
5163   // Don't allow debug values to impact outlining type.
5164   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
5165     return outliner::InstrType::Invisible;
5166 
5167   // At this point, KILL instructions don't really tell us much so we can go
5168   // ahead and skip over them.
5169   if (MI.isKill())
5170     return outliner::InstrType::Invisible;
5171 
5172   // Is this a terminator for a basic block?
5173   if (MI.isTerminator()) {
5174 
5175     // Is this the end of a function?
5176     if (MI.getParent()->succ_empty())
5177       return outliner::InstrType::Legal;
5178 
5179     // It's not, so don't outline it.
5180     return outliner::InstrType::Illegal;
5181   }
5182 
5183   // Make sure none of the operands are un-outlinable.
5184   for (const MachineOperand &MOP : MI.operands()) {
5185     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5186         MOP.isTargetIndex())
5187       return outliner::InstrType::Illegal;
5188 
5189     // If it uses LR or W30 explicitly, then don't touch it.
5190     if (MOP.isReg() && !MOP.isImplicit() &&
5191         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
5192       return outliner::InstrType::Illegal;
5193   }
5194 
5195   // Special cases for instructions that can always be outlined, but will fail
5196   // the later tests. E.g. ADRPs are PC-relative, but they can always be
5197   // outlined because they don't require a *specific* value to be in LR.
5198   if (MI.getOpcode() == AArch64::ADRP)
5199     return outliner::InstrType::Legal;
5200 
5201   // If MI is a call we might be able to outline it. We don't want to outline
5202   // any calls that rely on the position of items on the stack. When we outline
5203   // something containing a call, we have to emit a save and restore of LR in
5204   // the outlined function. Currently, this always happens by saving LR to the
5205   // stack. Thus, if we outline, say, half the parameters for a function call
5206   // plus the call, then we'll break the callee's expectations for the layout
5207   // of the stack.
5208   //
5209   // FIXME: Allow calls to functions which construct a stack frame, as long
5210   // as they don't access arguments on the stack.
5211   // FIXME: Figure out some way to analyze functions defined in other modules.
5212   // We should be able to compute the memory usage based on the IR calling
5213   // convention, even if we can't see the definition.
5214   if (MI.isCall()) {
5215     // Get the function associated with the call. Look at each operand and find
5216     // the one that represents the callee and get its name.
5217     const Function *Callee = nullptr;
5218     for (const MachineOperand &MOP : MI.operands()) {
5219       if (MOP.isGlobal()) {
5220         Callee = dyn_cast<Function>(MOP.getGlobal());
5221         break;
5222       }
5223     }
5224 
5225     // Never outline calls to mcount.  There isn't any rule that would require
5226     // this, but the Linux kernel's "ftrace" feature depends on it.
5227     if (Callee && Callee->getName() == "\01_mcount")
5228       return outliner::InstrType::Illegal;
5229 
5230     // If we don't know anything about the callee, assume it depends on the
5231     // stack layout of the caller. In that case, it's only legal to outline
5232     // as a tail-call.  Whitelist the call instructions we know about so we
5233     // don't get unexpected results with call pseudo-instructions.
5234     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
5235     if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
5236       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
5237 
5238     if (!Callee)
5239       return UnknownCallOutlineType;
5240 
5241     // We have a function we have information about. Check if it's something
5242     // we can safely outline.
5243     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
5244 
5245     // We don't know what's going on with the callee at all. Don't touch it.
5246     if (!CalleeMF)
5247       return UnknownCallOutlineType;
5248 
5249     // Check if we know anything about the callee saves on the function. If we
5250     // don't, then don't touch it, since that implies that we haven't
5251     // computed anything about its stack frame yet.
5252     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
5253     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
5254         MFI.getNumObjects() > 0)
5255       return UnknownCallOutlineType;
5256 
5257     // At this point, we can say that CalleeMF ought to not pass anything on the
5258     // stack. Therefore, we can outline it.
5259     return outliner::InstrType::Legal;
5260   }
5261 
5262   // Don't outline positions.
5263   if (MI.isPosition())
5264     return outliner::InstrType::Illegal;
5265 
5266   // Don't touch the link register or W30.
5267   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
5268       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
5269     return outliner::InstrType::Illegal;
5270 
5271   // Does this use the stack?
5272   if (MI.modifiesRegister(AArch64::SP, &RI) ||
5273       MI.readsRegister(AArch64::SP, &RI)) {
5274     // True if there is no chance that any outlined candidate from this range
5275     // could require stack fixups. That is, both
5276     // * LR is available in the range (No save/restore around call)
5277     // * The range doesn't include calls (No save/restore in outlined frame)
5278     // are true.
5279     // FIXME: This is very restrictive; the flags check the whole block,
5280     // not just the bit we will try to outline.
5281     bool MightNeedStackFixUp =
5282         (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
5283                   MachineOutlinerMBBFlags::HasCalls));
5284 
5285     // If this instruction is in a range where it *never* needs to be fixed
5286     // up, then we can *always* outline it. This is true even if it's not
5287     // possible to fix that instruction up.
5288     //
5289     // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
5290     // use SP. Suppose that I1 sits within a range that definitely doesn't
5291     // need stack fixups, while I2 sits in a range that does.
5292     //
5293     // First, I1 can be outlined as long as we *never* fix up the stack in
5294     // any sequence containing it. I1 is already a safe instruction in the
5295     // original program, so as long as we don't modify it we're good to go.
5296     // So this leaves us with showing that outlining I2 won't break our
5297     // program.
5298     //
5299     // Suppose I1 and I2 belong to equivalent candidate sequences. When we
5300     // look at I2, we need to see if it can be fixed up. Suppose I2, (and
5301     // thus I1) cannot be fixed up. Then I2 will be assigned an unique
5302     // integer label; thus, I2 cannot belong to any candidate sequence (a
5303     // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
5304     // as well, so we're good. Thus, I1 is always safe to outline.
5305     //
5306     // This gives us two things: first off, it buys us some more instructions
5307     // for our search space by deeming stack instructions illegal only when
5308     // they can't be fixed up AND we might have to fix them up. Second off,
5309     // This allows us to catch tricky instructions like, say,
5310     // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
5311     // be paired with later SUBXris, which might *not* end up being outlined.
5312     // If we mess with the stack to save something, then an ADDXri messes with
5313     // it *after*, then we aren't going to restore the right something from
5314     // the stack if we don't outline the corresponding SUBXri first. ADDXris and
5315     // SUBXris are extremely common in prologue/epilogue code, so supporting
5316     // them in the outliner can be a pretty big win!
5317     if (!MightNeedStackFixUp)
5318       return outliner::InstrType::Legal;
5319 
5320     // Any modification of SP will break our code to save/restore LR.
5321     // FIXME: We could handle some instructions which add a constant offset to
5322     // SP, with a bit more work.
5323     if (MI.modifiesRegister(AArch64::SP, &RI))
5324       return outliner::InstrType::Illegal;
5325 
5326     // At this point, we have a stack instruction that we might need to fix
5327     // up. We'll handle it if it's a load or store.
5328     if (MI.mayLoadOrStore()) {
5329       unsigned Base;  // Filled with the base register of MI.
5330       int64_t Offset; // Filled with the offset of MI.
5331       unsigned DummyWidth;
5332 
5333       // Does it allow us to offset the base register and is the base SP?
5334       if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
5335           Base != AArch64::SP)
5336         return outliner::InstrType::Illegal;
5337 
5338       // Find the minimum/maximum offset for this instruction and check if
5339       // fixing it up would be in range.
5340       int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
5341       unsigned Scale;               // The scale to multiply the offsets by.
5342       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
5343 
5344       // TODO: We should really test what happens if an instruction overflows.
5345       // This is tricky to test with IR tests, but when the outliner is moved
5346       // to a MIR test, it really ought to be checked.
5347       Offset += 16; // Update the offset to what it would be if we outlined.
5348       if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
5349         return outliner::InstrType::Illegal;
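      // For instance, an LDRXui from SP (scale 8, immediate range [0, 4095])
      // with a byte offset of 32760 would become 32776 after outlining and
      // be rejected here, since it can no longer be encoded.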
5350 
5351       // It's in range, so we can outline it.
5352       return outliner::InstrType::Legal;
5353     }
5354 
5355     // FIXME: Add handling for instructions like "add x0, sp, #8".
5356 
5357     // We can't fix it up, so don't outline it.
5358     return outliner::InstrType::Illegal;
5359   }
5360 
5361   return outliner::InstrType::Legal;
5362 }
5363 
5364 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5365   for (MachineInstr &MI : MBB) {
5366     unsigned Base, Width;
5367     int64_t Offset;
5368 
5369     // Is this a load or store with an immediate offset with SP as the base?
5370     if (!MI.mayLoadOrStore() ||
5371         !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
5372         Base != AArch64::SP)
5373       continue;
5374 
5375     // It is, so we have to fix it up.
5376     unsigned Scale;
5377     int64_t Dummy1, Dummy2;
5378 
5379     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5380     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5381     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5382     assert(Scale != 0 && "Unexpected opcode!");
5383 
5384     // We've pushed the return address to the stack, so add 16 to the offset.
5385     // This is safe, since we already checked if it would overflow when we
5386     // checked if this instruction was legal to outline.
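    // For example, an access of "ldr x0, [sp, #8]" in the original function
    // becomes "ldr x0, [sp, #24]" in the outlined body: the byte offset grows
    // by 16 and is stored scaled by the access size ((8 + 16) / 8 = 3).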
5387     int64_t NewImm = (Offset + 16) / Scale;
5388     StackOffsetOperand.setImm(NewImm);
5389   }
5390 }
5391 
5392 void AArch64InstrInfo::buildOutlinedFrame(
5393     MachineBasicBlock &MBB, MachineFunction &MF,
5394     const outliner::OutlinedFunction &OF) const {
5395   // For thunk outlining, rewrite the last instruction from a call to a
5396   // tail-call.
5397   if (OF.FrameConstructionID == MachineOutlinerThunk) {
5398     MachineInstr *Call = &*--MBB.instr_end();
5399     unsigned TailOpcode;
5400     if (Call->getOpcode() == AArch64::BL) {
5401       TailOpcode = AArch64::TCRETURNdi;
5402     } else {
5403       assert(Call->getOpcode() == AArch64::BLR);
5404       TailOpcode = AArch64::TCRETURNri;
5405     }
5406     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
5407                             .add(Call->getOperand(0))
5408                             .addImm(0);
5409     MBB.insert(MBB.end(), TC);
5410     Call->eraseFromParent();
5411   }
5412 
5413   // Is there a call in the outlined range?
5414   auto IsNonTailCall = [](MachineInstr &MI) {
5415     return MI.isCall() && !MI.isReturn();
5416   };
5417   if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
5418     // Fix up the instructions in the range, since we're going to modify the
5419     // stack.
5420     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
5421            "Can only fix up stack references once");
5422     fixupPostOutline(MBB);
5423 
5424     // LR has to be a live in so that we can save it.
5425     MBB.addLiveIn(AArch64::LR);
5426 
5427     MachineBasicBlock::iterator It = MBB.begin();
5428     MachineBasicBlock::iterator Et = MBB.end();
5429 
5430     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5431         OF.FrameConstructionID == MachineOutlinerThunk)
5432       Et = std::prev(MBB.end());
5433 
5434     // Insert a save before the outlined region
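    // (STRXpre with these operands is the pre-indexed store
    // "str x30, [sp, #-16]!"; the matching LDRXpost emitted below is
    // "ldr x30, [sp], #16".)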
5435     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5436                                 .addReg(AArch64::SP, RegState::Define)
5437                                 .addReg(AArch64::LR)
5438                                 .addReg(AArch64::SP)
5439                                 .addImm(-16);
5440     It = MBB.insert(It, STRXpre);
5441 
5442     const TargetSubtargetInfo &STI = MF.getSubtarget();
5443     const MCRegisterInfo *MRI = STI.getRegisterInfo();
5444     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
5445 
5446     // Add a CFI saying the stack was moved 16 B down.
5447     int64_t StackPosEntry =
5448         MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
5449     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5450         .addCFIIndex(StackPosEntry)
5451         .setMIFlags(MachineInstr::FrameSetup);
5452 
5453     // Add a CFI saying that the LR that we want to find is now 16 B higher than
5454     // before.
5455     int64_t LRPosEntry =
5456         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
5457     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5458         .addCFIIndex(LRPosEntry)
5459         .setMIFlags(MachineInstr::FrameSetup);
5460 
5461     // Insert a restore before the terminator for the function.
5462     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5463                                  .addReg(AArch64::SP, RegState::Define)
5464                                  .addReg(AArch64::LR, RegState::Define)
5465                                  .addReg(AArch64::SP)
5466                                  .addImm(16);
5467     Et = MBB.insert(Et, LDRXpost);
5468   }
5469 
5470   // If this is a tail call outlined function, then there's already a return.
5471   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5472       OF.FrameConstructionID == MachineOutlinerThunk)
5473     return;
5474 
5475   // It's not a tail call, so we have to insert the return ourselves.
5476   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5477                           .addReg(AArch64::LR, RegState::Undef);
5478   MBB.insert(MBB.end(), ret);
5479 
5480   // Did we have to modify the stack by saving the link register?
5481   if (OF.FrameConstructionID != MachineOutlinerDefault)
5482     return;
5483 
5484   // We modified the stack.
5485   // Walk over the basic block and fix up all the stack accesses.
5486   fixupPostOutline(MBB);
5487 }
5488 
5489 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5490     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5491     MachineFunction &MF, const outliner::Candidate &C) const {
5492 
5493   // Are we tail calling?
5494   if (C.CallConstructionID == MachineOutlinerTailCall) {
5495     // If yes, then we can just branch to the label.
5496     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
5497                             .addGlobalAddress(M.getNamedValue(MF.getName()))
5498                             .addImm(0));
5499     return It;
5500   }
5501 
5502   // Are we saving the link register?
5503   if (C.CallConstructionID == MachineOutlinerNoLRSave ||
5504       C.CallConstructionID == MachineOutlinerThunk) {
5505     // No, so just insert the call.
5506     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5507                             .addGlobalAddress(M.getNamedValue(MF.getName())));
5508     return It;
5509   }
5510 
5511   // We want to return the spot where we inserted the call.
5512   MachineBasicBlock::iterator CallPt;
5513 
5514   // Instructions for saving and restoring LR around the call instruction we're
5515   // going to insert.
5516   MachineInstr *Save;
5517   MachineInstr *Restore;
5518   // Can we save to a register?
5519   if (C.CallConstructionID == MachineOutlinerRegSave) {
5520     // FIXME: This logic should be sunk into a target-specific interface so that
5521     // we don't have to recompute the register.
5522     unsigned Reg = findRegisterToSaveLRTo(C);
5523     assert(Reg != 0 && "No callee-saved register available?");
5524 
5525     // Save and restore LR from that register.
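    // (ORRXrs with XZR as the first source is the canonical MOV encoding, so
    // this pair is effectively "mov xN, x30" / "mov x30, xN" for the chosen
    // register N.)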
5526     Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
5527                .addReg(AArch64::XZR)
5528                .addReg(AArch64::LR)
5529                .addImm(0);
5530     Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
5531                 .addReg(AArch64::XZR)
5532                 .addReg(Reg)
5533                 .addImm(0);
5534   } else {
5535     // We have the default case. Save and restore from SP.
5536     Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5537                .addReg(AArch64::SP, RegState::Define)
5538                .addReg(AArch64::LR)
5539                .addReg(AArch64::SP)
5540                .addImm(-16);
5541     Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5542                   .addReg(AArch64::SP, RegState::Define)
5543                   .addReg(AArch64::LR, RegState::Define)
5544                   .addReg(AArch64::SP)
5545                   .addImm(16);
5546   }
5547 
5548   It = MBB.insert(It, Save);
5549   It++;
5550 
5551   // Insert the call.
5552   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5553                           .addGlobalAddress(M.getNamedValue(MF.getName())));
5554   CallPt = It;
5555   It++;
5556 
5557   It = MBB.insert(It, Restore);
5558   return CallPt;
5559 }
5560 
5561 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
5562   MachineFunction &MF) const {
5563   return MF.getFunction().optForMinSize();
5564 }
5565