1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64Subtarget.h"
17 #include "MCTargetDesc/AArch64AddressingModes.h"
18 #include "Utils/AArch64BaseInfo.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/CodeGen/MachineBasicBlock.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineFunction.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineMemOperand.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/MachineModuleInfo.h"
31 #include "llvm/CodeGen/StackMaps.h"
32 #include "llvm/CodeGen/TargetRegisterInfo.h"
33 #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCInst.h"
37 #include "llvm/MC/MCInstrDesc.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CodeGen.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Compiler.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/MathExtras.h"
44 #include "llvm/Target/TargetMachine.h"
45 #include "llvm/Target/TargetOptions.h"
46 #include <cassert>
47 #include <cstdint>
48 #include <iterator>
49 #include <utility>
50
51 using namespace llvm;
52
53 #define GET_INSTRINFO_CTOR_DTOR
54 #include "AArch64GenInstrInfo.inc"
55
56 static cl::opt<unsigned> TBZDisplacementBits(
57 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
58 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
59
60 static cl::opt<unsigned> CBZDisplacementBits(
61 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
62 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
63
64 static cl::opt<unsigned>
65 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
66 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
67
68 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
69 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
70 AArch64::CATCHRET),
71 RI(STI.getTargetTriple()), Subtarget(STI) {}
72
73 /// GetInstSize - Return the number of bytes of code the specified
74 /// instruction may occupy. This returns the maximum number of bytes.
75 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
76 const MachineBasicBlock &MBB = *MI.getParent();
77 const MachineFunction *MF = MBB.getParent();
78 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
79
80 if (MI.getOpcode() == AArch64::INLINEASM)
81 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
82
83 // FIXME: We currently only handle pseudoinstructions that don't get expanded
84 // before the assembly printer.
85 unsigned NumBytes = 0;
86 const MCInstrDesc &Desc = MI.getDesc();
87 switch (Desc.getOpcode()) {
88 default:
89 // Anything not explicitly designated otherwise is a normal 4-byte insn.
90 NumBytes = 4;
91 break;
92 case TargetOpcode::DBG_VALUE:
93 case TargetOpcode::EH_LABEL:
94 case TargetOpcode::IMPLICIT_DEF:
95 case TargetOpcode::KILL:
96 NumBytes = 0;
97 break;
98 case TargetOpcode::STACKMAP:
99 // The upper bound for a stackmap intrinsic is the full length of its shadow
100 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
101 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
102 break;
103 case TargetOpcode::PATCHPOINT:
104 // The size of the patchpoint intrinsic is the number of bytes requested
105 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
106 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
107 break;
108 case AArch64::TLSDESC_CALLSEQ:
109 // This gets lowered to an instruction sequence which takes 16 bytes
110 NumBytes = 16;
111 break;
112 case AArch64::JumpTableDest32:
113 case AArch64::JumpTableDest16:
114 case AArch64::JumpTableDest8:
115 NumBytes = 12;
116 break;
117 case AArch64::SPACE:
118 NumBytes = MI.getOperand(1).getImm();
119 break;
120 }
121
122 return NumBytes;
123 }
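// Illustrative note (not taken from this file): the fixed sizes above fall out
// of the expanded sequences. For example, TLSDESC_CALLSEQ is later lowered to
// a TLS-descriptor call of roughly the shape
//   adrp  x0, :tlsdesc:var
//   ldr   x1, [x0, :tlsdesc_lo12:var]
//   add   x0, x0, :tlsdesc_lo12:var
//   blr   x1
// i.e. four 4-byte instructions, hence the 16 bytes reported here ("var" is a
// placeholder symbol used only for this sketch).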
124
125 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
126 SmallVectorImpl<MachineOperand> &Cond) {
127 // Block ends with fall-through condbranch.
128 switch (LastInst->getOpcode()) {
129 default:
130 llvm_unreachable("Unknown branch instruction?");
131 case AArch64::Bcc:
132 Target = LastInst->getOperand(1).getMBB();
133 Cond.push_back(LastInst->getOperand(0));
134 break;
135 case AArch64::CBZW:
136 case AArch64::CBZX:
137 case AArch64::CBNZW:
138 case AArch64::CBNZX:
139 Target = LastInst->getOperand(1).getMBB();
140 Cond.push_back(MachineOperand::CreateImm(-1));
141 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
142 Cond.push_back(LastInst->getOperand(0));
143 break;
144 case AArch64::TBZW:
145 case AArch64::TBZX:
146 case AArch64::TBNZW:
147 case AArch64::TBNZX:
148 Target = LastInst->getOperand(2).getMBB();
149 Cond.push_back(MachineOperand::CreateImm(-1));
150 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
151 Cond.push_back(LastInst->getOperand(0));
152 Cond.push_back(LastInst->getOperand(1));
153 }
154 }
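// For reference, the Cond vector built above uses three layouts (derived
// directly from the code, shown here only as an illustration):
//   b.cc <bb>                -> Cond = { cc }
//   cbz/cbnz Rt, <bb>        -> Cond = { -1, opcode, Rt }
//   tbz/tbnz Rt, #bit, <bb>  -> Cond = { -1, opcode, Rt, bit }
// The leading -1 marks a folded compare-and-branch so that later consumers
// (reverseBranchCondition, instantiateCondBranch, insertSelect) can tell the
// forms apart.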
155
156 static unsigned getBranchDisplacementBits(unsigned Opc) {
157 switch (Opc) {
158 default:
159 llvm_unreachable("unexpected opcode!");
160 case AArch64::B:
161 return 64;
162 case AArch64::TBNZW:
163 case AArch64::TBZW:
164 case AArch64::TBNZX:
165 case AArch64::TBZX:
166 return TBZDisplacementBits;
167 case AArch64::CBNZW:
168 case AArch64::CBZW:
169 case AArch64::CBNZX:
170 case AArch64::CBZX:
171 return CBZDisplacementBits;
172 case AArch64::Bcc:
173 return BCCDisplacementBits;
174 }
175 }
176
177 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
178 int64_t BrOffset) const {
179 unsigned Bits = getBranchDisplacementBits(BranchOp);
180 assert(Bits >= 3 && "max branch displacement must be enough to jump "
181 "over conditional branch expansion");
182 return isIntN(Bits, BrOffset / 4);
183 }
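// Worked example for the check above (architectural ranges; the cl::opts at
// the top of the file may narrow them for testing): Bcc and CB[N]Z encode a
// 19-bit signed word offset, so isIntN(19, BrOffset / 4) accepts branches
// within roughly +/-1 MiB (2^18 * 4 bytes); TB[N]Z encodes 14 bits, giving
// roughly +/-32 KiB.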
184
185 MachineBasicBlock *
186 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
187 switch (MI.getOpcode()) {
188 default:
189 llvm_unreachable("unexpected opcode!");
190 case AArch64::B:
191 return MI.getOperand(0).getMBB();
192 case AArch64::TBZW:
193 case AArch64::TBNZW:
194 case AArch64::TBZX:
195 case AArch64::TBNZX:
196 return MI.getOperand(2).getMBB();
197 case AArch64::CBZW:
198 case AArch64::CBNZW:
199 case AArch64::CBZX:
200 case AArch64::CBNZX:
201 case AArch64::Bcc:
202 return MI.getOperand(1).getMBB();
203 }
204 }
205
206 // Branch analysis.
207 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
208 MachineBasicBlock *&TBB,
209 MachineBasicBlock *&FBB,
210 SmallVectorImpl<MachineOperand> &Cond,
211 bool AllowModify) const {
212 // If the block has no terminators, it just falls into the block after it.
213 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
214 if (I == MBB.end())
215 return false;
216
217 if (!isUnpredicatedTerminator(*I))
218 return false;
219
220 // Get the last instruction in the block.
221 MachineInstr *LastInst = &*I;
222
223 // If there is only one terminator instruction, process it.
224 unsigned LastOpc = LastInst->getOpcode();
225 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
226 if (isUncondBranchOpcode(LastOpc)) {
227 TBB = LastInst->getOperand(0).getMBB();
228 return false;
229 }
230 if (isCondBranchOpcode(LastOpc)) {
231 // Block ends with fall-through condbranch.
232 parseCondBranch(LastInst, TBB, Cond);
233 return false;
234 }
235 return true; // Can't handle indirect branch.
236 }
237
238 // Get the instruction before it if it is a terminator.
239 MachineInstr *SecondLastInst = &*I;
240 unsigned SecondLastOpc = SecondLastInst->getOpcode();
241
242 // If AllowModify is true and the block ends with two or more unconditional
243 // branches, delete all but the first unconditional branch.
244 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
245 while (isUncondBranchOpcode(SecondLastOpc)) {
246 LastInst->eraseFromParent();
247 LastInst = SecondLastInst;
248 LastOpc = LastInst->getOpcode();
249 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
250 // Return now; the only terminator is an unconditional branch.
251 TBB = LastInst->getOperand(0).getMBB();
252 return false;
253 } else {
254 SecondLastInst = &*I;
255 SecondLastOpc = SecondLastInst->getOpcode();
256 }
257 }
258 }
259
260 // If there are three terminators, we don't know what sort of block this is.
261 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
262 return true;
263
264 // If the block ends with a B and a Bcc, handle it.
265 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
266 parseCondBranch(SecondLastInst, TBB, Cond);
267 FBB = LastInst->getOperand(0).getMBB();
268 return false;
269 }
270
271 // If the block ends with two unconditional branches, handle it. The second
272 // one is not executed, so remove it.
273 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
274 TBB = SecondLastInst->getOperand(0).getMBB();
275 I = LastInst;
276 if (AllowModify)
277 I->eraseFromParent();
278 return false;
279 }
280
281 // ...likewise if it ends with an indirect branch followed by an unconditional
282 // branch.
283 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
284 I = LastInst;
285 if (AllowModify)
286 I->eraseFromParent();
287 return true;
288 }
289
290 // Otherwise, can't handle this.
291 return true;
292 }
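// A minimal usage sketch (an illustration, not code from this file) of how a
// generic pass typically consumes the outputs of analyzeBranch, assuming TII
// and MBB are in scope:
//   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
//   SmallVector<MachineOperand, 4> Cond;
//   if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
//     // Cond.empty()          : unconditional flow to TBB (or fallthrough).
//     // !Cond.empty() && !FBB : conditional branch to TBB, else fallthrough.
//     // !Cond.empty() && FBB  : conditional branch to TBB, else branch to FBB.
//   }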
293
294 bool AArch64InstrInfo::reverseBranchCondition(
295 SmallVectorImpl<MachineOperand> &Cond) const {
296 if (Cond[0].getImm() != -1) {
297 // Regular Bcc
298 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
299 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
300 } else {
301 // Folded compare-and-branch
302 switch (Cond[1].getImm()) {
303 default:
304 llvm_unreachable("Unknown conditional branch!");
305 case AArch64::CBZW:
306 Cond[1].setImm(AArch64::CBNZW);
307 break;
308 case AArch64::CBNZW:
309 Cond[1].setImm(AArch64::CBZW);
310 break;
311 case AArch64::CBZX:
312 Cond[1].setImm(AArch64::CBNZX);
313 break;
314 case AArch64::CBNZX:
315 Cond[1].setImm(AArch64::CBZX);
316 break;
317 case AArch64::TBZW:
318 Cond[1].setImm(AArch64::TBNZW);
319 break;
320 case AArch64::TBNZW:
321 Cond[1].setImm(AArch64::TBZW);
322 break;
323 case AArch64::TBZX:
324 Cond[1].setImm(AArch64::TBNZX);
325 break;
326 case AArch64::TBNZX:
327 Cond[1].setImm(AArch64::TBZX);
328 break;
329 }
330 }
331
332 return false;
333 }
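// Example of the effect (illustrative): a Cond of { AArch64CC::GE } becomes
// { AArch64CC::LT }, and a folded form such as { -1, AArch64::CBZW, Rt }
// becomes { -1, AArch64::CBNZW, Rt }, i.e. "cbz wN, bb" turns into
// "cbnz wN, bb".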
334
335 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
336 int *BytesRemoved) const {
337 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
338 if (I == MBB.end())
339 return 0;
340
341 if (!isUncondBranchOpcode(I->getOpcode()) &&
342 !isCondBranchOpcode(I->getOpcode()))
343 return 0;
344
345 // Remove the branch.
346 I->eraseFromParent();
347
348 I = MBB.end();
349
350 if (I == MBB.begin()) {
351 if (BytesRemoved)
352 *BytesRemoved = 4;
353 return 1;
354 }
355 --I;
356 if (!isCondBranchOpcode(I->getOpcode())) {
357 if (BytesRemoved)
358 *BytesRemoved = 4;
359 return 1;
360 }
361
362 // Remove the branch.
363 I->eraseFromParent();
364 if (BytesRemoved)
365 *BytesRemoved = 8;
366
367 return 2;
368 }
369
370 void AArch64InstrInfo::instantiateCondBranch(
371 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
372 ArrayRef<MachineOperand> Cond) const {
373 if (Cond[0].getImm() != -1) {
374 // Regular Bcc
375 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
376 } else {
377 // Folded compare-and-branch
378 // Note that we use MachineInstrBuilder::add instead of addReg to keep the flags.
379 const MachineInstrBuilder MIB =
380 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
381 if (Cond.size() > 3)
382 MIB.addImm(Cond[3].getImm());
383 MIB.addMBB(TBB);
384 }
385 }
386
387 unsigned AArch64InstrInfo::insertBranch(
388 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
389 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
390 // Shouldn't be a fall through.
391 assert(TBB && "insertBranch must not be told to insert a fallthrough");
392
393 if (!FBB) {
394 if (Cond.empty()) // Unconditional branch?
395 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
396 else
397 instantiateCondBranch(MBB, DL, TBB, Cond);
398
399 if (BytesAdded)
400 *BytesAdded = 4;
401
402 return 1;
403 }
404
405 // Two-way conditional branch.
406 instantiateCondBranch(MBB, DL, TBB, Cond);
407 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
408
409 if (BytesAdded)
410 *BytesAdded = 8;
411
412 return 2;
413 }
414
415 // Find the original register that VReg is copied from.
416 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
417 while (TargetRegisterInfo::isVirtualRegister(VReg)) {
418 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
419 if (!DefMI->isFullCopy())
420 return VReg;
421 VReg = DefMI->getOperand(1).getReg();
422 }
423 return VReg;
424 }
425
426 // Determine if VReg is defined by an instruction that can be folded into a
427 // csel instruction. If so, return the folded opcode, and the replacement
428 // register.
429 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
430 unsigned *NewVReg = nullptr) {
431 VReg = removeCopies(MRI, VReg);
432 if (!TargetRegisterInfo::isVirtualRegister(VReg))
433 return 0;
434
435 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
436 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
437 unsigned Opc = 0;
438 unsigned SrcOpNum = 0;
439 switch (DefMI->getOpcode()) {
440 case AArch64::ADDSXri:
441 case AArch64::ADDSWri:
442 // if NZCV is used, do not fold.
443 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
444 return 0;
445 // fall-through to ADDXri and ADDWri.
446 LLVM_FALLTHROUGH;
447 case AArch64::ADDXri:
448 case AArch64::ADDWri:
449 // add x, 1 -> csinc.
450 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
451 DefMI->getOperand(3).getImm() != 0)
452 return 0;
453 SrcOpNum = 1;
454 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
455 break;
456
457 case AArch64::ORNXrr:
458 case AArch64::ORNWrr: {
459 // not x -> csinv, represented as orn dst, xzr, src.
460 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
461 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
462 return 0;
463 SrcOpNum = 2;
464 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
465 break;
466 }
467
468 case AArch64::SUBSXrr:
469 case AArch64::SUBSWrr:
470 // if NZCV is used, do not fold.
471 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
472 return 0;
473 // fall-through to SUBXrr and SUBWrr.
474 LLVM_FALLTHROUGH;
475 case AArch64::SUBXrr:
476 case AArch64::SUBWrr: {
477 // neg x -> csneg, represented as sub dst, xzr, src.
478 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
479 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
480 return 0;
481 SrcOpNum = 2;
482 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
483 break;
484 }
485 default:
486 return 0;
487 }
488 assert(Opc && SrcOpNum && "Missing parameters");
489
490 if (NewVReg)
491 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
492 return Opc;
493 }
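// Illustrative folding example (a sketch, using made-up virtual registers):
//   %t = ADDWri %a, 1, 0            ; %t = %a + 1
//   ... select %t (true value) or %b (false value) on condition cc ...
// can be emitted by insertSelect() below as a single
//   %r = CSINCWr %b, %a, invert(cc) ; cc ? %a + 1 : %b
// which is why the add-one, orn (not) and sub-from-zero (neg) forms above map
// to CSINC, CSINV and CSNEG respectively.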
494
495 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
496 ArrayRef<MachineOperand> Cond,
497 unsigned TrueReg, unsigned FalseReg,
498 int &CondCycles, int &TrueCycles,
499 int &FalseCycles) const {
500 // Check register classes.
501 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
502 const TargetRegisterClass *RC =
503 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
504 if (!RC)
505 return false;
506
507 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
508 unsigned ExtraCondLat = Cond.size() != 1;
509
510 // GPRs are handled by csel.
511 // FIXME: Fold in x+1, -x, and ~x when applicable.
512 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
513 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
514 // Single-cycle csel, csinc, csinv, and csneg.
515 CondCycles = 1 + ExtraCondLat;
516 TrueCycles = FalseCycles = 1;
517 if (canFoldIntoCSel(MRI, TrueReg))
518 TrueCycles = 0;
519 else if (canFoldIntoCSel(MRI, FalseReg))
520 FalseCycles = 0;
521 return true;
522 }
523
524 // Scalar floating point is handled by fcsel.
525 // FIXME: Form fabs, fmin, and fmax when applicable.
526 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
527 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
528 CondCycles = 5 + ExtraCondLat;
529 TrueCycles = FalseCycles = 2;
530 return true;
531 }
532
533 // Can't do vectors.
534 return false;
535 }
536
537 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
538 MachineBasicBlock::iterator I,
539 const DebugLoc &DL, unsigned DstReg,
540 ArrayRef<MachineOperand> Cond,
541 unsigned TrueReg, unsigned FalseReg) const {
542 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
543
544 // Parse the condition code, see parseCondBranch() above.
545 AArch64CC::CondCode CC;
546 switch (Cond.size()) {
547 default:
548 llvm_unreachable("Unknown condition opcode in Cond");
549 case 1: // b.cc
550 CC = AArch64CC::CondCode(Cond[0].getImm());
551 break;
552 case 3: { // cbz/cbnz
553 // We must insert a compare against 0.
554 bool Is64Bit;
555 switch (Cond[1].getImm()) {
556 default:
557 llvm_unreachable("Unknown branch opcode in Cond");
558 case AArch64::CBZW:
559 Is64Bit = false;
560 CC = AArch64CC::EQ;
561 break;
562 case AArch64::CBZX:
563 Is64Bit = true;
564 CC = AArch64CC::EQ;
565 break;
566 case AArch64::CBNZW:
567 Is64Bit = false;
568 CC = AArch64CC::NE;
569 break;
570 case AArch64::CBNZX:
571 Is64Bit = true;
572 CC = AArch64CC::NE;
573 break;
574 }
575 unsigned SrcReg = Cond[2].getReg();
576 if (Is64Bit) {
577 // cmp reg, #0 is actually subs xzr, reg, #0.
578 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
579 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
580 .addReg(SrcReg)
581 .addImm(0)
582 .addImm(0);
583 } else {
584 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
585 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
586 .addReg(SrcReg)
587 .addImm(0)
588 .addImm(0);
589 }
590 break;
591 }
592 case 4: { // tbz/tbnz
593 // We must insert a tst instruction.
594 switch (Cond[1].getImm()) {
595 default:
596 llvm_unreachable("Unknown branch opcode in Cond");
597 case AArch64::TBZW:
598 case AArch64::TBZX:
599 CC = AArch64CC::EQ;
600 break;
601 case AArch64::TBNZW:
602 case AArch64::TBNZX:
603 CC = AArch64CC::NE;
604 break;
605 }
606 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
607 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
608 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
609 .addReg(Cond[2].getReg())
610 .addImm(
611 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
612 else
613 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
614 .addReg(Cond[2].getReg())
615 .addImm(
616 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
617 break;
618 }
619 }
620
621 unsigned Opc = 0;
622 const TargetRegisterClass *RC = nullptr;
623 bool TryFold = false;
624 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
625 RC = &AArch64::GPR64RegClass;
626 Opc = AArch64::CSELXr;
627 TryFold = true;
628 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
629 RC = &AArch64::GPR32RegClass;
630 Opc = AArch64::CSELWr;
631 TryFold = true;
632 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
633 RC = &AArch64::FPR64RegClass;
634 Opc = AArch64::FCSELDrrr;
635 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
636 RC = &AArch64::FPR32RegClass;
637 Opc = AArch64::FCSELSrrr;
638 }
639 assert(RC && "Unsupported regclass");
640
641 // Try folding simple instructions into the csel.
642 if (TryFold) {
643 unsigned NewVReg = 0;
644 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
645 if (FoldedOpc) {
646 // The folded opcodes csinc, csinv and csneg apply the operation to
647 // FalseReg, so we need to invert the condition.
648 CC = AArch64CC::getInvertedCondCode(CC);
649 TrueReg = FalseReg;
650 } else
651 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
652
653 // Fold the operation. Leave any dead instructions for DCE to clean up.
654 if (FoldedOpc) {
655 FalseReg = NewVReg;
656 Opc = FoldedOpc;
657 // This extends the live range of NewVReg.
658 MRI.clearKillFlags(NewVReg);
659 }
660 }
661
662 // Pull all virtual registers into the appropriate class.
663 MRI.constrainRegClass(TrueReg, RC);
664 MRI.constrainRegClass(FalseReg, RC);
665
666 // Insert the csel.
667 BuildMI(MBB, I, DL, get(Opc), DstReg)
668 .addReg(TrueReg)
669 .addReg(FalseReg)
670 .addImm(CC);
671 }
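// Example of the emitted sequence (illustrative only) for a Cond that came
// from "cbz wN, bb", selecting %true vs. %false into a 32-bit %dst:
//   $wzr = SUBSWri %src, 0, 0          ; cmp wN, #0
//   %dst = CSELWr %true, %false, AArch64CC::EQ
// i.e. the folded compare-and-branch is re-materialised as an explicit
// compare against zero feeding a csel.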
672
673 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
674 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
675 uint64_t Imm = MI.getOperand(1).getImm();
676 uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
677 uint64_t Encoding;
678 return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
679 }
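// Example (illustrative): MOVi32imm #0x00ff00ff passes the check, since
// 0x00ff00ff is a valid logical immediate and can be materialised as
//   orr wN, wzr, #0x00ff00ff
// whereas MOVi32imm #0x12345678 is not encodable that way and needs a
// movz/movk sequence, so it is not treated as "as cheap as a move" below.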
680
681 // FIXME: this implementation should be micro-architecture dependent, so a
682 // micro-architecture target hook should be introduced here in the future.
683 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
684 if (!Subtarget.hasCustomCheapAsMoveHandling())
685 return MI.isAsCheapAsAMove();
686
687 const unsigned Opcode = MI.getOpcode();
688
689 // Firstly, check cases gated by features.
690
691 if (Subtarget.hasZeroCycleZeroingFP()) {
692 if (Opcode == AArch64::FMOVH0 ||
693 Opcode == AArch64::FMOVS0 ||
694 Opcode == AArch64::FMOVD0)
695 return true;
696 }
697
698 if (Subtarget.hasZeroCycleZeroingGP()) {
699 if (Opcode == TargetOpcode::COPY &&
700 (MI.getOperand(1).getReg() == AArch64::WZR ||
701 MI.getOperand(1).getReg() == AArch64::XZR))
702 return true;
703 }
704
705 // Secondly, check cases specific to sub-targets.
706
707 if (Subtarget.hasExynosCheapAsMoveHandling()) {
708 if (isExynosCheapAsMove(MI))
709 return true;
710
711 return MI.isAsCheapAsAMove();
712 }
713
714 // Finally, check generic cases.
715
716 switch (Opcode) {
717 default:
718 return false;
719
720 // add/sub on register without shift
721 case AArch64::ADDWri:
722 case AArch64::ADDXri:
723 case AArch64::SUBWri:
724 case AArch64::SUBXri:
725 return (MI.getOperand(3).getImm() == 0);
726
727 // logical ops on immediate
728 case AArch64::ANDWri:
729 case AArch64::ANDXri:
730 case AArch64::EORWri:
731 case AArch64::EORXri:
732 case AArch64::ORRWri:
733 case AArch64::ORRXri:
734 return true;
735
736 // logical ops on register without shift
737 case AArch64::ANDWrr:
738 case AArch64::ANDXrr:
739 case AArch64::BICWrr:
740 case AArch64::BICXrr:
741 case AArch64::EONWrr:
742 case AArch64::EONXrr:
743 case AArch64::EORWrr:
744 case AArch64::EORXrr:
745 case AArch64::ORNWrr:
746 case AArch64::ORNXrr:
747 case AArch64::ORRWrr:
748 case AArch64::ORRXrr:
749 return true;
750
751 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
752 // ORRXri, it is as cheap as MOV
753 case AArch64::MOVi32imm:
754 return canBeExpandedToORR(MI, 32);
755 case AArch64::MOVi64imm:
756 return canBeExpandedToORR(MI, 64);
757 }
758
759 llvm_unreachable("Unknown opcode to check as cheap as a move!");
760 }
761
762 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
763 switch (MI.getOpcode()) {
764 default:
765 return false;
766
767 case AArch64::ADDWrs:
768 case AArch64::ADDXrs:
769 case AArch64::ADDSWrs:
770 case AArch64::ADDSXrs: {
771 unsigned Imm = MI.getOperand(3).getImm();
772 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
773 if (ShiftVal == 0)
774 return true;
775 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
776 }
777
778 case AArch64::ADDWrx:
779 case AArch64::ADDXrx:
780 case AArch64::ADDXrx64:
781 case AArch64::ADDSWrx:
782 case AArch64::ADDSXrx:
783 case AArch64::ADDSXrx64: {
784 unsigned Imm = MI.getOperand(3).getImm();
785 switch (AArch64_AM::getArithExtendType(Imm)) {
786 default:
787 return false;
788 case AArch64_AM::UXTB:
789 case AArch64_AM::UXTH:
790 case AArch64_AM::UXTW:
791 case AArch64_AM::UXTX:
792 return AArch64_AM::getArithShiftValue(Imm) <= 4;
793 }
794 }
795
796 case AArch64::SUBWrs:
797 case AArch64::SUBSWrs: {
798 unsigned Imm = MI.getOperand(3).getImm();
799 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
800 return ShiftVal == 0 ||
801 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
802 }
803
804 case AArch64::SUBXrs:
805 case AArch64::SUBSXrs: {
806 unsigned Imm = MI.getOperand(3).getImm();
807 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
808 return ShiftVal == 0 ||
809 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
810 }
811
812 case AArch64::SUBWrx:
813 case AArch64::SUBXrx:
814 case AArch64::SUBXrx64:
815 case AArch64::SUBSWrx:
816 case AArch64::SUBSXrx:
817 case AArch64::SUBSXrx64: {
818 unsigned Imm = MI.getOperand(3).getImm();
819 switch (AArch64_AM::getArithExtendType(Imm)) {
820 default:
821 return false;
822 case AArch64_AM::UXTB:
823 case AArch64_AM::UXTH:
824 case AArch64_AM::UXTW:
825 case AArch64_AM::UXTX:
826 return AArch64_AM::getArithShiftValue(Imm) == 0;
827 }
828 }
829
830 case AArch64::LDRBBroW:
831 case AArch64::LDRBBroX:
832 case AArch64::LDRBroW:
833 case AArch64::LDRBroX:
834 case AArch64::LDRDroW:
835 case AArch64::LDRDroX:
836 case AArch64::LDRHHroW:
837 case AArch64::LDRHHroX:
838 case AArch64::LDRHroW:
839 case AArch64::LDRHroX:
840 case AArch64::LDRQroW:
841 case AArch64::LDRQroX:
842 case AArch64::LDRSBWroW:
843 case AArch64::LDRSBWroX:
844 case AArch64::LDRSBXroW:
845 case AArch64::LDRSBXroX:
846 case AArch64::LDRSHWroW:
847 case AArch64::LDRSHWroX:
848 case AArch64::LDRSHXroW:
849 case AArch64::LDRSHXroX:
850 case AArch64::LDRSWroW:
851 case AArch64::LDRSWroX:
852 case AArch64::LDRSroW:
853 case AArch64::LDRSroX:
854 case AArch64::LDRWroW:
855 case AArch64::LDRWroX:
856 case AArch64::LDRXroW:
857 case AArch64::LDRXroX:
858 case AArch64::PRFMroW:
859 case AArch64::PRFMroX:
860 case AArch64::STRBBroW:
861 case AArch64::STRBBroX:
862 case AArch64::STRBroW:
863 case AArch64::STRBroX:
864 case AArch64::STRDroW:
865 case AArch64::STRDroX:
866 case AArch64::STRHHroW:
867 case AArch64::STRHHroX:
868 case AArch64::STRHroW:
869 case AArch64::STRHroX:
870 case AArch64::STRQroW:
871 case AArch64::STRQroX:
872 case AArch64::STRSroW:
873 case AArch64::STRSroX:
874 case AArch64::STRWroW:
875 case AArch64::STRWroX:
876 case AArch64::STRXroW:
877 case AArch64::STRXroX: {
878 unsigned IsSigned = MI.getOperand(3).getImm();
879 return !IsSigned;
880 }
881 }
882 }
883
884 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
885 unsigned Opc = MI.getOpcode();
886 switch (Opc) {
887 default:
888 return false;
889 case AArch64::SEH_StackAlloc:
890 case AArch64::SEH_SaveFPLR:
891 case AArch64::SEH_SaveFPLR_X:
892 case AArch64::SEH_SaveReg:
893 case AArch64::SEH_SaveReg_X:
894 case AArch64::SEH_SaveRegP:
895 case AArch64::SEH_SaveRegP_X:
896 case AArch64::SEH_SaveFReg:
897 case AArch64::SEH_SaveFReg_X:
898 case AArch64::SEH_SaveFRegP:
899 case AArch64::SEH_SaveFRegP_X:
900 case AArch64::SEH_SetFP:
901 case AArch64::SEH_AddFP:
902 case AArch64::SEH_Nop:
903 case AArch64::SEH_PrologEnd:
904 case AArch64::SEH_EpilogStart:
905 case AArch64::SEH_EpilogEnd:
906 return true;
907 }
908 }
909
910 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
911 unsigned &SrcReg, unsigned &DstReg,
912 unsigned &SubIdx) const {
913 switch (MI.getOpcode()) {
914 default:
915 return false;
916 case AArch64::SBFMXri: // aka sxtw
917 case AArch64::UBFMXri: // aka uxtw
918 // Check for the 32 -> 64 bit extension case, these instructions can do
919 // much more.
920 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
921 return false;
922 // This is a signed or unsigned 32 -> 64 bit extension.
923 SrcReg = MI.getOperand(1).getReg();
924 DstReg = MI.getOperand(0).getReg();
925 SubIdx = AArch64::sub_32;
926 return true;
927 }
928 }
929
930 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
931 MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
932 const TargetRegisterInfo *TRI = &getRegisterInfo();
933 MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
934 int64_t OffsetA = 0, OffsetB = 0;
935 unsigned WidthA = 0, WidthB = 0;
936
937 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
938 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
939
940 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
941 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
942 return false;
943
944 // Retrieve the base, the offset from the base, and the width. Width is the
945 // size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If the bases
946 // are identical, and the offset of the lower memory access plus its width
947 // does not overlap the offset of the higher memory access, then the two
948 // memory accesses are disjoint.
949 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
950 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
951 if (BaseOpA->isIdenticalTo(*BaseOpB)) {
952 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
953 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
954 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
955 if (LowOffset + LowWidth <= HighOffset)
956 return true;
957 }
958 }
959 return false;
960 }
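// Worked example for the check above (illustrative): with a common base x1,
//   ldr x0, [x1]        ; OffsetA = 0, WidthA = 8
//   str x2, [x1, #8]    ; OffsetB = 8, WidthB = 8
// LowOffset + LowWidth = 0 + 8 <= HighOffset = 8, so the two accesses cannot
// overlap and the function returns true.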
961
962 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
963 const MachineBasicBlock *MBB,
964 const MachineFunction &MF) const {
965 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
966 return true;
967 switch (MI.getOpcode()) {
968 case AArch64::HINT:
969 // CSDB hints are scheduling barriers.
970 if (MI.getOperand(0).getImm() == 0x14)
971 return true;
972 break;
973 case AArch64::DSB:
974 case AArch64::ISB:
975 // DSB and ISB also are scheduling barriers.
976 return true;
977 default:;
978 }
979 return isSEHInstruction(MI);
980 }
981
982 /// analyzeCompare - For a comparison instruction, return the source registers
983 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
984 /// Return true if the comparison instruction can be analyzed.
985 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
986 unsigned &SrcReg2, int &CmpMask,
987 int &CmpValue) const {
988 // The first operand can be a frame index where we'd normally expect a
989 // register.
990 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
991 if (!MI.getOperand(1).isReg())
992 return false;
993
994 switch (MI.getOpcode()) {
995 default:
996 break;
997 case AArch64::SUBSWrr:
998 case AArch64::SUBSWrs:
999 case AArch64::SUBSWrx:
1000 case AArch64::SUBSXrr:
1001 case AArch64::SUBSXrs:
1002 case AArch64::SUBSXrx:
1003 case AArch64::ADDSWrr:
1004 case AArch64::ADDSWrs:
1005 case AArch64::ADDSWrx:
1006 case AArch64::ADDSXrr:
1007 case AArch64::ADDSXrs:
1008 case AArch64::ADDSXrx:
1009 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1010 SrcReg = MI.getOperand(1).getReg();
1011 SrcReg2 = MI.getOperand(2).getReg();
1012 CmpMask = ~0;
1013 CmpValue = 0;
1014 return true;
1015 case AArch64::SUBSWri:
1016 case AArch64::ADDSWri:
1017 case AArch64::SUBSXri:
1018 case AArch64::ADDSXri:
1019 SrcReg = MI.getOperand(1).getReg();
1020 SrcReg2 = 0;
1021 CmpMask = ~0;
1022 // FIXME: CmpValue is only used to compare against zero in optimizeCompareInstr, so collapse it to 0 or 1 here.
1023 CmpValue = MI.getOperand(2).getImm() != 0;
1024 return true;
1025 case AArch64::ANDSWri:
1026 case AArch64::ANDSXri:
1027 // ANDS does not use the same encoding scheme as the other xxxS
1028 // instructions.
1029 SrcReg = MI.getOperand(1).getReg();
1030 SrcReg2 = 0;
1031 CmpMask = ~0;
1032 // FIXME: The return value type of decodeLogicalImmediate is uint64_t,
1033 // while the type of CmpValue is int. When converting uint64_t to int,
1034 // the high 32 bits of the uint64_t will be lost.
1035 // In fact this caused a bug in spec2006-483.xalancbmk.
1036 // CmpValue is only used to compare against zero in optimizeCompareInstr.
1037 CmpValue = AArch64_AM::decodeLogicalImmediate(
1038 MI.getOperand(2).getImm(),
1039 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1040 return true;
1041 }
1042
1043 return false;
1044 }
1045
1046 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1047 MachineBasicBlock *MBB = Instr.getParent();
1048 assert(MBB && "Can't get MachineBasicBlock here");
1049 MachineFunction *MF = MBB->getParent();
1050 assert(MF && "Can't get MachineFunction here");
1051 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1052 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1053 MachineRegisterInfo *MRI = &MF->getRegInfo();
1054
1055 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1056 ++OpIdx) {
1057 MachineOperand &MO = Instr.getOperand(OpIdx);
1058 const TargetRegisterClass *OpRegCstraints =
1059 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1060
1061 // If there's no constraint, there's nothing to do.
1062 if (!OpRegCstraints)
1063 continue;
1064 // If the operand is a frame index, there's nothing to do here.
1065 // A frame index operand will resolve correctly during PEI.
1066 if (MO.isFI())
1067 continue;
1068
1069 assert(MO.isReg() &&
1070 "Operand has register constraints without being a register!");
1071
1072 unsigned Reg = MO.getReg();
1073 if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1074 if (!OpRegCstraints->contains(Reg))
1075 return false;
1076 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1077 !MRI->constrainRegClass(Reg, OpRegCstraints))
1078 return false;
1079 }
1080
1081 return true;
1082 }
1083
1084 /// Return the opcode that does not set flags when possible - otherwise
1085 /// return the original opcode. The caller is responsible for doing the
1086 /// actual substitution and legality checking.
1087 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1088 // Don't convert all compare instructions: for some, the zero register in the
1089 // non-flag-setting form would be encoded as the sp register instead.
1090 bool MIDefinesZeroReg = false;
1091 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1092 MIDefinesZeroReg = true;
1093
1094 switch (MI.getOpcode()) {
1095 default:
1096 return MI.getOpcode();
1097 case AArch64::ADDSWrr:
1098 return AArch64::ADDWrr;
1099 case AArch64::ADDSWri:
1100 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1101 case AArch64::ADDSWrs:
1102 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1103 case AArch64::ADDSWrx:
1104 return AArch64::ADDWrx;
1105 case AArch64::ADDSXrr:
1106 return AArch64::ADDXrr;
1107 case AArch64::ADDSXri:
1108 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1109 case AArch64::ADDSXrs:
1110 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1111 case AArch64::ADDSXrx:
1112 return AArch64::ADDXrx;
1113 case AArch64::SUBSWrr:
1114 return AArch64::SUBWrr;
1115 case AArch64::SUBSWri:
1116 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1117 case AArch64::SUBSWrs:
1118 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1119 case AArch64::SUBSWrx:
1120 return AArch64::SUBWrx;
1121 case AArch64::SUBSXrr:
1122 return AArch64::SUBXrr;
1123 case AArch64::SUBSXri:
1124 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1125 case AArch64::SUBSXrs:
1126 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1127 case AArch64::SUBSXrx:
1128 return AArch64::SUBXrx;
1129 }
1130 }
1131
1132 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1133
1134 /// True when condition flags are accessed (either by writing or reading)
1135 /// on the instruction trace starting at From and ending at To.
1136 ///
1137 /// Note: If From and To are from different blocks it's assumed the condition
1138 /// flags may be accessed somewhere on the path.
1139 static bool areCFlagsAccessedBetweenInstrs(
1140 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1141 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1142 // Early exit if To is at the beginning of the BB.
1143 if (To == To->getParent()->begin())
1144 return true;
1145
1146 // Check whether the instructions are in the same basic block.
1147 // If not, assume the condition flags might get modified somewhere.
1148 if (To->getParent() != From->getParent())
1149 return true;
1150
1151 // From must be above To.
1152 assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1153 [From](MachineInstr &MI) {
1154 return MI.getIterator() == From;
1155 }) != To->getParent()->rend());
1156
1157 // We iterate backward starting at \p To until we hit \p From.
1158 for (--To; To != From; --To) {
1159 const MachineInstr &Instr = *To;
1160
1161 if (((AccessToCheck & AK_Write) &&
1162 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1163 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1164 return true;
1165 }
1166 return false;
1167 }
1168
1169 /// Try to optimize a compare instruction. A compare instruction is an
1170 /// instruction which produces AArch64::NZCV. It is a true compare
1171 /// instruction
1172 /// when there are no uses of its destination register.
1173 ///
1174 /// The following steps are tried in order:
1175 /// 1. Convert CmpInstr into an unconditional version.
1176 /// 2. Remove CmpInstr if above there is an instruction producing a needed
1177 /// condition code or an instruction which can be converted into such an
1178 /// instruction.
1179 /// Only comparison with zero is supported.
1180 bool AArch64InstrInfo::optimizeCompareInstr(
1181 MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1182 int CmpValue, const MachineRegisterInfo *MRI) const {
1183 assert(CmpInstr.getParent());
1184 assert(MRI);
1185
1186 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1187 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1188 if (DeadNZCVIdx != -1) {
1189 if (CmpInstr.definesRegister(AArch64::WZR) ||
1190 CmpInstr.definesRegister(AArch64::XZR)) {
1191 CmpInstr.eraseFromParent();
1192 return true;
1193 }
1194 unsigned Opc = CmpInstr.getOpcode();
1195 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1196 if (NewOpc == Opc)
1197 return false;
1198 const MCInstrDesc &MCID = get(NewOpc);
1199 CmpInstr.setDesc(MCID);
1200 CmpInstr.RemoveOperand(DeadNZCVIdx);
1201 bool succeeded = UpdateOperandRegClass(CmpInstr);
1202 (void)succeeded;
1203 assert(succeeded && "Some operands reg class are incompatible!");
1204 return true;
1205 }
1206
1207 // Continue only if we have a "ri" where immediate is zero.
1208 // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
1209 // function.
1210 assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1211 if (CmpValue != 0 || SrcReg2 != 0)
1212 return false;
1213
1214 // CmpInstr is a Compare instruction if its destination register is not used.
1215 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1216 return false;
1217
1218 return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1219 }
1220
1221 /// Get the opcode of the S (flag-setting) version of Instr.
1222 /// If Instr is already an S version, its opcode is returned.
1223 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
1224 /// version or we are not interested in it.
1225 static unsigned sForm(MachineInstr &Instr) {
1226 switch (Instr.getOpcode()) {
1227 default:
1228 return AArch64::INSTRUCTION_LIST_END;
1229
1230 case AArch64::ADDSWrr:
1231 case AArch64::ADDSWri:
1232 case AArch64::ADDSXrr:
1233 case AArch64::ADDSXri:
1234 case AArch64::SUBSWrr:
1235 case AArch64::SUBSWri:
1236 case AArch64::SUBSXrr:
1237 case AArch64::SUBSXri:
1238 return Instr.getOpcode();
1239
1240 case AArch64::ADDWrr:
1241 return AArch64::ADDSWrr;
1242 case AArch64::ADDWri:
1243 return AArch64::ADDSWri;
1244 case AArch64::ADDXrr:
1245 return AArch64::ADDSXrr;
1246 case AArch64::ADDXri:
1247 return AArch64::ADDSXri;
1248 case AArch64::ADCWr:
1249 return AArch64::ADCSWr;
1250 case AArch64::ADCXr:
1251 return AArch64::ADCSXr;
1252 case AArch64::SUBWrr:
1253 return AArch64::SUBSWrr;
1254 case AArch64::SUBWri:
1255 return AArch64::SUBSWri;
1256 case AArch64::SUBXrr:
1257 return AArch64::SUBSXrr;
1258 case AArch64::SUBXri:
1259 return AArch64::SUBSXri;
1260 case AArch64::SBCWr:
1261 return AArch64::SBCSWr;
1262 case AArch64::SBCXr:
1263 return AArch64::SBCSXr;
1264 case AArch64::ANDWri:
1265 return AArch64::ANDSWri;
1266 case AArch64::ANDXri:
1267 return AArch64::ANDSXri;
1268 }
1269 }
1270
1271 /// Check if AArch64::NZCV should be alive in successors of MBB.
1272 static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1273 for (auto *BB : MBB->successors())
1274 if (BB->isLiveIn(AArch64::NZCV))
1275 return true;
1276 return false;
1277 }
1278
1279 namespace {
1280
1281 struct UsedNZCV {
1282 bool N = false;
1283 bool Z = false;
1284 bool C = false;
1285 bool V = false;
1286
1287 UsedNZCV() = default;
1288
1289 UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1290 this->N |= UsedFlags.N;
1291 this->Z |= UsedFlags.Z;
1292 this->C |= UsedFlags.C;
1293 this->V |= UsedFlags.V;
1294 return *this;
1295 }
1296 };
1297
1298 } // end anonymous namespace
1299
1300 /// Find a condition code used by the instruction.
1301 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1302 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1303 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1304 switch (Instr.getOpcode()) {
1305 default:
1306 return AArch64CC::Invalid;
1307
1308 case AArch64::Bcc: {
1309 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1310 assert(Idx >= 2);
1311 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1312 }
1313
1314 case AArch64::CSINVWr:
1315 case AArch64::CSINVXr:
1316 case AArch64::CSINCWr:
1317 case AArch64::CSINCXr:
1318 case AArch64::CSELWr:
1319 case AArch64::CSELXr:
1320 case AArch64::CSNEGWr:
1321 case AArch64::CSNEGXr:
1322 case AArch64::FCSELSrrr:
1323 case AArch64::FCSELDrrr: {
1324 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1325 assert(Idx >= 1);
1326 return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1327 }
1328 }
1329 }
1330
1331 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1332 assert(CC != AArch64CC::Invalid);
1333 UsedNZCV UsedFlags;
1334 switch (CC) {
1335 default:
1336 break;
1337
1338 case AArch64CC::EQ: // Z set
1339 case AArch64CC::NE: // Z clear
1340 UsedFlags.Z = true;
1341 break;
1342
1343 case AArch64CC::HI: // Z clear and C set
1344 case AArch64CC::LS: // Z set or C clear
1345 UsedFlags.Z = true;
1346 LLVM_FALLTHROUGH;
1347 case AArch64CC::HS: // C set
1348 case AArch64CC::LO: // C clear
1349 UsedFlags.C = true;
1350 break;
1351
1352 case AArch64CC::MI: // N set
1353 case AArch64CC::PL: // N clear
1354 UsedFlags.N = true;
1355 break;
1356
1357 case AArch64CC::VS: // V set
1358 case AArch64CC::VC: // V clear
1359 UsedFlags.V = true;
1360 break;
1361
1362 case AArch64CC::GT: // Z clear, N and V the same
1363 case AArch64CC::LE: // Z set, N and V differ
1364 UsedFlags.Z = true;
1365 LLVM_FALLTHROUGH;
1366 case AArch64CC::GE: // N and V the same
1367 case AArch64CC::LT: // N and V differ
1368 UsedFlags.N = true;
1369 UsedFlags.V = true;
1370 break;
1371 }
1372 return UsedFlags;
1373 }
1374
1375 static bool isADDSRegImm(unsigned Opcode) {
1376 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1377 }
1378
1379 static bool isSUBSRegImm(unsigned Opcode) {
1380 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1381 }
1382
1383 /// Check if CmpInstr can be substituted by MI.
1384 ///
1385 /// CmpInstr can be substituted:
1386 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1387 /// - and, MI and CmpInstr are from the same MachineBB
1388 /// - and, condition flags are not alive in successors of the CmpInstr parent
1389 /// - and, if MI opcode is the S form there must be no defs of flags between
1390 /// MI and CmpInstr
1391 /// or if MI opcode is not the S form there must be neither defs of flags
1392 /// nor uses of flags between MI and CmpInstr.
1393 /// - and C/V flags are not used after CmpInstr
1394 static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1395 const TargetRegisterInfo *TRI) {
1396 assert(MI);
1397 assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1398 assert(CmpInstr);
1399
1400 const unsigned CmpOpcode = CmpInstr->getOpcode();
1401 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1402 return false;
1403
1404 if (MI->getParent() != CmpInstr->getParent())
1405 return false;
1406
1407 if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1408 return false;
1409
1410 AccessKind AccessToCheck = AK_Write;
1411 if (sForm(*MI) != MI->getOpcode())
1412 AccessToCheck = AK_All;
1413 if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1414 return false;
1415
1416 UsedNZCV NZCVUsedAfterCmp;
1417 for (auto I = std::next(CmpInstr->getIterator()),
1418 E = CmpInstr->getParent()->instr_end();
1419 I != E; ++I) {
1420 const MachineInstr &Instr = *I;
1421 if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1422 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1423 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1424 return false;
1425 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1426 }
1427
1428 if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1429 break;
1430 }
1431
1432 return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1433 }
1434
1435 /// Substitute an instruction comparing to zero with another instruction
1436 /// which produces needed condition flags.
1437 ///
1438 /// Return true on success.
1439 bool AArch64InstrInfo::substituteCmpToZero(
1440 MachineInstr &CmpInstr, unsigned SrcReg,
1441 const MachineRegisterInfo *MRI) const {
1442 assert(MRI);
1443 // Get the unique definition of SrcReg.
1444 MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1445 if (!MI)
1446 return false;
1447
1448 const TargetRegisterInfo *TRI = &getRegisterInfo();
1449
1450 unsigned NewOpc = sForm(*MI);
1451 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1452 return false;
1453
1454 if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1455 return false;
1456
1457 // Update the instruction to set NZCV.
1458 MI->setDesc(get(NewOpc));
1459 CmpInstr.eraseFromParent();
1460 bool succeeded = UpdateOperandRegClass(*MI);
1461 (void)succeeded;
1462 assert(succeeded && "Some operands reg class are incompatible!");
1463 MI->addRegisterDefined(AArch64::NZCV, TRI);
1464 return true;
1465 }
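// Rough before/after sketch (made-up virtual registers, for illustration):
//   %v = SUBWrr %a, %b
//   ...
//   $wzr = SUBSWri %v, 0, 0        ; cmp %v, #0
//   Bcc eq, %bb
// becomes
//   %v = SUBSWrr %a, %b            ; same operation, now also sets NZCV
//   Bcc eq, %bb
// provided nothing between the two instructions touches NZCV and only N/Z are
// read afterwards (see canInstrSubstituteCmpInstr above).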
1466
1467 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1468 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1469 MI.getOpcode() != AArch64::CATCHRET)
1470 return false;
1471
1472 MachineBasicBlock &MBB = *MI.getParent();
1473 DebugLoc DL = MI.getDebugLoc();
1474
1475 if (MI.getOpcode() == AArch64::CATCHRET) {
1476 // Skip to the first instruction before the epilog.
1477 const TargetInstrInfo *TII =
1478 MBB.getParent()->getSubtarget().getInstrInfo();
1479 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1480 auto MBBI = MachineBasicBlock::iterator(MI);
1481 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1482 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1483 FirstEpilogSEH != MBB.begin())
1484 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1485 if (FirstEpilogSEH != MBB.begin())
1486 FirstEpilogSEH = std::next(FirstEpilogSEH);
1487 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1488 .addReg(AArch64::X0, RegState::Define)
1489 .addMBB(TargetMBB);
1490 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1491 .addReg(AArch64::X0, RegState::Define)
1492 .addReg(AArch64::X0)
1493 .addMBB(TargetMBB)
1494 .addImm(0);
1495 return true;
1496 }
1497
1498 unsigned Reg = MI.getOperand(0).getReg();
1499 const GlobalValue *GV =
1500 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1501 const TargetMachine &TM = MBB.getParent()->getTarget();
1502 unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1503 const unsigned char MO_NC = AArch64II::MO_NC;
1504
1505 if ((OpFlags & AArch64II::MO_GOT) != 0) {
1506 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1507 .addGlobalAddress(GV, 0, OpFlags);
1508 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1509 .addReg(Reg, RegState::Kill)
1510 .addImm(0)
1511 .addMemOperand(*MI.memoperands_begin());
1512 } else if (TM.getCodeModel() == CodeModel::Large) {
1513 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1514 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1515 .addImm(0);
1516 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1517 .addReg(Reg, RegState::Kill)
1518 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1519 .addImm(16);
1520 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1521 .addReg(Reg, RegState::Kill)
1522 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1523 .addImm(32);
1524 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1525 .addReg(Reg, RegState::Kill)
1526 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1527 .addImm(48);
1528 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1529 .addReg(Reg, RegState::Kill)
1530 .addImm(0)
1531 .addMemOperand(*MI.memoperands_begin());
1532 } else if (TM.getCodeModel() == CodeModel::Tiny) {
1533 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1534 .addGlobalAddress(GV, 0, OpFlags);
1535 } else {
1536 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1537 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1538 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1539 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1540 .addReg(Reg, RegState::Kill)
1541 .addGlobalAddress(GV, 0, LoFlags)
1542 .addMemOperand(*MI.memoperands_begin());
1543 }
1544
1545 MBB.erase(MI);
1546
1547 return true;
1548 }
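// Illustrative expansion (assuming the stack guard is the usual
// __stack_chk_guard symbol, accessed without the GOT, small code model):
//   $x0 = LOAD_STACK_GUARD
// becomes roughly
//   adrp x0, __stack_chk_guard
//   ldr  x0, [x0, :lo12:__stack_chk_guard]
// matching the ADRP + LDRXui path at the end of the function; the GOT, large
// and tiny code-model branches above emit the analogous sequences instead.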
1549
1550 // Return true if this instruction simply sets its single destination register
1551 // to zero. This is equivalent to a register rename of the zero-register.
1552 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1553 switch (MI.getOpcode()) {
1554 default:
1555 break;
1556 case AArch64::MOVZWi:
1557 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1558 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1559 assert(MI.getDesc().getNumOperands() == 3 &&
1560 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1561 return true;
1562 }
1563 break;
1564 case AArch64::ANDWri: // and Rd, Rzr, #imm
1565 return MI.getOperand(1).getReg() == AArch64::WZR;
1566 case AArch64::ANDXri:
1567 return MI.getOperand(1).getReg() == AArch64::XZR;
1568 case TargetOpcode::COPY:
1569 return MI.getOperand(1).getReg() == AArch64::WZR;
1570 }
1571 return false;
1572 }
1573
1574 // Return true if this instruction simply renames a general register without
1575 // modifying bits.
1576 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1577 switch (MI.getOpcode()) {
1578 default:
1579 break;
1580 case TargetOpcode::COPY: {
1581 // GPR32 copies will be lowered to ORRXrs
1582 unsigned DstReg = MI.getOperand(0).getReg();
1583 return (AArch64::GPR32RegClass.contains(DstReg) ||
1584 AArch64::GPR64RegClass.contains(DstReg));
1585 }
1586 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1587 if (MI.getOperand(1).getReg() == AArch64::XZR) {
1588 assert(MI.getDesc().getNumOperands() == 4 &&
1589 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1590 return true;
1591 }
1592 break;
1593 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1594 if (MI.getOperand(2).getImm() == 0) {
1595 assert(MI.getDesc().getNumOperands() == 4 &&
1596 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1597 return true;
1598 }
1599 break;
1600 }
1601 return false;
1602 }
1603
1604 // Return true if this instruction simply renames a floating-point register
1605 // without modifying bits.
1606 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1607 switch (MI.getOpcode()) {
1608 default:
1609 break;
1610 case TargetOpcode::COPY: {
1611 // FPR64 copies will be lowered to ORR.16b
1612 unsigned DstReg = MI.getOperand(0).getReg();
1613 return (AArch64::FPR64RegClass.contains(DstReg) ||
1614 AArch64::FPR128RegClass.contains(DstReg));
1615 }
1616 case AArch64::ORRv16i8:
1617 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1618 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1619 "invalid ORRv16i8 operands");
1620 return true;
1621 }
1622 break;
1623 }
1624 return false;
1625 }
1626
1627 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1628 int &FrameIndex) const {
1629 switch (MI.getOpcode()) {
1630 default:
1631 break;
1632 case AArch64::LDRWui:
1633 case AArch64::LDRXui:
1634 case AArch64::LDRBui:
1635 case AArch64::LDRHui:
1636 case AArch64::LDRSui:
1637 case AArch64::LDRDui:
1638 case AArch64::LDRQui:
1639 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1640 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1641 FrameIndex = MI.getOperand(1).getIndex();
1642 return MI.getOperand(0).getReg();
1643 }
1644 break;
1645 }
1646
1647 return 0;
1648 }
1649
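// If this is a store of a register directly to a stack slot (frame-index
// base with a zero immediate offset), return the source register and set
// FrameIndex; otherwise return 0.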
1650 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1651 int &FrameIndex) const {
1652 switch (MI.getOpcode()) {
1653 default:
1654 break;
1655 case AArch64::STRWui:
1656 case AArch64::STRXui:
1657 case AArch64::STRBui:
1658 case AArch64::STRHui:
1659 case AArch64::STRSui:
1660 case AArch64::STRDui:
1661 case AArch64::STRQui:
1662 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1663 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1664 FrameIndex = MI.getOperand(1).getIndex();
1665 return MI.getOperand(0).getReg();
1666 }
1667 break;
1668 }
1669 return 0;
1670 }
1671
1672 /// Check all MachineMemOperands for a hint to suppress pairing.
1673 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1674 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1675 return MMO->getFlags() & MOSuppressPair;
1676 });
1677 }
1678
1679 /// Set a flag on the first MachineMemOperand to suppress pairing.
1680 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1681 if (MI.memoperands_empty())
1682 return;
1683 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1684 }
1685
1686 /// Check all MachineMemOperands for a hint that the load/store is strided.
1687 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1688 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1689 return MMO->getFlags() & MOStridedAccess;
1690 });
1691 }
1692
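// Return true if the opcode is an unscaled load/store, i.e. one of the
// LDUR/STUR forms that take a 9-bit signed byte offset.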
1693 bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1694 switch (Opc) {
1695 default:
1696 return false;
1697 case AArch64::STURSi:
1698 case AArch64::STURDi:
1699 case AArch64::STURQi:
1700 case AArch64::STURBBi:
1701 case AArch64::STURHHi:
1702 case AArch64::STURWi:
1703 case AArch64::STURXi:
1704 case AArch64::LDURSi:
1705 case AArch64::LDURDi:
1706 case AArch64::LDURQi:
1707 case AArch64::LDURWi:
1708 case AArch64::LDURXi:
1709 case AArch64::LDURSWi:
1710 case AArch64::LDURHHi:
1711 case AArch64::LDURBBi:
1712 case AArch64::LDURSBWi:
1713 case AArch64::LDURSHWi:
1714 return true;
1715 }
1716 }
1717
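// Return true if this load/store opcode is one the load/store optimizer may
// combine into a paired (LDP/STP) access.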
1718 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1719 switch (MI.getOpcode()) {
1720 default:
1721 return false;
1722 // Scaled instructions.
1723 case AArch64::STRSui:
1724 case AArch64::STRDui:
1725 case AArch64::STRQui:
1726 case AArch64::STRXui:
1727 case AArch64::STRWui:
1728 case AArch64::LDRSui:
1729 case AArch64::LDRDui:
1730 case AArch64::LDRQui:
1731 case AArch64::LDRXui:
1732 case AArch64::LDRWui:
1733 case AArch64::LDRSWui:
1734 // Unscaled instructions.
1735 case AArch64::STURSi:
1736 case AArch64::STURDi:
1737 case AArch64::STURQi:
1738 case AArch64::STURWi:
1739 case AArch64::STURXi:
1740 case AArch64::LDURSi:
1741 case AArch64::LDURDi:
1742 case AArch64::LDURQi:
1743 case AArch64::LDURWi:
1744 case AArch64::LDURXi:
1745 case AArch64::LDURSWi:
1746 return true;
1747 }
1748 }
1749
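// Return the flag-setting (S-suffixed) equivalent of an ADD/SUB/AND/BIC
// opcode and report via Is64Bit whether the operation is 64-bit.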
1750 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1751 bool &Is64Bit) {
1752 switch (Opc) {
1753 default:
1754 llvm_unreachable("Opcode has no flag setting equivalent!");
1755 // 32-bit cases:
1756 case AArch64::ADDWri:
1757 Is64Bit = false;
1758 return AArch64::ADDSWri;
1759 case AArch64::ADDWrr:
1760 Is64Bit = false;
1761 return AArch64::ADDSWrr;
1762 case AArch64::ADDWrs:
1763 Is64Bit = false;
1764 return AArch64::ADDSWrs;
1765 case AArch64::ADDWrx:
1766 Is64Bit = false;
1767 return AArch64::ADDSWrx;
1768 case AArch64::ANDWri:
1769 Is64Bit = false;
1770 return AArch64::ANDSWri;
1771 case AArch64::ANDWrr:
1772 Is64Bit = false;
1773 return AArch64::ANDSWrr;
1774 case AArch64::ANDWrs:
1775 Is64Bit = false;
1776 return AArch64::ANDSWrs;
1777 case AArch64::BICWrr:
1778 Is64Bit = false;
1779 return AArch64::BICSWrr;
1780 case AArch64::BICWrs:
1781 Is64Bit = false;
1782 return AArch64::BICSWrs;
1783 case AArch64::SUBWri:
1784 Is64Bit = false;
1785 return AArch64::SUBSWri;
1786 case AArch64::SUBWrr:
1787 Is64Bit = false;
1788 return AArch64::SUBSWrr;
1789 case AArch64::SUBWrs:
1790 Is64Bit = false;
1791 return AArch64::SUBSWrs;
1792 case AArch64::SUBWrx:
1793 Is64Bit = false;
1794 return AArch64::SUBSWrx;
1795 // 64-bit cases:
1796 case AArch64::ADDXri:
1797 Is64Bit = true;
1798 return AArch64::ADDSXri;
1799 case AArch64::ADDXrr:
1800 Is64Bit = true;
1801 return AArch64::ADDSXrr;
1802 case AArch64::ADDXrs:
1803 Is64Bit = true;
1804 return AArch64::ADDSXrs;
1805 case AArch64::ADDXrx:
1806 Is64Bit = true;
1807 return AArch64::ADDSXrx;
1808 case AArch64::ANDXri:
1809 Is64Bit = true;
1810 return AArch64::ANDSXri;
1811 case AArch64::ANDXrr:
1812 Is64Bit = true;
1813 return AArch64::ANDSXrr;
1814 case AArch64::ANDXrs:
1815 Is64Bit = true;
1816 return AArch64::ANDSXrs;
1817 case AArch64::BICXrr:
1818 Is64Bit = true;
1819 return AArch64::BICSXrr;
1820 case AArch64::BICXrs:
1821 Is64Bit = true;
1822 return AArch64::BICSXrs;
1823 case AArch64::SUBXri:
1824 Is64Bit = true;
1825 return AArch64::SUBSXri;
1826 case AArch64::SUBXrr:
1827 Is64Bit = true;
1828 return AArch64::SUBSXrr;
1829 case AArch64::SUBXrs:
1830 Is64Bit = true;
1831 return AArch64::SUBSXrs;
1832 case AArch64::SUBXrx:
1833 Is64Bit = true;
1834 return AArch64::SUBSXrx;
1835 }
1836 }
1837
1838 // Is this a candidate for ld/st merging or pairing? For example, we don't
1839 // touch volatiles or load/stores that have a hint to avoid pair formation.
1840 bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
1841 // If this is a volatile load/store, don't mess with it.
1842 if (MI.hasOrderedMemoryRef())
1843 return false;
1844
1845 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
1846 assert((MI.getOperand(1).isReg() || MI.getOperand(1).isFI()) &&
1847 "Expected a reg or frame index operand.");
1848 if (!MI.getOperand(2).isImm())
1849 return false;
1850
1851 // Can't merge/pair if the instruction modifies the base register.
1852 // e.g., ldr x0, [x0]
1853 // This case will never occur with an FI base.
1854 if (MI.getOperand(1).isReg()) {
1855 unsigned BaseReg = MI.getOperand(1).getReg();
1856 const TargetRegisterInfo *TRI = &getRegisterInfo();
1857 if (MI.modifiesRegister(BaseReg, TRI))
1858 return false;
1859 }
1860
1861 // Check if this load/store has a hint to avoid pair formation.
1862 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
1863 if (isLdStPairSuppressed(MI))
1864 return false;
1865
1866 // On some CPUs quad load/store pairs are slower than two single load/stores.
1867 if (Subtarget.isPaired128Slow()) {
1868 switch (MI.getOpcode()) {
1869 default:
1870 break;
1871 case AArch64::LDURQi:
1872 case AArch64::STURQi:
1873 case AArch64::LDRQui:
1874 case AArch64::STRQui:
1875 return false;
1876 }
1877 }
1878
1879 return true;
1880 }
1881
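// Thin wrapper around getMemOperandWithOffsetWidth that discards the access
// width.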
1882 bool AArch64InstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
1883 MachineOperand *&BaseOp,
1884 int64_t &Offset,
1885 const TargetRegisterInfo *TRI) const {
1886 unsigned Width;
1887 return getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI);
1888 }
1889
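// Decompose a load/store into its base operand (register or frame index) and
// byte offset, and report the width of the memory access. Returns false for
// instructions this code does not handle.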
1890 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
1891 MachineInstr &LdSt, MachineOperand *&BaseOp, int64_t &Offset,
1892 unsigned &Width, const TargetRegisterInfo *TRI) const {
1893 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1894 // Handle only loads/stores with base register followed by immediate offset.
1895 if (LdSt.getNumExplicitOperands() == 3) {
1896 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
1897 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
1898 !LdSt.getOperand(2).isImm())
1899 return false;
1900 } else if (LdSt.getNumExplicitOperands() == 4) {
1901 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
1902 if (!LdSt.getOperand(1).isReg() ||
1903 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
1904 !LdSt.getOperand(3).isImm())
1905 return false;
1906 } else
1907 return false;
1908
1909 // Get the scaling factor for the instruction and set the width for the
1910 // instruction.
1911 unsigned Scale = 0;
1912 int64_t Dummy1, Dummy2;
1913
1914 // If this returns false, then it's an instruction we don't want to handle.
1915 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
1916 return false;
1917
1918 // Compute the offset. Offset is calculated as the immediate operand
1919 // multiplied by the scaling factor. Unscaled instructions have scaling factor
1920 // set to 1.
1921 if (LdSt.getNumExplicitOperands() == 3) {
1922 BaseOp = &LdSt.getOperand(1);
1923 Offset = LdSt.getOperand(2).getImm() * Scale;
1924 } else {
1925 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
1926 BaseOp = &LdSt.getOperand(2);
1927 Offset = LdSt.getOperand(3).getImm() * Scale;
1928 }
1929
1930 assert((BaseOp->isReg() || BaseOp->isFI()) &&
1931 "getMemOperandWithOffset only supports base "
1932 "operands of type register or frame index.");
1933
1934 return true;
1935 }
1936
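// Return a reference to the immediate offset operand of a base+offset
// load/store.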
1937 MachineOperand &
1938 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
1939 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
1940 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
1941 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
1942 return OfsOp;
1943 }
1944
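// Look up, for a load/store opcode, the offset scaling factor, the access
// width, and the legal range of the immediate offset field (in units of
// Scale). Returns false for opcodes this table does not describe.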
1945 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
1946 unsigned &Width, int64_t &MinOffset,
1947 int64_t &MaxOffset) const {
1948 switch (Opcode) {
1949 // Not a memory operation or something we want to handle.
1950 default:
1951 Scale = Width = 0;
1952 MinOffset = MaxOffset = 0;
1953 return false;
1954 case AArch64::STRWpost:
1955 case AArch64::LDRWpost:
1956 Width = 32;
1957 Scale = 4;
1958 MinOffset = -256;
1959 MaxOffset = 255;
1960 break;
1961 case AArch64::LDURQi:
1962 case AArch64::STURQi:
1963 Width = 16;
1964 Scale = 1;
1965 MinOffset = -256;
1966 MaxOffset = 255;
1967 break;
1968 case AArch64::LDURXi:
1969 case AArch64::LDURDi:
1970 case AArch64::STURXi:
1971 case AArch64::STURDi:
1972 Width = 8;
1973 Scale = 1;
1974 MinOffset = -256;
1975 MaxOffset = 255;
1976 break;
1977 case AArch64::LDURWi:
1978 case AArch64::LDURSi:
1979 case AArch64::LDURSWi:
1980 case AArch64::STURWi:
1981 case AArch64::STURSi:
1982 Width = 4;
1983 Scale = 1;
1984 MinOffset = -256;
1985 MaxOffset = 255;
1986 break;
1987 case AArch64::LDURHi:
1988 case AArch64::LDURHHi:
1989 case AArch64::LDURSHXi:
1990 case AArch64::LDURSHWi:
1991 case AArch64::STURHi:
1992 case AArch64::STURHHi:
1993 Width = 2;
1994 Scale = 1;
1995 MinOffset = -256;
1996 MaxOffset = 255;
1997 break;
1998 case AArch64::LDURBi:
1999 case AArch64::LDURBBi:
2000 case AArch64::LDURSBXi:
2001 case AArch64::LDURSBWi:
2002 case AArch64::STURBi:
2003 case AArch64::STURBBi:
2004 Width = 1;
2005 Scale = 1;
2006 MinOffset = -256;
2007 MaxOffset = 255;
2008 break;
2009 case AArch64::LDPQi:
2010 case AArch64::LDNPQi:
2011 case AArch64::STPQi:
2012 case AArch64::STNPQi:
2013 Scale = 16;
2014 Width = 32;
2015 MinOffset = -64;
2016 MaxOffset = 63;
2017 break;
2018 case AArch64::LDRQui:
2019 case AArch64::STRQui:
2020 Scale = Width = 16;
2021 MinOffset = 0;
2022 MaxOffset = 4095;
2023 break;
2024 case AArch64::LDPXi:
2025 case AArch64::LDPDi:
2026 case AArch64::LDNPXi:
2027 case AArch64::LDNPDi:
2028 case AArch64::STPXi:
2029 case AArch64::STPDi:
2030 case AArch64::STNPXi:
2031 case AArch64::STNPDi:
2032 Scale = 8;
2033 Width = 16;
2034 MinOffset = -64;
2035 MaxOffset = 63;
2036 break;
2037 case AArch64::LDRXui:
2038 case AArch64::LDRDui:
2039 case AArch64::STRXui:
2040 case AArch64::STRDui:
2041 Scale = Width = 8;
2042 MinOffset = 0;
2043 MaxOffset = 4095;
2044 break;
2045 case AArch64::LDPWi:
2046 case AArch64::LDPSi:
2047 case AArch64::LDNPWi:
2048 case AArch64::LDNPSi:
2049 case AArch64::STPWi:
2050 case AArch64::STPSi:
2051 case AArch64::STNPWi:
2052 case AArch64::STNPSi:
2053 Scale = 4;
2054 Width = 8;
2055 MinOffset = -64;
2056 MaxOffset = 63;
2057 break;
2058 case AArch64::LDRWui:
2059 case AArch64::LDRSui:
2060 case AArch64::LDRSWui:
2061 case AArch64::STRWui:
2062 case AArch64::STRSui:
2063 Scale = Width = 4;
2064 MinOffset = 0;
2065 MaxOffset = 4095;
2066 break;
2067 case AArch64::LDRHui:
2068 case AArch64::LDRHHui:
2069 case AArch64::STRHui:
2070 case AArch64::STRHHui:
2071 Scale = Width = 2;
2072 MinOffset = 0;
2073 MaxOffset = 4095;
2074 break;
2075 case AArch64::LDRBui:
2076 case AArch64::LDRBBui:
2077 case AArch64::STRBui:
2078 case AArch64::STRBBui:
2079 Scale = Width = 1;
2080 MinOffset = 0;
2081 MaxOffset = 4095;
2082 break;
2083 }
2084
2085 return true;
2086 }
2087
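// Return the access size in bytes used to convert between the byte offsets
// of the handled unscaled load/store opcodes and the element offsets of their
// paired counterparts; 0 means the opcode is not handled.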
2088 static unsigned getOffsetStride(unsigned Opc) {
2089 switch (Opc) {
2090 default:
2091 return 0;
2092 case AArch64::LDURQi:
2093 case AArch64::STURQi:
2094 return 16;
2095 case AArch64::LDURXi:
2096 case AArch64::LDURDi:
2097 case AArch64::STURXi:
2098 case AArch64::STURDi:
2099 return 8;
2100 case AArch64::LDURWi:
2101 case AArch64::LDURSi:
2102 case AArch64::LDURSWi:
2103 case AArch64::STURWi:
2104 case AArch64::STURSi:
2105 return 4;
2106 }
2107 }
2108
2109 // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2110 // scaled.
2111 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2112 unsigned OffsetStride = getOffsetStride(Opc);
2113 if (OffsetStride == 0)
2114 return false;
2115 // If the byte-offset isn't a multiple of the stride, we can't scale this
2116 // offset.
2117 if (Offset % OffsetStride != 0)
2118 return false;
2119
2120 // Convert the byte-offset used by unscaled into an "element" offset used
2121 // by the scaled pair load/store instructions.
2122 Offset /= OffsetStride;
2123 return true;
2124 }
2125
2126 // Unscale the scaled offsets. Returns false if the scaled offset can't be
2127 // unscaled.
2128 static bool unscaleOffset(unsigned Opc, int64_t &Offset) {
2129 unsigned OffsetStride = getOffsetStride(Opc);
2130 if (OffsetStride == 0)
2131 return false;
2132
2133 // Convert the "element" offset used by scaled pair load/store instructions
2134 // into the byte-offset used by unscaled.
2135 Offset *= OffsetStride;
2136 return true;
2137 }
2138
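// Return true if two load/store opcodes can form a pair: either they are
// identical, or one is a 32-bit zero-extending load and the other the
// matching sign-extending load.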
2139 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2140 if (FirstOpc == SecondOpc)
2141 return true;
2142 // We can also pair sign-ext and zero-ext instructions.
2143 switch (FirstOpc) {
2144 default:
2145 return false;
2146 case AArch64::LDRWui:
2147 case AArch64::LDURWi:
2148 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2149 case AArch64::LDRSWui:
2150 case AArch64::LDURSWi:
2151 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2152 }
2153 // These instructions can't be paired based on their opcodes.
2154 return false;
2155 }
2156
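// Decide whether two frame-index based accesses address adjacent elements and
// are therefore worth clustering: fixed stack objects are compared by their
// absolute object offsets, other frame indices simply by index equality.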
2157 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
2158 int64_t Offset1, unsigned Opcode1, int FI2,
2159 int64_t Offset2, unsigned Opcode2) {
2160 // Accesses through fixed stack object frame indices may access a different
2161 // fixed stack slot. Check that the object offsets + offsets match.
2162 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
2163 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
2164 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
2165 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
2166 // Get the byte-offset from the object offset.
2167 if (!unscaleOffset(Opcode1, Offset1) || !unscaleOffset(Opcode2, Offset2))
2168 return false;
2169 ObjectOffset1 += Offset1;
2170 ObjectOffset2 += Offset2;
2171 // Get the "element" index in the object.
2172 if (!scaleOffset(Opcode1, ObjectOffset1) ||
2173 !scaleOffset(Opcode2, ObjectOffset2))
2174 return false;
2175 return ObjectOffset1 + 1 == ObjectOffset2;
2176 }
2177
2178 return FI1 == FI2;
2179 }
2180
2181 /// Detect opportunities for ldp/stp formation.
2182 ///
2183 /// Only called for LdSt for which getMemOperandWithOffset returns true.
2184 bool AArch64InstrInfo::shouldClusterMemOps(MachineOperand &BaseOp1,
2185 MachineOperand &BaseOp2,
2186 unsigned NumLoads) const {
2187 MachineInstr &FirstLdSt = *BaseOp1.getParent();
2188 MachineInstr &SecondLdSt = *BaseOp2.getParent();
2189 if (BaseOp1.getType() != BaseOp2.getType())
2190 return false;
2191
2192 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
2193 "Only base registers and frame indices are supported.");
2194
2195 // Check for both base regs and base FI.
2196 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
2197 return false;
2198
2199 // Only cluster up to a single pair.
2200 if (NumLoads > 1)
2201 return false;
2202
2203 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2204 return false;
2205
2206 // Can we pair these instructions based on their opcodes?
2207 unsigned FirstOpc = FirstLdSt.getOpcode();
2208 unsigned SecondOpc = SecondLdSt.getOpcode();
2209 if (!canPairLdStOpc(FirstOpc, SecondOpc))
2210 return false;
2211
2212 // Can't merge volatiles or load/stores that have a hint to avoid pair
2213 // formation, for example.
2214 if (!isCandidateToMergeOrPair(FirstLdSt) ||
2215 !isCandidateToMergeOrPair(SecondLdSt))
2216 return false;
2217
2218 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2219 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2220 if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2221 return false;
2222
2223 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2224 if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2225 return false;
2226
2227 // Pairwise instructions have a 7-bit signed offset field.
2228 if (Offset1 > 63 || Offset1 < -64)
2229 return false;
2230
2231 // The caller should already have ordered First/SecondLdSt by offset.
2232 // Note: except for non-equal frame index bases
2233 if (BaseOp1.isFI()) {
2234 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 >= Offset2) &&
2235 "Caller should have ordered offsets.");
2236
2237 const MachineFrameInfo &MFI =
2238 FirstLdSt.getParent()->getParent()->getFrameInfo();
2239 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
2240 BaseOp2.getIndex(), Offset2, SecondOpc);
2241 }
2242
2243 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
2244 "Caller should have ordered offsets.");
2245
2246 return Offset1 + 1 == Offset2;
2247 }
2248
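// Append a (possibly sub-) register operand to MIB. For physical registers
// the sub-register index is resolved immediately; for virtual registers it is
// attached to the operand.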
2249 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2250 unsigned Reg, unsigned SubIdx,
2251 unsigned State,
2252 const TargetRegisterInfo *TRI) {
2253 if (!SubIdx)
2254 return MIB.addReg(Reg, State);
2255
2256 if (TargetRegisterInfo::isPhysicalRegister(Reg))
2257 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2258 return MIB.addReg(Reg, State, SubIdx);
2259 }
2260
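// Return true if copying a register tuple front-to-back would overwrite part
// of the source before it has been read, i.e. the destination encoding falls
// within NumRegs registers above the source (modulo 32).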
2261 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2262 unsigned NumRegs) {
2263 // We really want the positive remainder mod 32 here; that happens to be
2264 // easily obtainable with a mask.
2265 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2266 }
2267
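// Copy a vector register tuple one D/Q sub-register at a time with ORR,
// iterating in reverse when a forward copy would clobber the source.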
2268 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2269 MachineBasicBlock::iterator I,
2270 const DebugLoc &DL, unsigned DestReg,
2271 unsigned SrcReg, bool KillSrc,
2272 unsigned Opcode,
2273 ArrayRef<unsigned> Indices) const {
2274 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2275 const TargetRegisterInfo *TRI = &getRegisterInfo();
2276 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2277 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2278 unsigned NumRegs = Indices.size();
2279
2280 int SubReg = 0, End = NumRegs, Incr = 1;
2281 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2282 SubReg = NumRegs - 1;
2283 End = -1;
2284 Incr = -1;
2285 }
2286
2287 for (; SubReg != End; SubReg += Incr) {
2288 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2289 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2290 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2291 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2292 }
2293 }
2294
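// Copy a GPR register sequence one sub-register at a time using ORR with the
// zero register (a plain register move).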
2295 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
2296 MachineBasicBlock::iterator I,
2297 DebugLoc DL, unsigned DestReg,
2298 unsigned SrcReg, bool KillSrc,
2299 unsigned Opcode, unsigned ZeroReg,
2300 llvm::ArrayRef<unsigned> Indices) const {
2301 const TargetRegisterInfo *TRI = &getRegisterInfo();
2302 unsigned NumRegs = Indices.size();
2303
2304 #ifndef NDEBUG
2305 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2306 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2307 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
2308 "GPR reg sequences should not be able to overlap");
2309 #endif
2310
2311 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
2312 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2313 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2314 MIB.addReg(ZeroReg);
2315 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2316 MIB.addImm(0);
2317 }
2318 }
2319
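// Emit a register-to-register copy, picking an appropriate instruction for
// the classes of DestReg and SrcReg (ORR, ADD #0, FMOV, MSR/MRS for NZCV, or
// per-sub-register copies for register tuples).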
2320 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2321 MachineBasicBlock::iterator I,
2322 const DebugLoc &DL, unsigned DestReg,
2323 unsigned SrcReg, bool KillSrc) const {
2324 if (AArch64::GPR32spRegClass.contains(DestReg) &&
2325 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2326 const TargetRegisterInfo *TRI = &getRegisterInfo();
2327
2328 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2329 // If either operand is WSP, expand to ADD #0.
2330 if (Subtarget.hasZeroCycleRegMove()) {
2331 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2332 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2333 &AArch64::GPR64spRegClass);
2334 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2335 &AArch64::GPR64spRegClass);
2336 // This instruction is reading and writing X registers. This may upset
2337 // the register scavenger and machine verifier, so we need to indicate
2338 // that we are reading an undefined value from SrcRegX, but a proper
2339 // value from SrcReg.
2340 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2341 .addReg(SrcRegX, RegState::Undef)
2342 .addImm(0)
2343 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2344 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2345 } else {
2346 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2347 .addReg(SrcReg, getKillRegState(KillSrc))
2348 .addImm(0)
2349 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2350 }
2351 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2352 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2353 .addImm(0)
2354 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2355 } else {
2356 if (Subtarget.hasZeroCycleRegMove()) {
2357 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2358 unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2359 &AArch64::GPR64spRegClass);
2360 unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2361 &AArch64::GPR64spRegClass);
2362 // This instruction is reading and writing X registers. This may upset
2363 // the register scavenger and machine verifier, so we need to indicate
2364 // that we are reading an undefined value from SrcRegX, but a proper
2365 // value from SrcReg.
2366 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2367 .addReg(AArch64::XZR)
2368 .addReg(SrcRegX, RegState::Undef)
2369 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2370 } else {
2371 // Otherwise, expand to ORR WZR.
2372 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2373 .addReg(AArch64::WZR)
2374 .addReg(SrcReg, getKillRegState(KillSrc));
2375 }
2376 }
2377 return;
2378 }
2379
2380 if (AArch64::GPR64spRegClass.contains(DestReg) &&
2381 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2382 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2383 // If either operand is SP, expand to ADD #0.
2384 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2385 .addReg(SrcReg, getKillRegState(KillSrc))
2386 .addImm(0)
2387 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2388 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2389 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2390 .addImm(0)
2391 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2392 } else {
2393 // Otherwise, expand to ORR XZR.
2394 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2395 .addReg(AArch64::XZR)
2396 .addReg(SrcReg, getKillRegState(KillSrc));
2397 }
2398 return;
2399 }
2400
2401 // Copy a DDDD register quad by copying the individual sub-registers.
2402 if (AArch64::DDDDRegClass.contains(DestReg) &&
2403 AArch64::DDDDRegClass.contains(SrcReg)) {
2404 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2405 AArch64::dsub2, AArch64::dsub3};
2406 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2407 Indices);
2408 return;
2409 }
2410
2411 // Copy a DDD register triple by copying the individual sub-registers.
2412 if (AArch64::DDDRegClass.contains(DestReg) &&
2413 AArch64::DDDRegClass.contains(SrcReg)) {
2414 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2415 AArch64::dsub2};
2416 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2417 Indices);
2418 return;
2419 }
2420
2421 // Copy a DD register pair by copying the individual sub-registers.
2422 if (AArch64::DDRegClass.contains(DestReg) &&
2423 AArch64::DDRegClass.contains(SrcReg)) {
2424 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2425 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2426 Indices);
2427 return;
2428 }
2429
2430 // Copy a QQQQ register quad by copying the individual sub-registers.
2431 if (AArch64::QQQQRegClass.contains(DestReg) &&
2432 AArch64::QQQQRegClass.contains(SrcReg)) {
2433 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2434 AArch64::qsub2, AArch64::qsub3};
2435 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2436 Indices);
2437 return;
2438 }
2439
2440 // Copy a QQQ register triple by copying the individual sub-registers.
2441 if (AArch64::QQQRegClass.contains(DestReg) &&
2442 AArch64::QQQRegClass.contains(SrcReg)) {
2443 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2444 AArch64::qsub2};
2445 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2446 Indices);
2447 return;
2448 }
2449
2450 // Copy a QQ register pair by copying the individual sub-registers.
2451 if (AArch64::QQRegClass.contains(DestReg) &&
2452 AArch64::QQRegClass.contains(SrcReg)) {
2453 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2454 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2455 Indices);
2456 return;
2457 }
2458
2459 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
2460 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
2461 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
2462 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
2463 AArch64::XZR, Indices);
2464 return;
2465 }
2466
2467 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
2468 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
2469 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
2470 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
2471 AArch64::WZR, Indices);
2472 return;
2473 }
2474
2475 if (AArch64::FPR128RegClass.contains(DestReg) &&
2476 AArch64::FPR128RegClass.contains(SrcReg)) {
2477 if (Subtarget.hasNEON()) {
2478 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2479 .addReg(SrcReg)
2480 .addReg(SrcReg, getKillRegState(KillSrc));
2481 } else {
2482 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2483 .addReg(AArch64::SP, RegState::Define)
2484 .addReg(SrcReg, getKillRegState(KillSrc))
2485 .addReg(AArch64::SP)
2486 .addImm(-16);
2487 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2488 .addReg(AArch64::SP, RegState::Define)
2489 .addReg(DestReg, RegState::Define)
2490 .addReg(AArch64::SP)
2491 .addImm(16);
2492 }
2493 return;
2494 }
2495
2496 if (AArch64::FPR64RegClass.contains(DestReg) &&
2497 AArch64::FPR64RegClass.contains(SrcReg)) {
2498 if (Subtarget.hasNEON()) {
2499 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2500 &AArch64::FPR128RegClass);
2501 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2502 &AArch64::FPR128RegClass);
2503 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2504 .addReg(SrcReg)
2505 .addReg(SrcReg, getKillRegState(KillSrc));
2506 } else {
2507 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2508 .addReg(SrcReg, getKillRegState(KillSrc));
2509 }
2510 return;
2511 }
2512
2513 if (AArch64::FPR32RegClass.contains(DestReg) &&
2514 AArch64::FPR32RegClass.contains(SrcReg)) {
2515 if (Subtarget.hasNEON()) {
2516 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2517 &AArch64::FPR128RegClass);
2518 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2519 &AArch64::FPR128RegClass);
2520 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2521 .addReg(SrcReg)
2522 .addReg(SrcReg, getKillRegState(KillSrc));
2523 } else {
2524 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2525 .addReg(SrcReg, getKillRegState(KillSrc));
2526 }
2527 return;
2528 }
2529
2530 if (AArch64::FPR16RegClass.contains(DestReg) &&
2531 AArch64::FPR16RegClass.contains(SrcReg)) {
2532 if (Subtarget.hasNEON()) {
2533 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2534 &AArch64::FPR128RegClass);
2535 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2536 &AArch64::FPR128RegClass);
2537 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2538 .addReg(SrcReg)
2539 .addReg(SrcReg, getKillRegState(KillSrc));
2540 } else {
2541 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2542 &AArch64::FPR32RegClass);
2543 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2544 &AArch64::FPR32RegClass);
2545 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2546 .addReg(SrcReg, getKillRegState(KillSrc));
2547 }
2548 return;
2549 }
2550
2551 if (AArch64::FPR8RegClass.contains(DestReg) &&
2552 AArch64::FPR8RegClass.contains(SrcReg)) {
2553 if (Subtarget.hasNEON()) {
2554 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2555 &AArch64::FPR128RegClass);
2556 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2557 &AArch64::FPR128RegClass);
2558 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2559 .addReg(SrcReg)
2560 .addReg(SrcReg, getKillRegState(KillSrc));
2561 } else {
2562 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2563 &AArch64::FPR32RegClass);
2564 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2565 &AArch64::FPR32RegClass);
2566 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2567 .addReg(SrcReg, getKillRegState(KillSrc));
2568 }
2569 return;
2570 }
2571
2572 // Copies between GPR64 and FPR64.
2573 if (AArch64::FPR64RegClass.contains(DestReg) &&
2574 AArch64::GPR64RegClass.contains(SrcReg)) {
2575 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2576 .addReg(SrcReg, getKillRegState(KillSrc));
2577 return;
2578 }
2579 if (AArch64::GPR64RegClass.contains(DestReg) &&
2580 AArch64::FPR64RegClass.contains(SrcReg)) {
2581 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2582 .addReg(SrcReg, getKillRegState(KillSrc));
2583 return;
2584 }
2585 // Copies between GPR32 and FPR32.
2586 if (AArch64::FPR32RegClass.contains(DestReg) &&
2587 AArch64::GPR32RegClass.contains(SrcReg)) {
2588 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2589 .addReg(SrcReg, getKillRegState(KillSrc));
2590 return;
2591 }
2592 if (AArch64::GPR32RegClass.contains(DestReg) &&
2593 AArch64::FPR32RegClass.contains(SrcReg)) {
2594 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2595 .addReg(SrcReg, getKillRegState(KillSrc));
2596 return;
2597 }
2598
2599 if (DestReg == AArch64::NZCV) {
2600 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2601 BuildMI(MBB, I, DL, get(AArch64::MSR))
2602 .addImm(AArch64SysReg::NZCV)
2603 .addReg(SrcReg, getKillRegState(KillSrc))
2604 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2605 return;
2606 }
2607
2608 if (SrcReg == AArch64::NZCV) {
2609 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2610 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2611 .addImm(AArch64SysReg::NZCV)
2612 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2613 return;
2614 }
2615
2616 llvm_unreachable("unimplemented reg-to-reg copy");
2617 }
2618
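// Store a register pair to a stack slot with a single STP, splitting a
// physical tuple register into its two sub-registers.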
2619 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2620 MachineBasicBlock &MBB,
2621 MachineBasicBlock::iterator InsertBefore,
2622 const MCInstrDesc &MCID,
2623 unsigned SrcReg, bool IsKill,
2624 unsigned SubIdx0, unsigned SubIdx1, int FI,
2625 MachineMemOperand *MMO) {
2626 unsigned SrcReg0 = SrcReg;
2627 unsigned SrcReg1 = SrcReg;
2628 if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2629 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2630 SubIdx0 = 0;
2631 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2632 SubIdx1 = 0;
2633 }
2634 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2635 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2636 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2637 .addFrameIndex(FI)
2638 .addImm(0)
2639 .addMemOperand(MMO);
2640 }
2641
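// Spill SrcReg (of register class RC) to stack slot FI, choosing the store
// opcode from the spill size of the class.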
2642 void AArch64InstrInfo::storeRegToStackSlot(
2643 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2644 bool isKill, int FI, const TargetRegisterClass *RC,
2645 const TargetRegisterInfo *TRI) const {
2646 MachineFunction &MF = *MBB.getParent();
2647 MachineFrameInfo &MFI = MF.getFrameInfo();
2648 unsigned Align = MFI.getObjectAlignment(FI);
2649
2650 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2651 MachineMemOperand *MMO = MF.getMachineMemOperand(
2652 PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2653 unsigned Opc = 0;
2654 bool Offset = true;
2655 switch (TRI->getSpillSize(*RC)) {
2656 case 1:
2657 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2658 Opc = AArch64::STRBui;
2659 break;
2660 case 2:
2661 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2662 Opc = AArch64::STRHui;
2663 break;
2664 case 4:
2665 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2666 Opc = AArch64::STRWui;
2667 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2668 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2669 else
2670 assert(SrcReg != AArch64::WSP);
2671 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2672 Opc = AArch64::STRSui;
2673 break;
2674 case 8:
2675 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2676 Opc = AArch64::STRXui;
2677 if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2678 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2679 else
2680 assert(SrcReg != AArch64::SP);
2681 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2682 Opc = AArch64::STRDui;
2683 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2684 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2685 get(AArch64::STPWi), SrcReg, isKill,
2686 AArch64::sube32, AArch64::subo32, FI, MMO);
2687 return;
2688 }
2689 break;
2690 case 16:
2691 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2692 Opc = AArch64::STRQui;
2693 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2694 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2695 Opc = AArch64::ST1Twov1d;
2696 Offset = false;
2697 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2698 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2699 get(AArch64::STPXi), SrcReg, isKill,
2700 AArch64::sube64, AArch64::subo64, FI, MMO);
2701 return;
2702 }
2703 break;
2704 case 24:
2705 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2706 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2707 Opc = AArch64::ST1Threev1d;
2708 Offset = false;
2709 }
2710 break;
2711 case 32:
2712 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2713 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2714 Opc = AArch64::ST1Fourv1d;
2715 Offset = false;
2716 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2717 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2718 Opc = AArch64::ST1Twov2d;
2719 Offset = false;
2720 }
2721 break;
2722 case 48:
2723 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2724 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2725 Opc = AArch64::ST1Threev2d;
2726 Offset = false;
2727 }
2728 break;
2729 case 64:
2730 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2731 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2732 Opc = AArch64::ST1Fourv2d;
2733 Offset = false;
2734 }
2735 break;
2736 }
2737 assert(Opc && "Unknown register class");
2738
2739 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2740 .addReg(SrcReg, getKillRegState(isKill))
2741 .addFrameIndex(FI);
2742
2743 if (Offset)
2744 MI.addImm(0);
2745 MI.addMemOperand(MMO);
2746 }
2747
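// Reload a register pair from a stack slot with a single LDP, splitting a
// physical tuple register into its two sub-registers.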
2748 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
2749 MachineBasicBlock &MBB,
2750 MachineBasicBlock::iterator InsertBefore,
2751 const MCInstrDesc &MCID,
2752 unsigned DestReg, unsigned SubIdx0,
2753 unsigned SubIdx1, int FI,
2754 MachineMemOperand *MMO) {
2755 unsigned DestReg0 = DestReg;
2756 unsigned DestReg1 = DestReg;
2757 bool IsUndef = true;
2758 if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
2759 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
2760 SubIdx0 = 0;
2761 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
2762 SubIdx1 = 0;
2763 IsUndef = false;
2764 }
2765 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2766 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
2767 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
2768 .addFrameIndex(FI)
2769 .addImm(0)
2770 .addMemOperand(MMO);
2771 }
2772
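// Reload DestReg (of register class RC) from stack slot FI, choosing the load
// opcode from the spill size of the class.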
2773 void AArch64InstrInfo::loadRegFromStackSlot(
2774 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2775 int FI, const TargetRegisterClass *RC,
2776 const TargetRegisterInfo *TRI) const {
2777 MachineFunction &MF = *MBB.getParent();
2778 MachineFrameInfo &MFI = MF.getFrameInfo();
2779 unsigned Align = MFI.getObjectAlignment(FI);
2780 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2781 MachineMemOperand *MMO = MF.getMachineMemOperand(
2782 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2783
2784 unsigned Opc = 0;
2785 bool Offset = true;
2786 switch (TRI->getSpillSize(*RC)) {
2787 case 1:
2788 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2789 Opc = AArch64::LDRBui;
2790 break;
2791 case 2:
2792 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2793 Opc = AArch64::LDRHui;
2794 break;
2795 case 4:
2796 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2797 Opc = AArch64::LDRWui;
2798 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2799 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2800 else
2801 assert(DestReg != AArch64::WSP);
2802 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2803 Opc = AArch64::LDRSui;
2804 break;
2805 case 8:
2806 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2807 Opc = AArch64::LDRXui;
2808 if (TargetRegisterInfo::isVirtualRegister(DestReg))
2809 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2810 else
2811 assert(DestReg != AArch64::SP);
2812 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2813 Opc = AArch64::LDRDui;
2814 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2815 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2816 get(AArch64::LDPWi), DestReg, AArch64::sube32,
2817 AArch64::subo32, FI, MMO);
2818 return;
2819 }
2820 break;
2821 case 16:
2822 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2823 Opc = AArch64::LDRQui;
2824 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2825 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2826 Opc = AArch64::LD1Twov1d;
2827 Offset = false;
2828 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2829 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2830 get(AArch64::LDPXi), DestReg, AArch64::sube64,
2831 AArch64::subo64, FI, MMO);
2832 return;
2833 }
2834 break;
2835 case 24:
2836 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2837 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2838 Opc = AArch64::LD1Threev1d;
2839 Offset = false;
2840 }
2841 break;
2842 case 32:
2843 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2844 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2845 Opc = AArch64::LD1Fourv1d;
2846 Offset = false;
2847 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2848 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2849 Opc = AArch64::LD1Twov2d;
2850 Offset = false;
2851 }
2852 break;
2853 case 48:
2854 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2855 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2856 Opc = AArch64::LD1Threev2d;
2857 Offset = false;
2858 }
2859 break;
2860 case 64:
2861 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2862 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2863 Opc = AArch64::LD1Fourv2d;
2864 Offset = false;
2865 }
2866 break;
2867 }
2868 assert(Opc && "Unknown register class");
2869
2870 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2871 .addReg(DestReg, getDefRegState(true))
2872 .addFrameIndex(FI);
2873 if (Offset)
2874 MI.addImm(0);
2875 MI.addMemOperand(MMO);
2876 }
2877
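// Materialize DestReg = SrcReg +/- Offset with a sequence of ADD/SUB
// (immediate) instructions, splitting offsets that do not fit in the 12-bit
// (optionally shifted) immediate, optionally setting NZCV and emitting
// Windows SEH opcodes.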
2878 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
2879 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
2880 unsigned DestReg, unsigned SrcReg, int Offset,
2881 const TargetInstrInfo *TII,
2882 MachineInstr::MIFlag Flag, bool SetNZCV,
2883 bool NeedsWinCFI) {
2884 if (DestReg == SrcReg && Offset == 0)
2885 return;
2886
2887 assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
2888 "SP increment/decrement not 16-byte aligned");
2889
2890 bool isSub = Offset < 0;
2891 if (isSub)
2892 Offset = -Offset;
2893
2894 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
2895 // scratch register. If DestReg is a virtual register, use it as the
2896 // scratch register; otherwise, create a new virtual register (to be
2897 // replaced by the scavenger at the end of PEI). That case can be optimized
2898 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
2899 // register can be loaded with offset%8 and the add/sub can use an extending
2900 // instruction with LSL#3.
2901 // Currently the function handles any offsets but generates a poor sequence
2902 // of code.
2903 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
2904
2905 unsigned Opc;
2906 if (SetNZCV)
2907 Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
2908 else
2909 Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
2910 const unsigned MaxEncoding = 0xfff;
2911 const unsigned ShiftSize = 12;
2912 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
2913 while (((unsigned)Offset) >= (1 << ShiftSize)) {
2914 unsigned ThisVal;
2915 if (((unsigned)Offset) > MaxEncodableValue) {
2916 ThisVal = MaxEncodableValue;
2917 } else {
2918 ThisVal = Offset & MaxEncodableValue;
2919 }
2920 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
2921 "Encoding cannot handle value that big");
2922 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2923 .addReg(SrcReg)
2924 .addImm(ThisVal >> ShiftSize)
2925 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
2926 .setMIFlag(Flag);
2927
2928 if (NeedsWinCFI && SrcReg == AArch64::SP && DestReg == AArch64::SP)
2929 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
2930 .addImm(ThisVal)
2931 .setMIFlag(Flag);
2932
2933 SrcReg = DestReg;
2934 Offset -= ThisVal;
2935 if (Offset == 0)
2936 return;
2937 }
2938 BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
2939 .addReg(SrcReg)
2940 .addImm(Offset)
2941 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2942 .setMIFlag(Flag);
2943
2944 if (NeedsWinCFI) {
2945 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
2946 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
2947 if (Offset == 0)
2948 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).
2949 setMIFlag(Flag);
2950 else
2951 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)).
2952 addImm(Offset).setMIFlag(Flag);
2953 } else if (DestReg == AArch64::SP) {
2954 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)).
2955 addImm(Offset).setMIFlag(Flag);
2956 }
2957 }
2958 }
2959
2960 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
2961 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
2962 MachineBasicBlock::iterator InsertPt, int FrameIndex,
2963 LiveIntervals *LIS) const {
2964 // This is a bit of a hack. Consider this instruction:
2965 //
2966 // %0 = COPY %sp; GPR64all:%0
2967 //
2968 // We explicitly chose GPR64all for the virtual register so such a copy might
2969 // be eliminated by RegisterCoalescer. However, that may not be possible, and
2970 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
2971 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
2972 //
2973 // To prevent that, we are going to constrain the %0 register class here.
2974 //
2975 // <rdar://problem/11522048>
2976 //
2977 if (MI.isFullCopy()) {
2978 unsigned DstReg = MI.getOperand(0).getReg();
2979 unsigned SrcReg = MI.getOperand(1).getReg();
2980 if (SrcReg == AArch64::SP &&
2981 TargetRegisterInfo::isVirtualRegister(DstReg)) {
2982 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
2983 return nullptr;
2984 }
2985 if (DstReg == AArch64::SP &&
2986 TargetRegisterInfo::isVirtualRegister(SrcReg)) {
2987 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2988 return nullptr;
2989 }
2990 }
2991
2992 // Handle the case where a copy is being spilled or filled but the source
2993 // and destination register class don't match. For example:
2994 //
2995 // %0 = COPY %xzr; GPR64common:%0
2996 //
2997 // In this case we can still safely fold away the COPY and generate the
2998 // following spill code:
2999 //
3000 // STRXui %xzr, %stack.0
3001 //
3002 // This also eliminates spilled cross register class COPYs (e.g. between x and
3003 // d regs) of the same size. For example:
3004 //
3005 // %0 = COPY %1; GPR64:%0, FPR64:%1
3006 //
3007 // will be filled as
3008 //
3009 // LDRDui %0, fi<#0>
3010 //
3011 // instead of
3012 //
3013 // LDRXui %Temp, fi<#0>
3014 // %0 = FMOV %Temp
3015 //
3016 if (MI.isCopy() && Ops.size() == 1 &&
3017 // Make sure we're only folding the explicit COPY defs/uses.
3018 (Ops[0] == 0 || Ops[0] == 1)) {
3019 bool IsSpill = Ops[0] == 0;
3020 bool IsFill = !IsSpill;
3021 const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3022 const MachineRegisterInfo &MRI = MF.getRegInfo();
3023 MachineBasicBlock &MBB = *MI.getParent();
3024 const MachineOperand &DstMO = MI.getOperand(0);
3025 const MachineOperand &SrcMO = MI.getOperand(1);
3026 unsigned DstReg = DstMO.getReg();
3027 unsigned SrcReg = SrcMO.getReg();
3028 // This is slightly expensive to compute for physical regs since
3029 // getMinimalPhysRegClass is slow.
3030 auto getRegClass = [&](unsigned Reg) {
3031 return TargetRegisterInfo::isVirtualRegister(Reg)
3032 ? MRI.getRegClass(Reg)
3033 : TRI.getMinimalPhysRegClass(Reg);
3034 };
3035
3036 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3037 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3038 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3039 "Mismatched register size in non subreg COPY");
3040 if (IsSpill)
3041 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3042 getRegClass(SrcReg), &TRI);
3043 else
3044 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3045 getRegClass(DstReg), &TRI);
3046 return &*--InsertPt;
3047 }
3048
3049 // Handle cases like spilling def of:
3050 //
3051 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3052 //
3053 // where the physical register source can be widened and stored to the full
3054 // virtual reg destination stack slot, in this case producing:
3055 //
3056 // STRXui %xzr, %stack.0
3057 //
3058 if (IsSpill && DstMO.isUndef() &&
3059 TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
3060 assert(SrcMO.getSubReg() == 0 &&
3061 "Unexpected subreg on physical register");
3062 const TargetRegisterClass *SpillRC;
3063 unsigned SpillSubreg;
3064 switch (DstMO.getSubReg()) {
3065 default:
3066 SpillRC = nullptr;
3067 break;
3068 case AArch64::sub_32:
3069 case AArch64::ssub:
3070 if (AArch64::GPR32RegClass.contains(SrcReg)) {
3071 SpillRC = &AArch64::GPR64RegClass;
3072 SpillSubreg = AArch64::sub_32;
3073 } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3074 SpillRC = &AArch64::FPR64RegClass;
3075 SpillSubreg = AArch64::ssub;
3076 } else
3077 SpillRC = nullptr;
3078 break;
3079 case AArch64::dsub:
3080 if (AArch64::FPR64RegClass.contains(SrcReg)) {
3081 SpillRC = &AArch64::FPR128RegClass;
3082 SpillSubreg = AArch64::dsub;
3083 } else
3084 SpillRC = nullptr;
3085 break;
3086 }
3087
3088 if (SpillRC)
3089 if (unsigned WidenedSrcReg =
3090 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3091 storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3092 FrameIndex, SpillRC, &TRI);
3093 return &*--InsertPt;
3094 }
3095 }
3096
3097 // Handle cases like filling use of:
3098 //
3099 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3100 //
3101 // where we can load the full virtual reg source stack slot, into the subreg
3102 // destination, in this case producing:
3103 //
3104 // LDRWui %0:sub_32<def,read-undef>, %stack.0
3105 //
3106 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3107 const TargetRegisterClass *FillRC;
3108 switch (DstMO.getSubReg()) {
3109 default:
3110 FillRC = nullptr;
3111 break;
3112 case AArch64::sub_32:
3113 FillRC = &AArch64::GPR32RegClass;
3114 break;
3115 case AArch64::ssub:
3116 FillRC = &AArch64::FPR32RegClass;
3117 break;
3118 case AArch64::dsub:
3119 FillRC = &AArch64::FPR64RegClass;
3120 break;
3121 }
3122
3123 if (FillRC) {
3124 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3125 TRI.getRegSizeInBits(*FillRC) &&
3126 "Mismatched regclass size on folded subreg COPY");
3127 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3128 MachineInstr &LoadMI = *--InsertPt;
3129 MachineOperand &LoadDst = LoadMI.getOperand(0);
3130 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3131 LoadDst.setSubReg(DstMO.getSubReg());
3132 LoadDst.setIsUndef();
3133 return &LoadMI;
3134 }
3135 }
3136 }
3137
3138 // Cannot fold.
3139 return nullptr;
3140 }
3141
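// Decide how much of a frame offset can be folded into MI's immediate field.
// On return, *EmittableOffset holds the encodable part, Offset the remainder
// that must be materialized separately, and *OutUseUnscaledOp/*OutUnscaledOp
// whether and which unscaled opcode should be used instead.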
3142 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
3143 bool *OutUseUnscaledOp,
3144 unsigned *OutUnscaledOp,
3145 int *EmittableOffset) {
3146 int Scale = 1;
3147 bool IsSigned = false;
3148 // ImmIdx defaults to 2; the paired-instruction cases below override it.
3149 unsigned ImmIdx = 2;
3150 unsigned UnscaledOp = 0;
3151 // Set output values in case of early exit.
3152 if (EmittableOffset)
3153 *EmittableOffset = 0;
3154 if (OutUseUnscaledOp)
3155 *OutUseUnscaledOp = false;
3156 if (OutUnscaledOp)
3157 *OutUnscaledOp = 0;
3158 switch (MI.getOpcode()) {
3159 default:
3160 llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3161 // Vector spills/fills can't take an immediate offset.
3162 case AArch64::LD1Twov2d:
3163 case AArch64::LD1Threev2d:
3164 case AArch64::LD1Fourv2d:
3165 case AArch64::LD1Twov1d:
3166 case AArch64::LD1Threev1d:
3167 case AArch64::LD1Fourv1d:
3168 case AArch64::ST1Twov2d:
3169 case AArch64::ST1Threev2d:
3170 case AArch64::ST1Fourv2d:
3171 case AArch64::ST1Twov1d:
3172 case AArch64::ST1Threev1d:
3173 case AArch64::ST1Fourv1d:
3174 return AArch64FrameOffsetCannotUpdate;
3175 case AArch64::PRFMui:
3176 Scale = 8;
3177 UnscaledOp = AArch64::PRFUMi;
3178 break;
3179 case AArch64::LDRXui:
3180 Scale = 8;
3181 UnscaledOp = AArch64::LDURXi;
3182 break;
3183 case AArch64::LDRWui:
3184 Scale = 4;
3185 UnscaledOp = AArch64::LDURWi;
3186 break;
3187 case AArch64::LDRBui:
3188 Scale = 1;
3189 UnscaledOp = AArch64::LDURBi;
3190 break;
3191 case AArch64::LDRHui:
3192 Scale = 2;
3193 UnscaledOp = AArch64::LDURHi;
3194 break;
3195 case AArch64::LDRSui:
3196 Scale = 4;
3197 UnscaledOp = AArch64::LDURSi;
3198 break;
3199 case AArch64::LDRDui:
3200 Scale = 8;
3201 UnscaledOp = AArch64::LDURDi;
3202 break;
3203 case AArch64::LDRQui:
3204 Scale = 16;
3205 UnscaledOp = AArch64::LDURQi;
3206 break;
3207 case AArch64::LDRBBui:
3208 Scale = 1;
3209 UnscaledOp = AArch64::LDURBBi;
3210 break;
3211 case AArch64::LDRHHui:
3212 Scale = 2;
3213 UnscaledOp = AArch64::LDURHHi;
3214 break;
3215 case AArch64::LDRSBXui:
3216 Scale = 1;
3217 UnscaledOp = AArch64::LDURSBXi;
3218 break;
3219 case AArch64::LDRSBWui:
3220 Scale = 1;
3221 UnscaledOp = AArch64::LDURSBWi;
3222 break;
3223 case AArch64::LDRSHXui:
3224 Scale = 2;
3225 UnscaledOp = AArch64::LDURSHXi;
3226 break;
3227 case AArch64::LDRSHWui:
3228 Scale = 2;
3229 UnscaledOp = AArch64::LDURSHWi;
3230 break;
3231 case AArch64::LDRSWui:
3232 Scale = 4;
3233 UnscaledOp = AArch64::LDURSWi;
3234 break;
3235
3236 case AArch64::STRXui:
3237 Scale = 8;
3238 UnscaledOp = AArch64::STURXi;
3239 break;
3240 case AArch64::STRWui:
3241 Scale = 4;
3242 UnscaledOp = AArch64::STURWi;
3243 break;
3244 case AArch64::STRBui:
3245 Scale = 1;
3246 UnscaledOp = AArch64::STURBi;
3247 break;
3248 case AArch64::STRHui:
3249 Scale = 2;
3250 UnscaledOp = AArch64::STURHi;
3251 break;
3252 case AArch64::STRSui:
3253 Scale = 4;
3254 UnscaledOp = AArch64::STURSi;
3255 break;
3256 case AArch64::STRDui:
3257 Scale = 8;
3258 UnscaledOp = AArch64::STURDi;
3259 break;
3260 case AArch64::STRQui:
3261 Scale = 16;
3262 UnscaledOp = AArch64::STURQi;
3263 break;
3264 case AArch64::STRBBui:
3265 Scale = 1;
3266 UnscaledOp = AArch64::STURBBi;
3267 break;
3268 case AArch64::STRHHui:
3269 Scale = 2;
3270 UnscaledOp = AArch64::STURHHi;
3271 break;
3272
3273 case AArch64::LDPXi:
3274 case AArch64::LDPDi:
3275 case AArch64::STPXi:
3276 case AArch64::STPDi:
3277 case AArch64::LDNPXi:
3278 case AArch64::LDNPDi:
3279 case AArch64::STNPXi:
3280 case AArch64::STNPDi:
3281 ImmIdx = 3;
3282 IsSigned = true;
3283 Scale = 8;
3284 break;
3285 case AArch64::LDPQi:
3286 case AArch64::STPQi:
3287 case AArch64::LDNPQi:
3288 case AArch64::STNPQi:
3289 ImmIdx = 3;
3290 IsSigned = true;
3291 Scale = 16;
3292 break;
3293 case AArch64::LDPWi:
3294 case AArch64::LDPSi:
3295 case AArch64::STPWi:
3296 case AArch64::STPSi:
3297 case AArch64::LDNPWi:
3298 case AArch64::LDNPSi:
3299 case AArch64::STNPWi:
3300 case AArch64::STNPSi:
3301 ImmIdx = 3;
3302 IsSigned = true;
3303 Scale = 4;
3304 break;
3305
3306 case AArch64::LDURXi:
3307 case AArch64::LDURWi:
3308 case AArch64::LDURBi:
3309 case AArch64::LDURHi:
3310 case AArch64::LDURSi:
3311 case AArch64::LDURDi:
3312 case AArch64::LDURQi:
3313 case AArch64::LDURHHi:
3314 case AArch64::LDURBBi:
3315 case AArch64::LDURSBXi:
3316 case AArch64::LDURSBWi:
3317 case AArch64::LDURSHXi:
3318 case AArch64::LDURSHWi:
3319 case AArch64::LDURSWi:
3320 case AArch64::STURXi:
3321 case AArch64::STURWi:
3322 case AArch64::STURBi:
3323 case AArch64::STURHi:
3324 case AArch64::STURSi:
3325 case AArch64::STURDi:
3326 case AArch64::STURQi:
3327 case AArch64::STURBBi:
3328 case AArch64::STURHHi:
3329 Scale = 1;
3330 break;
3331 }
3332
3333 Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3334
3335 bool useUnscaledOp = false;
3336 // If the offset doesn't match the scale, we rewrite the instruction to
3337 // use the unscaled instruction instead. Likewise, if we have a negative
3338 // offset (and have an unscaled op to use).
3339 if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3340 useUnscaledOp = true;
3341
3342 // Use an unscaled addressing mode if the instruction has a negative offset
3343 // (or if the instruction is already using an unscaled addressing mode).
3344 unsigned MaskBits;
3345 if (IsSigned) {
3346 // ldp/stp instructions.
3347 MaskBits = 7;
3348 Offset /= Scale;
3349 } else if (UnscaledOp == 0 || useUnscaledOp) {
3350 MaskBits = 9;
3351 IsSigned = true;
3352 Scale = 1;
3353 } else {
3354 MaskBits = 12;
3355 IsSigned = false;
3356 Offset /= Scale;
3357 }
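// Illustrative example (not from the original comments): for AArch64::LDRXui
// (Scale = 8) the scaled path accepts immediates 0..4095, i.e. byte offsets
// 0..32760 in multiples of 8. A byte offset that is negative or not a multiple
// of 8 switches to the unscaled AArch64::LDURXi form, whose 9-bit signed
// immediate covers byte offsets in [-256, 255]; LDP/STP variants use a 7-bit
// signed, scaled immediate.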
3358
3359 // Attempt to fold address computation.
3360 int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3361 int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3362 if (Offset >= MinOff && Offset <= MaxOff) {
3363 if (EmittableOffset)
3364 *EmittableOffset = Offset;
3365 Offset = 0;
3366 } else {
3367 int NewOff = Offset < 0 ? MinOff : MaxOff;
3368 if (EmittableOffset)
3369 *EmittableOffset = NewOff;
3370 Offset = (Offset - NewOff) * Scale;
3371 }
3372 if (OutUseUnscaledOp)
3373 *OutUseUnscaledOp = useUnscaledOp;
3374 if (OutUnscaledOp)
3375 *OutUnscaledOp = UnscaledOp;
3376 return AArch64FrameOffsetCanUpdate |
3377 (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3378 }
3379
3380 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3381 unsigned FrameReg, int &Offset,
3382 const AArch64InstrInfo *TII) {
3383 unsigned Opcode = MI.getOpcode();
3384 unsigned ImmIdx = FrameRegIdx + 1;
3385
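// A frame index that feeds a plain ADD (or flag-setting ADDS) can be folded
// completely: the ADD's immediate is absorbed into Offset, the address is
// rematerialized with emitFrameOffset, and the original instruction is erased.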
3386 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3387 Offset += MI.getOperand(ImmIdx).getImm();
3388 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3389 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3390 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3391 MI.eraseFromParent();
3392 Offset = 0;
3393 return true;
3394 }
3395
3396 int NewOffset;
3397 unsigned UnscaledOp;
3398 bool UseUnscaledOp;
3399 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3400 &UnscaledOp, &NewOffset);
3401 if (Status & AArch64FrameOffsetCanUpdate) {
3402 if (Status & AArch64FrameOffsetIsLegal)
3403 // Replace the FrameIndex with FrameReg.
3404 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3405 if (UseUnscaledOp)
3406 MI.setDesc(TII->get(UnscaledOp));
3407
3408 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3409 return Offset == 0;
3410 }
3411
3412 return false;
3413 }
3414
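// HINT #0 is the architectural encoding of NOP on AArch64.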
3415 void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3416 NopInst.setOpcode(AArch64::HINT);
3417 NopInst.addOperand(MCOperand::createImm(0));
3418 }
3419
3420 // AArch64 supports MachineCombiner.
3421 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3422
3423 // True when Opc sets the NZCV flags.
3424 static bool isCombineInstrSettingFlag(unsigned Opc) {
3425 switch (Opc) {
3426 case AArch64::ADDSWrr:
3427 case AArch64::ADDSWri:
3428 case AArch64::ADDSXrr:
3429 case AArch64::ADDSXri:
3430 case AArch64::SUBSWrr:
3431 case AArch64::SUBSXrr:
3432 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3433 case AArch64::SUBSWri:
3434 case AArch64::SUBSXri:
3435 return true;
3436 default:
3437 break;
3438 }
3439 return false;
3440 }
3441
3442 // 32b Opcodes that can be combined with a MUL
3443 static bool isCombineInstrCandidate32(unsigned Opc) {
3444 switch (Opc) {
3445 case AArch64::ADDWrr:
3446 case AArch64::ADDWri:
3447 case AArch64::SUBWrr:
3448 case AArch64::ADDSWrr:
3449 case AArch64::ADDSWri:
3450 case AArch64::SUBSWrr:
3451 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3452 case AArch64::SUBWri:
3453 case AArch64::SUBSWri:
3454 return true;
3455 default:
3456 break;
3457 }
3458 return false;
3459 }
3460
3461 // 64b Opcodes that can be combined with a MUL
3462 static bool isCombineInstrCandidate64(unsigned Opc) {
3463 switch (Opc) {
3464 case AArch64::ADDXrr:
3465 case AArch64::ADDXri:
3466 case AArch64::SUBXrr:
3467 case AArch64::ADDSXrr:
3468 case AArch64::ADDSXri:
3469 case AArch64::SUBSXrr:
3470 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3471 case AArch64::SUBXri:
3472 case AArch64::SUBSXri:
3473 return true;
3474 default:
3475 break;
3476 }
3477 return false;
3478 }
3479
3480 // FP Opcodes that can be combined with a FMUL
3481 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3482 switch (Inst.getOpcode()) {
3483 default:
3484 break;
3485 case AArch64::FADDSrr:
3486 case AArch64::FADDDrr:
3487 case AArch64::FADDv2f32:
3488 case AArch64::FADDv2f64:
3489 case AArch64::FADDv4f32:
3490 case AArch64::FSUBSrr:
3491 case AArch64::FSUBDrr:
3492 case AArch64::FSUBv2f32:
3493 case AArch64::FSUBv2f64:
3494 case AArch64::FSUBv4f32:
3495 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3496 return (Options.UnsafeFPMath ||
3497 Options.AllowFPOpFusion == FPOpFusion::Fast);
3498 }
3499 return false;
3500 }
3501
3502 // Opcodes that can be combined with a MUL
3503 static bool isCombineInstrCandidate(unsigned Opc) {
3504 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3505 }
3506
3507 //
3508 // Utility routine that checks if \param MO is defined by an
3509 // \param CombineOpc instruction in the basic block \param MBB
3510 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3511 unsigned CombineOpc, unsigned ZeroReg = 0,
3512 bool CheckZeroReg = false) {
3513 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3514 MachineInstr *MI = nullptr;
3515
3516 if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3517 MI = MRI.getUniqueVRegDef(MO.getReg());
3518 // And it needs to be in the trace (otherwise, it won't have a depth).
3519 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3520 return false;
3521 // It must only be used by the instruction we are combining with.
3522 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3523 return false;
3524
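// Integer multiplies are represented as MADD with the zero register as the
// addend, so for the integer case we additionally require operand 3 to be
// WZR/XZR before treating the definition as a plain MUL.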
3525 if (CheckZeroReg) {
3526 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3527 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3528 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
3529 // The third input reg must be zero.
3530 if (MI->getOperand(3).getReg() != ZeroReg)
3531 return false;
3532 }
3533
3534 return true;
3535 }
3536
3537 //
3538 // Is \param MO defined by an integer multiply and can be combined?
3539 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3540 unsigned MulOpc, unsigned ZeroReg) {
3541 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3542 }
3543
3544 //
3545 // Is \param MO defined by a floating-point multiply and can be combined?
3546 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3547 unsigned MulOpc) {
3548 return canCombine(MBB, MO, MulOpc);
3549 }
3550
3551 // TODO: There are many more machine instruction opcodes to match:
3552 // 1. Other data types (integer, vectors)
3553 // 2. Other math / logic operations (xor, or)
3554 // 3. Other forms of the same operation (intrinsics and other variants)
3555 bool AArch64InstrInfo::isAssociativeAndCommutative(
3556 const MachineInstr &Inst) const {
3557 switch (Inst.getOpcode()) {
3558 case AArch64::FADDDrr:
3559 case AArch64::FADDSrr:
3560 case AArch64::FADDv2f32:
3561 case AArch64::FADDv2f64:
3562 case AArch64::FADDv4f32:
3563 case AArch64::FMULDrr:
3564 case AArch64::FMULSrr:
3565 case AArch64::FMULX32:
3566 case AArch64::FMULX64:
3567 case AArch64::FMULXv2f32:
3568 case AArch64::FMULXv2f64:
3569 case AArch64::FMULXv4f32:
3570 case AArch64::FMULv2f32:
3571 case AArch64::FMULv2f64:
3572 case AArch64::FMULv4f32:
3573 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3574 default:
3575 return false;
3576 }
3577 }
3578
3579 /// Find instructions that can be turned into madd.
3580 static bool getMaddPatterns(MachineInstr &Root,
3581 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3582 unsigned Opc = Root.getOpcode();
3583 MachineBasicBlock &MBB = *Root.getParent();
3584 bool Found = false;
3585
3586 if (!isCombineInstrCandidate(Opc))
3587 return false;
3588 if (isCombineInstrSettingFlag(Opc)) {
3589 int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3590 // Bail out when the NZCV flags are live.
3591 if (Cmp_NZCV == -1)
3592 return false;
3593 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3594 // When opcode can't change bail out.
3595 // CHECKME: do we miss any cases for opcode conversion?
3596 if (NewOpc == Opc)
3597 return false;
3598 Opc = NewOpc;
3599 }
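// Illustrative example: an ADDSWrr whose NZCV def is dead is treated as a
// plain ADDWrr here, so the usual MUL + ADD -> MADD patterns below still apply.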
3600
3601 switch (Opc) {
3602 default:
3603 break;
3604 case AArch64::ADDWrr:
3605 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3606 "ADDWrr does not have register operands");
3607 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3608 AArch64::WZR)) {
3609 Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3610 Found = true;
3611 }
3612 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3613 AArch64::WZR)) {
3614 Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3615 Found = true;
3616 }
3617 break;
3618 case AArch64::ADDXrr:
3619 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3620 AArch64::XZR)) {
3621 Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3622 Found = true;
3623 }
3624 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3625 AArch64::XZR)) {
3626 Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3627 Found = true;
3628 }
3629 break;
3630 case AArch64::SUBWrr:
3631 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3632 AArch64::WZR)) {
3633 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
3634 Found = true;
3635 }
3636 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3637 AArch64::WZR)) {
3638 Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
3639 Found = true;
3640 }
3641 break;
3642 case AArch64::SUBXrr:
3643 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3644 AArch64::XZR)) {
3645 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
3646 Found = true;
3647 }
3648 if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3649 AArch64::XZR)) {
3650 Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
3651 Found = true;
3652 }
3653 break;
3654 case AArch64::ADDWri:
3655 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3656 AArch64::WZR)) {
3657 Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
3658 Found = true;
3659 }
3660 break;
3661 case AArch64::ADDXri:
3662 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3663 AArch64::XZR)) {
3664 Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3665 Found = true;
3666 }
3667 break;
3668 case AArch64::SUBWri:
3669 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3670 AArch64::WZR)) {
3671 Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3672 Found = true;
3673 }
3674 break;
3675 case AArch64::SUBXri:
3676 if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3677 AArch64::XZR)) {
3678 Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3679 Found = true;
3680 }
3681 break;
3682 }
3683 return Found;
3684 }
3685 /// Floating-Point Support
3686
3687 /// Find floating-point instructions that can be turned into a fused multiply-add/subtract.
3688 static bool getFMAPatterns(MachineInstr &Root,
3689 SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3690
3691 if (!isCombineInstrCandidateFP(Root))
3692 return false;
3693
3694 MachineBasicBlock &MBB = *Root.getParent();
3695 bool Found = false;
3696
3697 switch (Root.getOpcode()) {
3698 default:
3699 assert(false && "Unsupported FP instruction in combiner\n");
3700 break;
3701 case AArch64::FADDSrr:
3702 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3703 "FADDWrr does not have register operands");
3704 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3705 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3706 Found = true;
3707 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3708 AArch64::FMULv1i32_indexed)) {
3709 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3710 Found = true;
3711 }
3712 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3713 Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3714 Found = true;
3715 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3716 AArch64::FMULv1i32_indexed)) {
3717 Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3718 Found = true;
3719 }
3720 break;
3721 case AArch64::FADDDrr:
3722 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3723 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3724 Found = true;
3725 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3726 AArch64::FMULv1i64_indexed)) {
3727 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3728 Found = true;
3729 }
3730 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3731 Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3732 Found = true;
3733 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3734 AArch64::FMULv1i64_indexed)) {
3735 Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3736 Found = true;
3737 }
3738 break;
3739 case AArch64::FADDv2f32:
3740 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3741 AArch64::FMULv2i32_indexed)) {
3742 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3743 Found = true;
3744 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3745 AArch64::FMULv2f32)) {
3746 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3747 Found = true;
3748 }
3749 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3750 AArch64::FMULv2i32_indexed)) {
3751 Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3752 Found = true;
3753 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3754 AArch64::FMULv2f32)) {
3755 Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3756 Found = true;
3757 }
3758 break;
3759 case AArch64::FADDv2f64:
3760 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3761 AArch64::FMULv2i64_indexed)) {
3762 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3763 Found = true;
3764 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3765 AArch64::FMULv2f64)) {
3766 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3767 Found = true;
3768 }
3769 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3770 AArch64::FMULv2i64_indexed)) {
3771 Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3772 Found = true;
3773 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3774 AArch64::FMULv2f64)) {
3775 Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3776 Found = true;
3777 }
3778 break;
3779 case AArch64::FADDv4f32:
3780 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3781 AArch64::FMULv4i32_indexed)) {
3782 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3783 Found = true;
3784 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3785 AArch64::FMULv4f32)) {
3786 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3787 Found = true;
3788 }
3789 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3790 AArch64::FMULv4i32_indexed)) {
3791 Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3792 Found = true;
3793 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3794 AArch64::FMULv4f32)) {
3795 Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3796 Found = true;
3797 }
3798 break;
3799
3800 case AArch64::FSUBSrr:
3801 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3802 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3803 Found = true;
3804 }
3805 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3806 Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3807 Found = true;
3808 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3809 AArch64::FMULv1i32_indexed)) {
3810 Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3811 Found = true;
3812 }
3813 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3814 Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
3815 Found = true;
3816 }
3817 break;
3818 case AArch64::FSUBDrr:
3819 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3820 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3821 Found = true;
3822 }
3823 if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3824 Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3825 Found = true;
3826 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3827 AArch64::FMULv1i64_indexed)) {
3828 Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3829 Found = true;
3830 }
3831 if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3832 Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
3833 Found = true;
3834 }
3835 break;
3836 case AArch64::FSUBv2f32:
3837 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3838 AArch64::FMULv2i32_indexed)) {
3839 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3840 Found = true;
3841 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3842 AArch64::FMULv2f32)) {
3843 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3844 Found = true;
3845 }
3846 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3847 AArch64::FMULv2i32_indexed)) {
3848 Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
3849 Found = true;
3850 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3851 AArch64::FMULv2f32)) {
3852 Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
3853 Found = true;
3854 }
3855 break;
3856 case AArch64::FSUBv2f64:
3857 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3858 AArch64::FMULv2i64_indexed)) {
3859 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3860 Found = true;
3861 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3862 AArch64::FMULv2f64)) {
3863 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3864 Found = true;
3865 }
3866 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3867 AArch64::FMULv2i64_indexed)) {
3868 Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
3869 Found = true;
3870 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3871 AArch64::FMULv2f64)) {
3872 Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
3873 Found = true;
3874 }
3875 break;
3876 case AArch64::FSUBv4f32:
3877 if (canCombineWithFMUL(MBB, Root.getOperand(2),
3878 AArch64::FMULv4i32_indexed)) {
3879 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3880 Found = true;
3881 } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3882 AArch64::FMULv4f32)) {
3883 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3884 Found = true;
3885 }
3886 if (canCombineWithFMUL(MBB, Root.getOperand(1),
3887 AArch64::FMULv4i32_indexed)) {
3888 Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
3889 Found = true;
3890 } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3891 AArch64::FMULv4f32)) {
3892 Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
3893 Found = true;
3894 }
3895 break;
3896 }
3897 return Found;
3898 }
3899
3900 /// Return true when a code sequence can improve throughput. It
3901 /// should be called only for instructions in loops.
3902 /// \param Pattern - combiner pattern
3903 bool AArch64InstrInfo::isThroughputPattern(
3904 MachineCombinerPattern Pattern) const {
3905 switch (Pattern) {
3906 default:
3907 break;
3908 case MachineCombinerPattern::FMULADDS_OP1:
3909 case MachineCombinerPattern::FMULADDS_OP2:
3910 case MachineCombinerPattern::FMULSUBS_OP1:
3911 case MachineCombinerPattern::FMULSUBS_OP2:
3912 case MachineCombinerPattern::FMULADDD_OP1:
3913 case MachineCombinerPattern::FMULADDD_OP2:
3914 case MachineCombinerPattern::FMULSUBD_OP1:
3915 case MachineCombinerPattern::FMULSUBD_OP2:
3916 case MachineCombinerPattern::FNMULSUBS_OP1:
3917 case MachineCombinerPattern::FNMULSUBD_OP1:
3918 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
3919 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
3920 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
3921 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
3922 case MachineCombinerPattern::FMLAv2f32_OP2:
3923 case MachineCombinerPattern::FMLAv2f32_OP1:
3924 case MachineCombinerPattern::FMLAv2f64_OP1:
3925 case MachineCombinerPattern::FMLAv2f64_OP2:
3926 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
3927 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
3928 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
3929 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
3930 case MachineCombinerPattern::FMLAv4f32_OP1:
3931 case MachineCombinerPattern::FMLAv4f32_OP2:
3932 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
3933 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
3934 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
3935 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
3936 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
3937 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
3938 case MachineCombinerPattern::FMLSv2f32_OP2:
3939 case MachineCombinerPattern::FMLSv2f64_OP2:
3940 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
3941 case MachineCombinerPattern::FMLSv4f32_OP2:
3942 return true;
3943 } // end switch (Pattern)
3944 return false;
3945 }
3946 /// Return true when there is potentially a faster code sequence for an
3947 /// instruction chain ending in \p Root. All potential patterns are listed in
3948 /// the \p Patterns vector. Patterns should be sorted in priority order, since the
3949 /// pattern evaluator stops checking as soon as it finds a faster sequence.
3950
3951 bool AArch64InstrInfo::getMachineCombinerPatterns(
3952 MachineInstr &Root,
3953 SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
3954 // Integer patterns
3955 if (getMaddPatterns(Root, Patterns))
3956 return true;
3957 // Floating point patterns
3958 if (getFMAPatterns(Root, Patterns))
3959 return true;
3960
3961 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
3962 }
3963
3964 enum class FMAInstKind { Default, Indexed, Accumulator };
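// FMAInstKind selects the operand layout used below when building the fused
// instruction: Default is the scalar MADD/FMADD order (Rn, Rm, Ra); Indexed is
// the by-element FMLA/FMLS form, which takes the accumulator first plus a lane
// immediate; Accumulator is the vector FMLA/FMLS form, which also takes the
// accumulator first but has no lane index.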
3965 /// genFusedMultiply - Generate fused multiply instructions.
3966 /// This function supports both integer and floating point instructions.
3967 /// A typical example:
3968 /// F|MUL I=A,B,0
3969 /// F|ADD R,I,C
3970 /// ==> F|MADD R,A,B,C
3971 /// \param MF Containing MachineFunction
3972 /// \param MRI Register information
3973 /// \param TII Target information
3974 /// \param Root is the F|ADD instruction
3975 /// \param [out] InsInstrs is a vector of machine instructions and will
3976 /// contain the generated madd instruction
3977 /// \param IdxMulOpd is index of operand in Root that is the result of
3978 /// the F|MUL. In the example above IdxMulOpd is 1.
3979 /// \param MaddOpc the opcode of the f|madd instruction
3980 /// \param RC Register class of operands
3981 /// \param kind the kind of FMA instruction (addressing mode) to generate
3982 /// \param ReplacedAddend is the result register from the instruction
3983 /// replacing the non-combined operand, if any.
3984 static MachineInstr *
3985 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
3986 const TargetInstrInfo *TII, MachineInstr &Root,
3987 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
3988 unsigned MaddOpc, const TargetRegisterClass *RC,
3989 FMAInstKind kind = FMAInstKind::Default,
3990 const unsigned *ReplacedAddend = nullptr) {
3991 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
3992
3993 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
3994 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
3995 unsigned ResultReg = Root.getOperand(0).getReg();
3996 unsigned SrcReg0 = MUL->getOperand(1).getReg();
3997 bool Src0IsKill = MUL->getOperand(1).isKill();
3998 unsigned SrcReg1 = MUL->getOperand(2).getReg();
3999 bool Src1IsKill = MUL->getOperand(2).isKill();
4000
4001 unsigned SrcReg2;
4002 bool Src2IsKill;
4003 if (ReplacedAddend) {
4004 // If we just generated a new addend, we must be its only use.
4005 SrcReg2 = *ReplacedAddend;
4006 Src2IsKill = true;
4007 } else {
4008 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4009 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4010 }
4011
4012 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4013 MRI.constrainRegClass(ResultReg, RC);
4014 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4015 MRI.constrainRegClass(SrcReg0, RC);
4016 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4017 MRI.constrainRegClass(SrcReg1, RC);
4018 if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
4019 MRI.constrainRegClass(SrcReg2, RC);
4020
4021 MachineInstrBuilder MIB;
4022 if (kind == FMAInstKind::Default)
4023 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4024 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4025 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4026 .addReg(SrcReg2, getKillRegState(Src2IsKill));
4027 else if (kind == FMAInstKind::Indexed)
4028 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4029 .addReg(SrcReg2, getKillRegState(Src2IsKill))
4030 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4031 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4032 .addImm(MUL->getOperand(3).getImm());
4033 else if (kind == FMAInstKind::Accumulator)
4034 MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4035 .addReg(SrcReg2, getKillRegState(Src2IsKill))
4036 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4037 .addReg(SrcReg1, getKillRegState(Src1IsKill));
4038 else
4039 assert(false && "Invalid FMA instruction kind \n");
4040 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS).
4041 InsInstrs.push_back(MIB);
4042 return MUL;
4043 }
4044
4045 /// genMaddR - Generate madd instruction and combine mul and add using
4046 /// an extra virtual register
4047 /// Example - an ADD intermediate needs to be stored in a register:
4048 /// MUL I=A,B,0
4049 /// ADD R,I,Imm
4050 /// ==> ORR V, ZR, Imm
4051 /// ==> MADD R,A,B,V
4052 /// \param MF Containing MachineFunction
4053 /// \param MRI Register information
4054 /// \param TII Target information
4055 /// \param Root is the ADD instruction
4056 /// \param [out] InsInstrs is a vector of machine instructions and will
4057 /// contain the generated madd instruction
4058 /// \param IdxMulOpd is index of operand in Root that is the result of
4059 /// the MUL. In the example above IdxMulOpd is 1.
4060 /// \param MaddOpc the opcode of the madd instruction
4061 /// \param VR is a virtual register that holds the value of an ADD operand
4062 /// (V in the example above).
4063 /// \param RC Register class of operands
4064 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4065 const TargetInstrInfo *TII, MachineInstr &Root,
4066 SmallVectorImpl<MachineInstr *> &InsInstrs,
4067 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4068 const TargetRegisterClass *RC) {
4069 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4070
4071 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4072 unsigned ResultReg = Root.getOperand(0).getReg();
4073 unsigned SrcReg0 = MUL->getOperand(1).getReg();
4074 bool Src0IsKill = MUL->getOperand(1).isKill();
4075 unsigned SrcReg1 = MUL->getOperand(2).getReg();
4076 bool Src1IsKill = MUL->getOperand(2).isKill();
4077
4078 if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4079 MRI.constrainRegClass(ResultReg, RC);
4080 if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4081 MRI.constrainRegClass(SrcReg0, RC);
4082 if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4083 MRI.constrainRegClass(SrcReg1, RC);
4084 if (TargetRegisterInfo::isVirtualRegister(VR))
4085 MRI.constrainRegClass(VR, RC);
4086
4087 MachineInstrBuilder MIB =
4088 BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4089 .addReg(SrcReg0, getKillRegState(Src0IsKill))
4090 .addReg(SrcReg1, getKillRegState(Src1IsKill))
4091 .addReg(VR);
4092 // Insert the MADD
4093 InsInstrs.push_back(MIB);
4094 return MUL;
4095 }
4096
4097 /// When getMachineCombinerPatterns() finds potential patterns,
4098 /// this function generates the instructions that could replace the
4099 /// original code sequence
4100 void AArch64InstrInfo::genAlternativeCodeSequence(
4101 MachineInstr &Root, MachineCombinerPattern Pattern,
4102 SmallVectorImpl<MachineInstr *> &InsInstrs,
4103 SmallVectorImpl<MachineInstr *> &DelInstrs,
4104 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4105 MachineBasicBlock &MBB = *Root.getParent();
4106 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4107 MachineFunction &MF = *MBB.getParent();
4108 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4109
4110 MachineInstr *MUL;
4111 const TargetRegisterClass *RC;
4112 unsigned Opc;
4113 switch (Pattern) {
4114 default:
4115 // Reassociate instructions.
4116 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4117 DelInstrs, InstrIdxForVirtReg);
4118 return;
4119 case MachineCombinerPattern::MULADDW_OP1:
4120 case MachineCombinerPattern::MULADDX_OP1:
4121 // MUL I=A,B,0
4122 // ADD R,I,C
4123 // ==> MADD R,A,B,C
4124 // --- Create(MADD);
4125 if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4126 Opc = AArch64::MADDWrrr;
4127 RC = &AArch64::GPR32RegClass;
4128 } else {
4129 Opc = AArch64::MADDXrrr;
4130 RC = &AArch64::GPR64RegClass;
4131 }
4132 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4133 break;
4134 case MachineCombinerPattern::MULADDW_OP2:
4135 case MachineCombinerPattern::MULADDX_OP2:
4136 // MUL I=A,B,0
4137 // ADD R,C,I
4138 // ==> MADD R,A,B,C
4139 // --- Create(MADD);
4140 if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4141 Opc = AArch64::MADDWrrr;
4142 RC = &AArch64::GPR32RegClass;
4143 } else {
4144 Opc = AArch64::MADDXrrr;
4145 RC = &AArch64::GPR64RegClass;
4146 }
4147 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4148 break;
4149 case MachineCombinerPattern::MULADDWI_OP1:
4150 case MachineCombinerPattern::MULADDXI_OP1: {
4151 // MUL I=A,B,0
4152 // ADD R,I,Imm
4153 // ==> ORR V, ZR, Imm
4154 // ==> MADD R,A,B,V
4155 // --- Create(MADD);
4156 const TargetRegisterClass *OrrRC;
4157 unsigned BitSize, OrrOpc, ZeroReg;
4158 if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4159 OrrOpc = AArch64::ORRWri;
4160 OrrRC = &AArch64::GPR32spRegClass;
4161 BitSize = 32;
4162 ZeroReg = AArch64::WZR;
4163 Opc = AArch64::MADDWrrr;
4164 RC = &AArch64::GPR32RegClass;
4165 } else {
4166 OrrOpc = AArch64::ORRXri;
4167 OrrRC = &AArch64::GPR64spRegClass;
4168 BitSize = 64;
4169 ZeroReg = AArch64::XZR;
4170 Opc = AArch64::MADDXrrr;
4171 RC = &AArch64::GPR64RegClass;
4172 }
4173 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4174 uint64_t Imm = Root.getOperand(2).getImm();
4175
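// Operand 3 of ADDWri/ADDXri is the optional 'lsl #12' shift of the
// immediate, so the shifted value is what must be materialized.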
4176 if (Root.getOperand(3).isImm()) {
4177 unsigned Val = Root.getOperand(3).getImm();
4178 Imm = Imm << Val;
4179 }
4180 uint64_t UImm = SignExtend64(Imm, BitSize);
4181 uint64_t Encoding;
4182 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4183 MachineInstrBuilder MIB1 =
4184 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4185 .addReg(ZeroReg)
4186 .addImm(Encoding);
4187 InsInstrs.push_back(MIB1);
4188 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4189 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4190 }
4191 break;
4192 }
4193 case MachineCombinerPattern::MULSUBW_OP1:
4194 case MachineCombinerPattern::MULSUBX_OP1: {
4195 // MUL I=A,B,0
4196 // SUB R,I, C
4197 // ==> SUB V, 0, C
4198 // ==> MADD R,A,B,V // = -C + A*B
4199 // --- Create(MADD);
4200 const TargetRegisterClass *SubRC;
4201 unsigned SubOpc, ZeroReg;
4202 if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4203 SubOpc = AArch64::SUBWrr;
4204 SubRC = &AArch64::GPR32spRegClass;
4205 ZeroReg = AArch64::WZR;
4206 Opc = AArch64::MADDWrrr;
4207 RC = &AArch64::GPR32RegClass;
4208 } else {
4209 SubOpc = AArch64::SUBXrr;
4210 SubRC = &AArch64::GPR64spRegClass;
4211 ZeroReg = AArch64::XZR;
4212 Opc = AArch64::MADDXrrr;
4213 RC = &AArch64::GPR64RegClass;
4214 }
4215 unsigned NewVR = MRI.createVirtualRegister(SubRC);
4216 // SUB NewVR, 0, C
4217 MachineInstrBuilder MIB1 =
4218 BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4219 .addReg(ZeroReg)
4220 .add(Root.getOperand(2));
4221 InsInstrs.push_back(MIB1);
4222 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4223 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4224 break;
4225 }
4226 case MachineCombinerPattern::MULSUBW_OP2:
4227 case MachineCombinerPattern::MULSUBX_OP2:
4228 // MUL I=A,B,0
4229 // SUB R,C,I
4230 // ==> MSUB R,A,B,C (computes C - A*B)
4231 // --- Create(MSUB);
4232 if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4233 Opc = AArch64::MSUBWrrr;
4234 RC = &AArch64::GPR32RegClass;
4235 } else {
4236 Opc = AArch64::MSUBXrrr;
4237 RC = &AArch64::GPR64RegClass;
4238 }
4239 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4240 break;
4241 case MachineCombinerPattern::MULSUBWI_OP1:
4242 case MachineCombinerPattern::MULSUBXI_OP1: {
4243 // MUL I=A,B,0
4244 // SUB R,I, Imm
4245 // ==> ORR V, ZR, -Imm
4246 // ==> MADD R,A,B,V // = -Imm + A*B
4247 // --- Create(MADD);
4248 const TargetRegisterClass *OrrRC;
4249 unsigned BitSize, OrrOpc, ZeroReg;
4250 if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4251 OrrOpc = AArch64::ORRWri;
4252 OrrRC = &AArch64::GPR32spRegClass;
4253 BitSize = 32;
4254 ZeroReg = AArch64::WZR;
4255 Opc = AArch64::MADDWrrr;
4256 RC = &AArch64::GPR32RegClass;
4257 } else {
4258 OrrOpc = AArch64::ORRXri;
4259 OrrRC = &AArch64::GPR64spRegClass;
4260 BitSize = 64;
4261 ZeroReg = AArch64::XZR;
4262 Opc = AArch64::MADDXrrr;
4263 RC = &AArch64::GPR64RegClass;
4264 }
4265 unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4266 uint64_t Imm = Root.getOperand(2).getImm();
4267 if (Root.getOperand(3).isImm()) {
4268 unsigned Val = Root.getOperand(3).getImm();
4269 Imm = Imm << Val;
4270 }
4271 uint64_t UImm = SignExtend64(-Imm, BitSize);
4272 uint64_t Encoding;
4273 if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4274 MachineInstrBuilder MIB1 =
4275 BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4276 .addReg(ZeroReg)
4277 .addImm(Encoding);
4278 InsInstrs.push_back(MIB1);
4279 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4280 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4281 }
4282 break;
4283 }
4284 // Floating Point Support
4285 case MachineCombinerPattern::FMULADDS_OP1:
4286 case MachineCombinerPattern::FMULADDD_OP1:
4287 // MUL I=A,B,0
4288 // ADD R,I,C
4289 // ==> MADD R,A,B,C
4290 // --- Create(MADD);
4291 if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4292 Opc = AArch64::FMADDSrrr;
4293 RC = &AArch64::FPR32RegClass;
4294 } else {
4295 Opc = AArch64::FMADDDrrr;
4296 RC = &AArch64::FPR64RegClass;
4297 }
4298 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4299 break;
4300 case MachineCombinerPattern::FMULADDS_OP2:
4301 case MachineCombinerPattern::FMULADDD_OP2:
4302 // FMUL I=A,B,0
4303 // FADD R,C,I
4304 // ==> FMADD R,A,B,C
4305 // --- Create(FMADD);
4306 if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4307 Opc = AArch64::FMADDSrrr;
4308 RC = &AArch64::FPR32RegClass;
4309 } else {
4310 Opc = AArch64::FMADDDrrr;
4311 RC = &AArch64::FPR64RegClass;
4312 }
4313 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4314 break;
4315
4316 case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4317 Opc = AArch64::FMLAv1i32_indexed;
4318 RC = &AArch64::FPR32RegClass;
4319 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4320 FMAInstKind::Indexed);
4321 break;
4322 case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4323 Opc = AArch64::FMLAv1i32_indexed;
4324 RC = &AArch64::FPR32RegClass;
4325 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4326 FMAInstKind::Indexed);
4327 break;
4328
4329 case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4330 Opc = AArch64::FMLAv1i64_indexed;
4331 RC = &AArch64::FPR64RegClass;
4332 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4333 FMAInstKind::Indexed);
4334 break;
4335 case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4336 Opc = AArch64::FMLAv1i64_indexed;
4337 RC = &AArch64::FPR64RegClass;
4338 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4339 FMAInstKind::Indexed);
4340 break;
4341
4342 case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4343 case MachineCombinerPattern::FMLAv2f32_OP1:
4344 RC = &AArch64::FPR64RegClass;
4345 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4346 Opc = AArch64::FMLAv2i32_indexed;
4347 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4348 FMAInstKind::Indexed);
4349 } else {
4350 Opc = AArch64::FMLAv2f32;
4351 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4352 FMAInstKind::Accumulator);
4353 }
4354 break;
4355 case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4356 case MachineCombinerPattern::FMLAv2f32_OP2:
4357 RC = &AArch64::FPR64RegClass;
4358 if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4359 Opc = AArch64::FMLAv2i32_indexed;
4360 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4361 FMAInstKind::Indexed);
4362 } else {
4363 Opc = AArch64::FMLAv2f32;
4364 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4365 FMAInstKind::Accumulator);
4366 }
4367 break;
4368
4369 case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4370 case MachineCombinerPattern::FMLAv2f64_OP1:
4371 RC = &AArch64::FPR128RegClass;
4372 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4373 Opc = AArch64::FMLAv2i64_indexed;
4374 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4375 FMAInstKind::Indexed);
4376 } else {
4377 Opc = AArch64::FMLAv2f64;
4378 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4379 FMAInstKind::Accumulator);
4380 }
4381 break;
4382 case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4383 case MachineCombinerPattern::FMLAv2f64_OP2:
4384 RC = &AArch64::FPR128RegClass;
4385 if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4386 Opc = AArch64::FMLAv2i64_indexed;
4387 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4388 FMAInstKind::Indexed);
4389 } else {
4390 Opc = AArch64::FMLAv2f64;
4391 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4392 FMAInstKind::Accumulator);
4393 }
4394 break;
4395
4396 case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4397 case MachineCombinerPattern::FMLAv4f32_OP1:
4398 RC = &AArch64::FPR128RegClass;
4399 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4400 Opc = AArch64::FMLAv4i32_indexed;
4401 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4402 FMAInstKind::Indexed);
4403 } else {
4404 Opc = AArch64::FMLAv4f32;
4405 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4406 FMAInstKind::Accumulator);
4407 }
4408 break;
4409
4410 case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4411 case MachineCombinerPattern::FMLAv4f32_OP2:
4412 RC = &AArch64::FPR128RegClass;
4413 if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4414 Opc = AArch64::FMLAv4i32_indexed;
4415 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4416 FMAInstKind::Indexed);
4417 } else {
4418 Opc = AArch64::FMLAv4f32;
4419 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4420 FMAInstKind::Accumulator);
4421 }
4422 break;
4423
4424 case MachineCombinerPattern::FMULSUBS_OP1:
4425 case MachineCombinerPattern::FMULSUBD_OP1: {
4426 // FMUL I=A,B,0
4427 // FSUB R,I,C
4428 // ==> FNMSUB R,A,B,C // = -C + A*B
4429 // --- Create(FNMSUB);
4430 if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4431 Opc = AArch64::FNMSUBSrrr;
4432 RC = &AArch64::FPR32RegClass;
4433 } else {
4434 Opc = AArch64::FNMSUBDrrr;
4435 RC = &AArch64::FPR64RegClass;
4436 }
4437 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4438 break;
4439 }
4440
4441 case MachineCombinerPattern::FNMULSUBS_OP1:
4442 case MachineCombinerPattern::FNMULSUBD_OP1: {
4443 // FNMUL I=A,B,0
4444 // FSUB R,I,C
4445 // ==> FNMADD R,A,B,C // = -A*B - C
4446 // --- Create(FNMADD);
4447 if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4448 Opc = AArch64::FNMADDSrrr;
4449 RC = &AArch64::FPR32RegClass;
4450 } else {
4451 Opc = AArch64::FNMADDDrrr;
4452 RC = &AArch64::FPR64RegClass;
4453 }
4454 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4455 break;
4456 }
4457
4458 case MachineCombinerPattern::FMULSUBS_OP2:
4459 case MachineCombinerPattern::FMULSUBD_OP2: {
4460 // FMUL I=A,B,0
4461 // FSUB R,C,I
4462 // ==> FMSUB R,A,B,C (computes C - A*B)
4463 // --- Create(FMSUB);
4464 if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4465 Opc = AArch64::FMSUBSrrr;
4466 RC = &AArch64::FPR32RegClass;
4467 } else {
4468 Opc = AArch64::FMSUBDrrr;
4469 RC = &AArch64::FPR64RegClass;
4470 }
4471 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4472 break;
4473 }
4474
4475 case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4476 Opc = AArch64::FMLSv1i32_indexed;
4477 RC = &AArch64::FPR32RegClass;
4478 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4479 FMAInstKind::Indexed);
4480 break;
4481
4482 case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4483 Opc = AArch64::FMLSv1i64_indexed;
4484 RC = &AArch64::FPR64RegClass;
4485 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4486 FMAInstKind::Indexed);
4487 break;
4488
4489 case MachineCombinerPattern::FMLSv2f32_OP2:
4490 case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4491 RC = &AArch64::FPR64RegClass;
4492 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4493 Opc = AArch64::FMLSv2i32_indexed;
4494 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4495 FMAInstKind::Indexed);
4496 } else {
4497 Opc = AArch64::FMLSv2f32;
4498 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4499 FMAInstKind::Accumulator);
4500 }
4501 break;
4502
4503 case MachineCombinerPattern::FMLSv2f64_OP2:
4504 case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4505 RC = &AArch64::FPR128RegClass;
4506 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4507 Opc = AArch64::FMLSv2i64_indexed;
4508 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4509 FMAInstKind::Indexed);
4510 } else {
4511 Opc = AArch64::FMLSv2f64;
4512 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4513 FMAInstKind::Accumulator);
4514 }
4515 break;
4516
4517 case MachineCombinerPattern::FMLSv4f32_OP2:
4518 case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4519 RC = &AArch64::FPR128RegClass;
4520 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4521 Opc = AArch64::FMLSv4i32_indexed;
4522 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4523 FMAInstKind::Indexed);
4524 } else {
4525 Opc = AArch64::FMLSv4f32;
4526 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4527 FMAInstKind::Accumulator);
4528 }
4529 break;
4530 case MachineCombinerPattern::FMLSv2f32_OP1:
4531 case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
4532 RC = &AArch64::FPR64RegClass;
4533 unsigned NewVR = MRI.createVirtualRegister(RC);
4534 MachineInstrBuilder MIB1 =
4535 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4536 .add(Root.getOperand(2));
4537 InsInstrs.push_back(MIB1);
4538 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4539 if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
4540 Opc = AArch64::FMLAv2i32_indexed;
4541 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4542 FMAInstKind::Indexed, &NewVR);
4543 } else {
4544 Opc = AArch64::FMLAv2f32;
4545 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4546 FMAInstKind::Accumulator, &NewVR);
4547 }
4548 break;
4549 }
4550 case MachineCombinerPattern::FMLSv4f32_OP1:
4551 case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
4552 RC = &AArch64::FPR128RegClass;
4553 unsigned NewVR = MRI.createVirtualRegister(RC);
4554 MachineInstrBuilder MIB1 =
4555 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4556 .add(Root.getOperand(2));
4557 InsInstrs.push_back(MIB1);
4558 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4559 if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
4560 Opc = AArch64::FMLAv4i32_indexed;
4561 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4562 FMAInstKind::Indexed, &NewVR);
4563 } else {
4564 Opc = AArch64::FMLAv4f32;
4565 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4566 FMAInstKind::Accumulator, &NewVR);
4567 }
4568 break;
4569 }
4570 case MachineCombinerPattern::FMLSv2f64_OP1:
4571 case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
4572 RC = &AArch64::FPR128RegClass;
4573 unsigned NewVR = MRI.createVirtualRegister(RC);
4574 MachineInstrBuilder MIB1 =
4575 BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4576 .add(Root.getOperand(2));
4577 InsInstrs.push_back(MIB1);
4578 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4579 if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
4580 Opc = AArch64::FMLAv2i64_indexed;
4581 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4582 FMAInstKind::Indexed, &NewVR);
4583 } else {
4584 Opc = AArch64::FMLAv2f64;
4585 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4586 FMAInstKind::Accumulator, &NewVR);
4587 }
4588 break;
4589 }
4590 } // end switch (Pattern)
4591 // Record MUL and ADD/SUB for deletion
4592 DelInstrs.push_back(MUL);
4593 DelInstrs.push_back(&Root);
4594 }
4595
4596 /// Replace csincr-branch sequence by simple conditional branch
4597 ///
4598 /// Examples:
4599 /// 1. \code
4600 /// csinc w9, wzr, wzr, <condition code>
4601 /// tbnz w9, #0, 0x44
4602 /// \endcode
4603 /// to
4604 /// \code
4605 /// b.<inverted condition code>
4606 /// \endcode
4607 ///
4608 /// 2. \code
4609 /// csinc w9, wzr, wzr, <condition code>
4610 /// tbz w9, #0, 0x44
4611 /// \endcode
4612 /// to
4613 /// \code
4614 /// b.<condition code>
4615 /// \endcode
4616 ///
4617 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
4618 /// compare's constant operand is power of 2.
4619 ///
4620 /// Examples:
4621 /// \code
4622 /// and w8, w8, #0x400
4623 /// cbnz w8, L1
4624 /// \endcode
4625 /// to
4626 /// \code
4627 /// tbnz w8, #10, L1
4628 /// \endcode
4629 ///
4630 /// \param MI Conditional Branch
4631 /// \return True when the simple conditional branch is generated
4632 ///
4633 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4634 bool IsNegativeBranch = false;
4635 bool IsTestAndBranch = false;
4636 unsigned TargetBBInMI = 0;
4637 switch (MI.getOpcode()) {
4638 default:
4639 llvm_unreachable("Unknown branch instruction?");
4640 case AArch64::Bcc:
4641 return false;
4642 case AArch64::CBZW:
4643 case AArch64::CBZX:
4644 TargetBBInMI = 1;
4645 break;
4646 case AArch64::CBNZW:
4647 case AArch64::CBNZX:
4648 TargetBBInMI = 1;
4649 IsNegativeBranch = true;
4650 break;
4651 case AArch64::TBZW:
4652 case AArch64::TBZX:
4653 TargetBBInMI = 2;
4654 IsTestAndBranch = true;
4655 break;
4656 case AArch64::TBNZW:
4657 case AArch64::TBNZX:
4658 TargetBBInMI = 2;
4659 IsNegativeBranch = true;
4660 IsTestAndBranch = true;
4661 break;
4662 }
4663 // So we increment a zero register and test for bits other
4664 // than bit 0? Conservatively bail out in case the verifier
4665 // missed this case.
4666 if (IsTestAndBranch && MI.getOperand(1).getImm())
4667 return false;
4668
4669 // Find Definition.
4670 assert(MI.getParent() && "Incomplete machine instruction\n");
4671 MachineBasicBlock *MBB = MI.getParent();
4672 MachineFunction *MF = MBB->getParent();
4673 MachineRegisterInfo *MRI = &MF->getRegInfo();
4674 unsigned VReg = MI.getOperand(0).getReg();
4675 if (!TargetRegisterInfo::isVirtualRegister(VReg))
4676 return false;
4677
4678 MachineInstr *DefMI = MRI->getVRegDef(VReg);
4679
4680 // Look through COPY instructions to find definition.
4681 while (DefMI->isCopy()) {
4682 unsigned CopyVReg = DefMI->getOperand(1).getReg();
4683 if (!MRI->hasOneNonDBGUse(CopyVReg))
4684 return false;
4685 if (!MRI->hasOneDef(CopyVReg))
4686 return false;
4687 DefMI = MRI->getVRegDef(CopyVReg);
4688 }
4689
4690 switch (DefMI->getOpcode()) {
4691 default:
4692 return false;
4693 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
4694 case AArch64::ANDWri:
4695 case AArch64::ANDXri: {
4696 if (IsTestAndBranch)
4697 return false;
4698 if (DefMI->getParent() != MBB)
4699 return false;
4700 if (!MRI->hasOneNonDBGUse(VReg))
4701 return false;
4702
4703 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4704 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4705 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4706 if (!isPowerOf2_64(Mask))
4707 return false;
4708
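// Illustrative example (restating the function-level comment): 'and w8, w8,
// #0x400' followed by 'cbnz w8, L1' becomes 'tbnz w8, #10, L1', where the
// bit index 10 is Log2_64(0x400).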
4709 MachineOperand &MO = DefMI->getOperand(1);
4710 unsigned NewReg = MO.getReg();
4711 if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4712 return false;
4713
4714 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4715
4716 MachineBasicBlock &RefToMBB = *MBB;
4717 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4718 DebugLoc DL = MI.getDebugLoc();
4719 unsigned Imm = Log2_64(Mask);
4720 unsigned Opc = (Imm < 32)
4721 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4722 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4723 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4724 .addReg(NewReg)
4725 .addImm(Imm)
4726 .addMBB(TBB);
4727 // The register lives on into the new TB(N)Z, so it is no longer killed here.
4728 MO.setIsKill(false);
4729
4730 // For a bit index smaller than 32 we must use the 32-bit
4731 // variant (W) in all cases, because the 64-bit variant cannot
4732 // encode those bit positions.
4733 // Therefore, if the input register is 64-bit, we need to take
4734 // its 32-bit sub-register.
4735 if (!Is32Bit && Imm < 32)
4736 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
4737 MI.eraseFromParent();
4738 return true;
4739 }
4740 // Look for CSINC
4741 case AArch64::CSINCWr:
4742 case AArch64::CSINCXr: {
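// Only a CSINC of the zero register against itself (the CSET/CSINC idiom)
// materializes the condition as a pure 0/1 value; anything else cannot be
// folded into a conditional branch on bit 0.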
4743 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4744 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4745 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4746 DefMI->getOperand(2).getReg() == AArch64::XZR))
4747 return false;
4748
4749 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4750 return false;
4751
4752 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4753 // Convert only when the condition code is not modified between
4754 // the CSINC and the branch. The CC may be used by other
4755 // instructions in between.
4756 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4757 return false;
4758 MachineBasicBlock &RefToMBB = *MBB;
4759 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4760 DebugLoc DL = MI.getDebugLoc();
4761 if (IsNegativeBranch)
4762 CC = AArch64CC::getInvertedCondCode(CC);
4763 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4764 MI.eraseFromParent();
4765 return true;
4766 }
4767 }
4768 }
4769
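// Note (assumption based on the AArch64II flag layout): MO_FRAGMENT masks the
// mutually exclusive address-fragment kind (PAGE, PAGEOFF, G0..G3, HI12),
// while the remaining bits (GOT, NC, TLS, ...) are independent modifier flags,
// which is why the two halves are split and serialized separately below.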
4770 std::pair<unsigned, unsigned>
4771 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4772 const unsigned Mask = AArch64II::MO_FRAGMENT;
4773 return std::make_pair(TF & Mask, TF & ~Mask);
4774 }
4775
4776 ArrayRef<std::pair<unsigned, const char *>>
4777 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4778 using namespace AArch64II;
4779
4780 static const std::pair<unsigned, const char *> TargetFlags[] = {
4781 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4782 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4783 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
4784 {MO_HI12, "aarch64-hi12"}};
4785 return makeArrayRef(TargetFlags);
4786 }
4787
4788 ArrayRef<std::pair<unsigned, const char *>>
4789 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4790 using namespace AArch64II;
4791
4792 static const std::pair<unsigned, const char *> TargetFlags[] = {
4793 {MO_COFFSTUB, "aarch64-coffstub"},
4794 {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
4795 {MO_S, "aarch64-s"}, {MO_TLS, "aarch64-tls"},
4796 {MO_DLLIMPORT, "aarch64-dllimport"}};
4797 return makeArrayRef(TargetFlags);
4798 }
4799
4800 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4801 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4802 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4803 {{MOSuppressPair, "aarch64-suppress-pair"},
4804 {MOStridedAccess, "aarch64-strided-access"}};
4805 return makeArrayRef(TargetFlags);
4806 }
4807
4808 /// Constants defining how certain sequences should be outlined.
4809 /// This encompasses how an outlined function should be called, and what kind of
4810 /// frame should be emitted for that outlined function.
4811 ///
4812 /// \p MachineOutlinerDefault implies that the function should be called with
4813 /// a save and restore of LR to the stack.
4814 ///
4815 /// That is,
4816 ///
4817 /// I1 Save LR OUTLINED_FUNCTION:
4818 /// I2 --> BL OUTLINED_FUNCTION I1
4819 /// I3 Restore LR I2
4820 /// I3
4821 /// RET
4822 ///
4823 /// * Call construction overhead: 3 (save + BL + restore)
4824 /// * Frame construction overhead: 1 (ret)
4825 /// * Requires stack fixups? Yes
4826 ///
4827 /// \p MachineOutlinerTailCall implies that the function is being created from
4828 /// a sequence of instructions ending in a return.
4829 ///
4830 /// That is,
4831 ///
4832 /// I1 OUTLINED_FUNCTION:
4833 /// I2 --> B OUTLINED_FUNCTION I1
4834 /// RET I2
4835 /// RET
4836 ///
4837 /// * Call construction overhead: 1 (B)
4838 /// * Frame construction overhead: 0 (Return included in sequence)
4839 /// * Requires stack fixups? No
4840 ///
4841 /// \p MachineOutlinerNoLRSave implies that the function should be called using
4842 /// a BL instruction, but doesn't require LR to be saved and restored. This
4843 /// happens when LR is known to be dead.
4844 ///
4845 /// That is,
4846 ///
4847 /// I1 OUTLINED_FUNCTION:
4848 /// I2 --> BL OUTLINED_FUNCTION I1
4849 /// I3 I2
4850 /// I3
4851 /// RET
4852 ///
4853 /// * Call construction overhead: 1 (BL)
4854 /// * Frame construction overhead: 1 (RET)
4855 /// * Requires stack fixups? No
4856 ///
4857 /// \p MachineOutlinerThunk implies that the function is being created from
4858 /// a sequence of instructions ending in a call. The outlined function is
4859 /// called with a BL instruction, and the outlined function tail-calls the
4860 /// original call destination.
4861 ///
4862 /// That is,
4863 ///
4864 /// I1 OUTLINED_FUNCTION:
4865 /// I2 --> BL OUTLINED_FUNCTION I1
4866 /// BL f I2
4867 /// B f
4868 /// * Call construction overhead: 1 (BL)
4869 /// * Frame construction overhead: 0
4870 /// * Requires stack fixups? No
4871 ///
4872 /// \p MachineOutlinerRegSave implies that the function should be called with a
4873 /// save and restore of LR to an available register. This allows us to avoid
4874 /// stack fixups. Note that this outlining variant is compatible with the
4875 /// NoLRSave case.
4876 ///
4877 /// That is,
4878 ///
4879 /// I1 Save LR OUTLINED_FUNCTION:
4880 /// I2 --> BL OUTLINED_FUNCTION I1
4881 /// I3 Restore LR I2
4882 /// I3
4883 /// RET
4884 ///
4885 /// * Call construction overhead: 3 (save + BL + restore)
4886 /// * Frame construction overhead: 1 (ret)
4887 /// * Requires stack fixups? No
4888 enum MachineOutlinerClass {
4889 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4890 MachineOutlinerTailCall, /// Only emit a branch.
4891 MachineOutlinerNoLRSave, /// Emit a call and return.
4892 MachineOutlinerThunk, /// Emit a call and tail-call.
4893 MachineOutlinerRegSave /// Same as default, but save to a register.
4894 };
4895
4896 enum MachineOutlinerMBBFlags {
4897 LRUnavailableSomewhere = 0x2,
4898 HasCalls = 0x4,
4899 UnsafeRegsDead = 0x8
4900 };
4901
4902 unsigned
4903 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
4904 assert(C.LRUWasSet && "LRU wasn't set?");
4905 MachineFunction *MF = C.getMF();
4906 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
4907 MF->getSubtarget().getRegisterInfo());
4908
4909 // Check if there is an available register across the sequence that we can
4910 // use.
4911 for (unsigned Reg : AArch64::GPR64RegClass) {
4912 if (!ARI->isReservedReg(*MF, Reg) &&
4913 Reg != AArch64::LR && // LR is not reserved, but don't use it.
4914 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
4915 Reg != AArch64::X17 && // Ditto for X17.
4916 C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
4917 return Reg;
4918 }
4919
4920 // No suitable register. Return 0.
4921 return 0u;
4922 }
4923
4924 outliner::OutlinedFunction
4925 AArch64InstrInfo::getOutliningCandidateInfo(
4926 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
4927 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
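  // Estimate the byte size of one copy of the sequence, using the worst-case
  // size of each instruction.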
4928 unsigned SequenceSize =
4929 std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
4930 [this](unsigned Sum, const MachineInstr &MI) {
4931 return Sum + getInstSizeInBytes(MI);
4932 });
4933
4934 // Properties about candidate MBBs that hold for all of them.
4935 unsigned FlagsSetInAll = 0xF;
4936
4937 // Compute liveness information for each candidate, and set FlagsSetInAll.
4938 const TargetRegisterInfo &TRI = getRegisterInfo();
4939 std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
4940 [&FlagsSetInAll](outliner::Candidate &C) {
4941 FlagsSetInAll &= C.Flags;
4942 });
4943
4944 // According to the AArch64 Procedure Call Standard, the following are
4945 // undefined on entry/exit from a function call:
4946 //
4947 // * Registers x16, x17, (and thus w16, w17)
4948 // * Condition codes (and thus the NZCV register)
4949 //
4950 // Because of this, we can't outline any sequence of instructions where
4951 // one of these registers is live into/across it. Thus, we need to
4952 // delete those candidates.
4955 auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
4956 // If the unsafe registers in this block are all dead, then we don't need
4957 // to compute liveness here.
4958 if (C.Flags & UnsafeRegsDead)
4959 return false;
4960 C.initLRU(TRI);
4961 LiveRegUnits LRU = C.LRU;
4962 return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
4963 !LRU.available(AArch64::NZCV));
4964 };
4965
4966 // Are there any candidates where those registers are live?
4967 if (!(FlagsSetInAll & UnsafeRegsDead)) {
4968 // Erase every candidate that violates the restrictions above. (It could be
4969 // true that we have viable candidates, so it's not worth bailing out in
4970 // the case that, say, 1 out of 20 candidates violates the restrictions.)
4971 RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
4972 RepeatedSequenceLocs.end(),
4973 CantGuaranteeValueAcrossCall),
4974 RepeatedSequenceLocs.end());
4975
4976 // If the sequence doesn't have enough candidates left, then we're done.
4977 if (RepeatedSequenceLocs.size() < 2)
4978 return outliner::OutlinedFunction();
4979 }
4980
4981 // At this point, we have only "safe" candidates to outline. Figure out
4982 // frame + call instruction information.
4983
4984 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
4985
4986 // Helper lambda which sets call information for every candidate.
4987 auto SetCandidateCallInfo =
4988 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
4989 for (outliner::Candidate &C : RepeatedSequenceLocs)
4990 C.setCallInfo(CallID, NumBytesForCall);
4991 };
4992
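  // Assume the default frame to start with; its only frame instruction is the
  // trailing RET (4 bytes). Per-candidate call costs are filled in below.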
4993 unsigned FrameID = MachineOutlinerDefault;
4994 unsigned NumBytesToCreateFrame = 4;
4995
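  // If any candidate lives in a function built with branch target enforcement
  // (BTI), avoid turning a trailing BLR into an indirect tail call below: the
  // resulting BR generally requires a "BTI j"-compatible landing pad at the
  // destination, which a BLR callee is not required to provide.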
4996 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
4997 return C.getMF()->getFunction().hasFnAttribute("branch-target-enforcement");
4998 });
4999
5000 // Returns true if an instruction is safe to fix up, false otherwise.
5001 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
5002 if (MI.isCall())
5003 return true;
5004
5005 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
5006 !MI.readsRegister(AArch64::SP, &TRI))
5007 return true;
5008
5009 // Any modification of SP will break our code to save/restore LR.
5010 // FIXME: We could handle some instructions which add a constant
5011 // offset to SP, with a bit more work.
5012 if (MI.modifiesRegister(AArch64::SP, &TRI))
5013 return false;
5014
5015 // At this point, we have a stack instruction that we might need to
5016 // fix up. We'll handle it if it's a load or store.
5017 if (MI.mayLoadOrStore()) {
5018 MachineOperand *Base; // Filled with the base operand of MI.
5019 int64_t Offset; // Filled with the offset of MI.
5020
5021 // Does it allow us to offset the base operand and is the base the
5022 // register SP?
5023 if (!getMemOperandWithOffset(MI, Base, Offset, &TRI) || !Base->isReg() ||
5024 Base->getReg() != AArch64::SP)
5025 return false;
5026
5027 // Find the minimum/maximum offset for this instruction and check
5028 // if fixing it up would be in range.
5029 int64_t MinOffset,
5030 MaxOffset; // Unscaled offsets for the instruction.
5031 unsigned Scale; // The scale to multiply the offsets by.
5032 unsigned DummyWidth;
5033 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
5034
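      // Illustrative example: an in-range "ldr x0, [sp, #8]" in the candidate
      // would become "ldr x0, [sp, #24]" once LR has been spilled with
      // "str x30, [sp, #-16]!", so check the adjusted offset against the
      // instruction's legal range.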
5035 Offset += 16; // Update the offset to what it would be if we outlined.
5036 if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
5037 return false;
5038
5039 // It's in range, so we can outline it.
5040 return true;
5041 }
5042
5043 // FIXME: Add handling for instructions like "add x0, sp, #8".
5044
5045 // We can't fix it up, so don't outline it.
5046 return false;
5047 };
5048
5049 // True if it's possible to fix up each stack instruction in this sequence.
5050 // Important for frames/call variants that modify the stack.
5051 bool AllStackInstrsSafe = std::all_of(
5052 FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
5053
5054 // If the last instruction in any candidate is a terminator, then we should
5055 // tail call all of the candidates.
5056 if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5057 FrameID = MachineOutlinerTailCall;
5058 NumBytesToCreateFrame = 0;
5059 SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5060 }
5061
5062 else if (LastInstrOpcode == AArch64::BL ||
5063 (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5064 // FIXME: Do we need to check if the code after this uses the value of LR?
5065 FrameID = MachineOutlinerThunk;
5066 NumBytesToCreateFrame = 0;
5067 SetCandidateCallInfo(MachineOutlinerThunk, 4);
5068 }
5069
5070 else {
5071 // We need to decide how to emit calls + frames. We can always emit the same
5072 // frame if we don't need to save to the stack. If we have to save to the
5073 // stack, then we need a different frame.
5074 unsigned NumBytesNoStackCalls = 0;
5075 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
5076
5077 for (outliner::Candidate &C : RepeatedSequenceLocs) {
5078 C.initLRU(TRI);
5079
5080 // Is LR available? If so, we don't need a save.
5081 if (C.LRU.available(AArch64::LR)) {
5082 NumBytesNoStackCalls += 4;
5083 C.setCallInfo(MachineOutlinerNoLRSave, 4);
5084 CandidatesWithoutStackFixups.push_back(C);
5085 }
5086
5087 // Is an unused register available? If so, we won't modify the stack, so
5088 // we can outline with the same frame type as those that don't save LR.
5089 else if (findRegisterToSaveLRTo(C)) {
5090 NumBytesNoStackCalls += 12;
5091 C.setCallInfo(MachineOutlinerRegSave, 12);
5092 CandidatesWithoutStackFixups.push_back(C);
5093 }
5094
5095 // Is SP used in the sequence at all? If not, we don't have to modify
5096 // the stack, so we are guaranteed to get the same frame.
5097 else if (C.UsedInSequence.available(AArch64::SP)) {
5098 NumBytesNoStackCalls += 12;
5099 C.setCallInfo(MachineOutlinerDefault, 12);
5100 CandidatesWithoutStackFixups.push_back(C);
5101 }
5102
5103 // If we outline this, we need to modify the stack. Pretend we don't
5104 // outline this by saving all of its bytes.
5105 else {
5106 NumBytesNoStackCalls += SequenceSize;
5107 }
5108 }
5109
5110 // If there are no places where we have to save LR, then note that we
5111 // don't have to update the stack. Otherwise, give every candidate the
5112 // default call type, as long as it's safe to do so.
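    // (12 bytes is the size of one MachineOutlinerDefault call: save LR, BL,
    // and restore LR.)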
5113 if (!AllStackInstrsSafe ||
5114 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
5115 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
5116 FrameID = MachineOutlinerNoLRSave;
5117 } else {
5118 SetCandidateCallInfo(MachineOutlinerDefault, 12);
5119 }
5120
5121 // If we dropped all of the candidates, bail out here.
5122 if (RepeatedSequenceLocs.size() < 2) {
5123 RepeatedSequenceLocs.clear();
5124 return outliner::OutlinedFunction();
5125 }
5126 }
5127
5128 // Does every candidate's MBB contain a call? If so, then we might have a call
5129 // in the range.
5130 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
5131 // Check if the range contains a call. These require a save + restore of the
5132 // link register.
5133 bool ModStackToSaveLR = false;
5134 if (std::any_of(FirstCand.front(), FirstCand.back(),
5135 [](const MachineInstr &MI) { return MI.isCall(); }))
5136 ModStackToSaveLR = true;
5137
5138 // Handle the last instruction separately. If this is a tail call, then the
5139 // last instruction is a call. We don't want to save + restore in this case.
5140 // However, it could be possible that the last instruction is a call without
5141 // it being valid to tail call this sequence. We should consider this as
5142 // well.
5143 else if (FrameID != MachineOutlinerThunk &&
5144 FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
5145 ModStackToSaveLR = true;
5146
5147 if (ModStackToSaveLR) {
5148 // We can't fix up the stack. Bail out.
5149 if (!AllStackInstrsSafe) {
5150 RepeatedSequenceLocs.clear();
5151 return outliner::OutlinedFunction();
5152 }
5153
5154 // Save + restore LR.
5155 NumBytesToCreateFrame += 8;
5156 }
5157 }
5158
5159 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5160 NumBytesToCreateFrame, FrameID);
5161 }
5162
5163 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5164 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5165 const Function &F = MF.getFunction();
5166
5167 // Can F be deduplicated by the linker? If it can, don't outline from it.
5168 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5169 return false;
5170
5171 // Don't outline from functions with section markings; the program could
5172 // expect that all the code is in the named section.
5173 // FIXME: Allow outlining from multiple functions with the same section
5174 // marking.
5175 if (F.hasSection())
5176 return false;
5177
5178 // Outlining from functions with redzones is unsafe since the outliner may
5179 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5180 // outline from it.
5181 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5182 if (!AFI || AFI->hasRedZone().getValueOr(true))
5183 return false;
5184
5185 // It's safe to outline from MF.
5186 return true;
5187 }
5188
5189 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
5190 unsigned &Flags) const {
5191 // Check if LR is available through all of the MBB. If it's not, then set
5192 // a flag.
5193 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5194 "Suitable Machine Function for outlining must track liveness");
5195 LiveRegUnits LRU(getRegisterInfo());
5196
5197 std::for_each(MBB.rbegin(), MBB.rend(),
5198 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5199
5200 // Check if each of the unsafe registers is available...
5201 bool W16AvailableInBlock = LRU.available(AArch64::W16);
5202 bool W17AvailableInBlock = LRU.available(AArch64::W17);
5203 bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
5204
5205 // If all of these are dead (and not live out), we know we don't have to check
5206 // them later.
5207 if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
5208 Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
5209
5210 // Now, add the live outs to the set.
5211 LRU.addLiveOuts(MBB);
5212
5213 // If any of these registers is available in the MBB, but also a live out of
5214 // the block, then we know outlining is unsafe.
5215 if (W16AvailableInBlock && !LRU.available(AArch64::W16))
5216 return false;
5217 if (W17AvailableInBlock && !LRU.available(AArch64::W17))
5218 return false;
5219 if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
5220 return false;
5221
5222 // Check if there's a call inside this MachineBasicBlock. If there is, then
5223 // set a flag.
5224 if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
5225 Flags |= MachineOutlinerMBBFlags::HasCalls;
5226
5227 MachineFunction *MF = MBB.getParent();
5228
5229 // In the event that we outline, we may have to save LR. If there is an
5230 // available register in the MBB, then we'll always save LR there. Check if
5231 // this is true.
5232 bool CanSaveLR = false;
5233 const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5234 MF->getSubtarget().getRegisterInfo());
5235
5236 // Check if there is an available register across the sequence that we can
5237 // use.
5238 for (unsigned Reg : AArch64::GPR64RegClass) {
5239 if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
5240 Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
5241 CanSaveLR = true;
5242 break;
5243 }
5244 }
5245
5246 // Check if we have a register we can save LR to, and if LR was used
5247 // somewhere. If both of those things are true, then we need to evaluate the
5248 // safety of outlining stack instructions later.
5249 if (!CanSaveLR && !LRU.available(AArch64::LR))
5250 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
5251
5252 return true;
5253 }
5254
5255 outliner::InstrType
5256 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5257 unsigned Flags) const {
5258 MachineInstr &MI = *MIT;
5259 MachineBasicBlock *MBB = MI.getParent();
5260 MachineFunction *MF = MBB->getParent();
5261 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5262
5263 // Don't outline LOHs.
5264 if (FuncInfo->getLOHRelated().count(&MI))
5265 return outliner::InstrType::Illegal;
5266
5267 // Don't allow debug values to impact outlining type.
5268 if (MI.isDebugInstr() || MI.isIndirectDebugValue())
5269 return outliner::InstrType::Invisible;
5270
5271 // At this point, KILL instructions don't really tell us much so we can go
5272 // ahead and skip over them.
5273 if (MI.isKill())
5274 return outliner::InstrType::Invisible;
5275
5276 // Is this a terminator for a basic block?
5277 if (MI.isTerminator()) {
5278
5279 // Is this the end of a function?
5280 if (MI.getParent()->succ_empty())
5281 return outliner::InstrType::Legal;
5282
5283 // It's not, so don't outline it.
5284 return outliner::InstrType::Illegal;
5285 }
5286
5287 // Make sure none of the operands are un-outlinable.
5288 for (const MachineOperand &MOP : MI.operands()) {
5289 if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5290 MOP.isTargetIndex())
5291 return outliner::InstrType::Illegal;
5292
5293 // If it uses LR or W30 explicitly, then don't touch it.
5294 if (MOP.isReg() && !MOP.isImplicit() &&
5295 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
5296 return outliner::InstrType::Illegal;
5297 }
5298
5299 // Special cases for instructions that can always be outlined, but will fail
5300 // the later tests. E.g., ADRPs are PC-relative, but they can always be
5301 // outlined because they don't require a *specific* value to be in LR.
5302 if (MI.getOpcode() == AArch64::ADRP)
5303 return outliner::InstrType::Legal;
5304
5305 // If MI is a call we might be able to outline it. We don't want to outline
5306 // any calls that rely on the position of items on the stack. When we outline
5307 // something containing a call, we have to emit a save and restore of LR in
5308 // the outlined function. Currently, this always happens by saving LR to the
5309 // stack. Thus, if we outline, say, half the parameters for a function call
5310 // plus the call, then we'll break the callee's expectations for the layout
5311 // of the stack.
5312 //
5313 // FIXME: Allow calls to functions which construct a stack frame, as long
5314 // as they don't access arguments on the stack.
5315 // FIXME: Figure out some way to analyze functions defined in other modules.
5316 // We should be able to compute the memory usage based on the IR calling
5317 // convention, even if we can't see the definition.
5318 if (MI.isCall()) {
5319 // Get the function associated with the call. Look at each operand and find
5320 // the one that represents the callee and get its name.
5321 const Function *Callee = nullptr;
5322 for (const MachineOperand &MOP : MI.operands()) {
5323 if (MOP.isGlobal()) {
5324 Callee = dyn_cast<Function>(MOP.getGlobal());
5325 break;
5326 }
5327 }
5328
5329 // Never outline calls to mcount. There isn't any rule that would require
5330 // this, but the Linux kernel's "ftrace" feature depends on it.
5331 if (Callee && Callee->getName() == "\01_mcount")
5332 return outliner::InstrType::Illegal;
5333
5334 // If we don't know anything about the callee, assume it depends on the
5335 // stack layout of the caller. In that case, it's only legal to outline
5336 // as a tail-call. Whitelist the call instructions we know about so we
5337 // don't get unexpected results with call pseudo-instructions.
5338 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
5339 if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
5340 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
5341
5342 if (!Callee)
5343 return UnknownCallOutlineType;
5344
5345 // We have a function we have information about. Check if it's something
5346 // we can safely outline.
5347 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
5348
5349 // We don't know what's going on with the callee at all. Don't touch it.
5350 if (!CalleeMF)
5351 return UnknownCallOutlineType;
5352
5353 // Check if we know anything about the callee saves on the function. If we
5354 // don't, then don't touch it, since that implies that we haven't
5355 // computed anything about its stack frame yet.
5356 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
5357 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
5358 MFI.getNumObjects() > 0)
5359 return UnknownCallOutlineType;
5360
5361 // At this point, we can say that CalleeMF ought to not pass anything on the
5362 // stack. Therefore, we can outline it.
5363 return outliner::InstrType::Legal;
5364 }
5365
5366 // Don't outline positions.
5367 if (MI.isPosition())
5368 return outliner::InstrType::Illegal;
5369
5370 // Don't touch the link register or W30.
5371 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
5372 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
5373 return outliner::InstrType::Illegal;
5374
5375 return outliner::InstrType::Legal;
5376 }
5377
5378 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5379 for (MachineInstr &MI : MBB) {
5380 MachineOperand *Base;
5381 unsigned Width;
5382 int64_t Offset;
5383
5384 // Is this a load or store with an immediate offset with SP as the base?
5385 if (!MI.mayLoadOrStore() ||
5386 !getMemOperandWithOffsetWidth(MI, Base, Offset, Width, &RI) ||
5387 (Base->isReg() && Base->getReg() != AArch64::SP))
5388 continue;
5389
5390 // It is, so we have to fix it up.
5391 unsigned Scale;
5392 int64_t Dummy1, Dummy2;
5393
5394 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5395 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5396 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5397 assert(Scale != 0 && "Unexpected opcode!");
5398
5399 // We've pushed the return address to the stack, so add 16 to the offset.
5400 // This is safe, since we already checked if it would overflow when we
5401 // checked if this instruction was legal to outline.
5402 int64_t NewImm = (Offset + 16) / Scale;
5403 StackOffsetOperand.setImm(NewImm);
5404 }
5405 }
5406
5407 void AArch64InstrInfo::buildOutlinedFrame(
5408 MachineBasicBlock &MBB, MachineFunction &MF,
5409 const outliner::OutlinedFunction &OF) const {
5410 // For thunk outlining, rewrite the last instruction from a call to a
5411 // tail-call.
5412 if (OF.FrameConstructionID == MachineOutlinerThunk) {
5413 MachineInstr *Call = &*--MBB.instr_end();
5414 unsigned TailOpcode;
5415 if (Call->getOpcode() == AArch64::BL) {
5416 TailOpcode = AArch64::TCRETURNdi;
5417 } else {
5418 assert(Call->getOpcode() == AArch64::BLR);
5419 TailOpcode = AArch64::TCRETURNriALL;
5420 }
5421 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
5422 .add(Call->getOperand(0))
5423 .addImm(0);
5424 MBB.insert(MBB.end(), TC);
5425 Call->eraseFromParent();
5426 }
5427
5428 // Is there a call in the outlined range?
5429 auto IsNonTailCall = [](MachineInstr &MI) {
5430 return MI.isCall() && !MI.isReturn();
5431 };
5432 if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
5433 // Fix up the instructions in the range, since we're going to modify the
5434 // stack.
5435 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
5436 "Can only fix up stack references once");
5437 fixupPostOutline(MBB);
5438
5439 // LR has to be a live in so that we can save it.
5440 MBB.addLiveIn(AArch64::LR);
5441
5442 MachineBasicBlock::iterator It = MBB.begin();
5443 MachineBasicBlock::iterator Et = MBB.end();
5444
5445 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5446 OF.FrameConstructionID == MachineOutlinerThunk)
5447 Et = std::prev(MBB.end());
5448
5449 // Insert a save before the outlined region
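    // The save is a pre-indexed "str x30, [sp, #-16]!", which spills LR and
    // allocates 16 bytes of stack in a single instruction.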
5450 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5451 .addReg(AArch64::SP, RegState::Define)
5452 .addReg(AArch64::LR)
5453 .addReg(AArch64::SP)
5454 .addImm(-16);
5455 It = MBB.insert(It, STRXpre);
5456
5457 const TargetSubtargetInfo &STI = MF.getSubtarget();
5458 const MCRegisterInfo *MRI = STI.getRegisterInfo();
5459 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
5460
5461 // Add a CFI saying the stack was moved 16 B down.
5462 int64_t StackPosEntry =
5463 MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
5464 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5465 .addCFIIndex(StackPosEntry)
5466 .setMIFlags(MachineInstr::FrameSetup);
5467
5468 // Add a CFI saying that the LR that we want to find is now 16 B higher than
5469 // before.
5470 int64_t LRPosEntry =
5471 MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
5472 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5473 .addCFIIndex(LRPosEntry)
5474 .setMIFlags(MachineInstr::FrameSetup);
5475
5476 // Insert a restore before the terminator for the function.
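    // The restore is the matching post-indexed "ldr x30, [sp], #16".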
5477 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5478 .addReg(AArch64::SP, RegState::Define)
5479 .addReg(AArch64::LR, RegState::Define)
5480 .addReg(AArch64::SP)
5481 .addImm(16);
5482 Et = MBB.insert(Et, LDRXpost);
5483 }
5484
5485 // If this is a tail call outlined function, then there's already a return.
5486 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5487 OF.FrameConstructionID == MachineOutlinerThunk)
5488 return;
5489
5490 // It's not a tail call, so we have to insert the return ourselves.
5491 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5492 .addReg(AArch64::LR, RegState::Undef);
5493 MBB.insert(MBB.end(), ret);
5494
5495 // Did we have to modify the stack by saving the link register?
5496 if (OF.FrameConstructionID != MachineOutlinerDefault)
5497 return;
5498
5499 // We modified the stack.
5500 // Walk over the basic block and fix up all the stack accesses.
5501 fixupPostOutline(MBB);
5502 }
5503
5504 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5505 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5506 MachineFunction &MF, const outliner::Candidate &C) const {
5507
5508 // Are we tail calling?
5509 if (C.CallConstructionID == MachineOutlinerTailCall) {
5510 // If yes, then we can just branch to the label.
5511 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
5512 .addGlobalAddress(M.getNamedValue(MF.getName()))
5513 .addImm(0));
5514 return It;
5515 }
5516
5517 // Are we saving the link register?
5518 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
5519 C.CallConstructionID == MachineOutlinerThunk) {
5520 // No, so just insert the call.
5521 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5522 .addGlobalAddress(M.getNamedValue(MF.getName())));
5523 return It;
5524 }
5525
5526 // We want to return the spot where we inserted the call.
5527 MachineBasicBlock::iterator CallPt;
5528
5529 // Instructions for saving and restoring LR around the call instruction we're
5530 // going to insert.
5531 MachineInstr *Save;
5532 MachineInstr *Restore;
5533 // Can we save to a register?
5534 if (C.CallConstructionID == MachineOutlinerRegSave) {
5535 // FIXME: This logic should be sunk into a target-specific interface so that
5536 // we don't have to recompute the register.
5537 unsigned Reg = findRegisterToSaveLRTo(C);
5538 assert(Reg != 0 && "No callee-saved register available?");
5539
5540 // Save and restore LR from that register.
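    // "orr xN, xzr, x30" / "orr x30, xzr, xN" are the canonical MOV
    // encodings, so this is just "mov Reg, lr" and "mov lr, Reg".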
5541 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
5542 .addReg(AArch64::XZR)
5543 .addReg(AArch64::LR)
5544 .addImm(0);
5545 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
5546 .addReg(AArch64::XZR)
5547 .addReg(Reg)
5548 .addImm(0);
5549 } else {
5550 // We have the default case. Save and restore from SP.
5551 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5552 .addReg(AArch64::SP, RegState::Define)
5553 .addReg(AArch64::LR)
5554 .addReg(AArch64::SP)
5555 .addImm(-16);
5556 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5557 .addReg(AArch64::SP, RegState::Define)
5558 .addReg(AArch64::LR, RegState::Define)
5559 .addReg(AArch64::SP)
5560 .addImm(16);
5561 }
5562
5563 It = MBB.insert(It, Save);
5564 It++;
5565
5566 // Insert the call.
5567 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5568 .addGlobalAddress(M.getNamedValue(MF.getName())));
5569 CallPt = It;
5570 It++;
5571
5572 It = MBB.insert(It, Restore);
5573 return CallPt;
5574 }
5575
5576 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
5577 MachineFunction &MF) const {
5578 return MF.getFunction().optForMinSize();
5579 }
5580
5581 #define GET_INSTRINFO_HELPERS
5582 #include "AArch64GenInstrInfo.inc"
5583