1349cc55cSDimitry Andric //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2349cc55cSDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8349cc55cSDimitry Andric //
9349cc55cSDimitry Andric // This pass performs below peephole optimizations on MIR level.
10349cc55cSDimitry Andric //
11349cc55cSDimitry Andric // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12349cc55cSDimitry Andric // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13349cc55cSDimitry Andric //
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
1904eeddc0SDimitry Andric //
20349cc55cSDimitry Andric // The mov pseudo instruction could be expanded to multiple mov instructions
21349cc55cSDimitry Andric // later. In this case, we could try to split the constant operand of mov
2204eeddc0SDimitry Andric // instruction into two immediates which can be directly encoded into
2304eeddc0SDimitry Andric // *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
2404eeddc0SDimitry Andric // multiple `mov` + `and/add/sub` instructions.
25349cc55cSDimitry Andric //
2604eeddc0SDimitry Andric // 4. Remove redundant ORRWrs which is generated by zero-extend.
27349cc55cSDimitry Andric //
28349cc55cSDimitry Andric // %3:gpr32 = ORRWrs $wzr, %2, 0
29349cc55cSDimitry Andric // %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
30349cc55cSDimitry Andric //
31349cc55cSDimitry Andric // If AArch64's 32-bit form of instruction defines the source operand of
32349cc55cSDimitry Andric // ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33349cc55cSDimitry Andric // operand are set to zero.
34349cc55cSDimitry Andric //
35bdd1243dSDimitry Andric // 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
36bdd1243dSDimitry Andric // ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
37bdd1243dSDimitry Andric //
38*06c3fb27SDimitry Andric // 6. %intermediate:gpr32 = COPY %src:fpr128
39*06c3fb27SDimitry Andric // %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
40*06c3fb27SDimitry Andric // ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
41*06c3fb27SDimitry Andric //
42*06c3fb27SDimitry Andric // In cases where a source FPR is copied to a GPR in order to be copied
43*06c3fb27SDimitry Andric // to a destination FPR, we can directly copy the values between the FPRs,
44*06c3fb27SDimitry Andric // eliminating the use of the Integer unit. When we match a pattern of
45*06c3fb27SDimitry Andric // INSvi[X]gpr that is preceded by a chain of COPY instructions from a FPR
46*06c3fb27SDimitry Andric // source, we use the INSvi[X]lane to replace the COPY & INSvi[X]gpr
47*06c3fb27SDimitry Andric // instructions.
48*06c3fb27SDimitry Andric //
49*06c3fb27SDimitry Andric // 7. If MI sets zero for high 64-bits implicitly, remove `mov 0` for high
50*06c3fb27SDimitry Andric // 64-bits. For example,
51*06c3fb27SDimitry Andric //
52*06c3fb27SDimitry Andric // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
53*06c3fb27SDimitry Andric // %2:fpr64 = MOVID 0
54*06c3fb27SDimitry Andric // %4:fpr128 = IMPLICIT_DEF
55*06c3fb27SDimitry Andric // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
56*06c3fb27SDimitry Andric // %6:fpr128 = IMPLICIT_DEF
57*06c3fb27SDimitry Andric // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
58*06c3fb27SDimitry Andric // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
59*06c3fb27SDimitry Andric // ==>
60*06c3fb27SDimitry Andric // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
61*06c3fb27SDimitry Andric // %6:fpr128 = IMPLICIT_DEF
62*06c3fb27SDimitry Andric // %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
63*06c3fb27SDimitry Andric //
64349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
65349cc55cSDimitry Andric
66349cc55cSDimitry Andric #include "AArch64ExpandImm.h"
67349cc55cSDimitry Andric #include "AArch64InstrInfo.h"
68349cc55cSDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
69349cc55cSDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
70349cc55cSDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
71349cc55cSDimitry Andric
72349cc55cSDimitry Andric using namespace llvm;
73349cc55cSDimitry Andric
74349cc55cSDimitry Andric #define DEBUG_TYPE "aarch64-mi-peephole-opt"
75349cc55cSDimitry Andric
76349cc55cSDimitry Andric namespace {
77349cc55cSDimitry Andric
struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
    initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
  }

  // Target hooks and analyses, cached once per runOnMachineFunction.
  const AArch64InstrInfo *TII;
  const AArch64RegisterInfo *TRI;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  // Opcodes of the two instructions a split immediate expands into. The two
  // entries may differ, e.g. for flag-setting operations where only the
  // second instruction should set flags.
  using OpcodePair = std::pair<unsigned, unsigned>;
  // Callback deciding whether an immediate (first arg) of the given register
  // size (second arg) can be split; on success it fills the two out-parameter
  // immediate halves and returns the opcode pair to use.
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  // Callback that emits the two replacement instructions in front of the
  // original MI, given the opcode pair, the two immediates, the source
  // register, the temporary register, and the destination register.
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;

  /// For instructions where an immediate operand could be split into two
  /// separate immediate instructions, use the splitTwoPartImm two handle the
  /// optimization.
  ///
  /// To implement, the following function types must be passed to
  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
  /// splitting the immediate is valid and returns the associated new opcode. A
  /// BuildMIFunc must be implemented to build the two immediate instructions.
  ///
  /// Example Pattern (where IMM would require 2+ MOV instructions):
  ///     %dst = <Instr>rr %src IMM [...]
  /// becomes:
  ///     %tmp = <Instr>ri %src (encode half IMM) [...]
  ///     %dst = <Instr>ri %tmp (encode half IMM) [...]
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  // Shared legality checks for the immediate-splitting rewrites: finds the
  // MOVi32imm/MOVi64imm (optionally behind a SUBREG_TO_REG) feeding MI.
  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                        MachineInstr *&SubregToRegMI);

  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);

  template <typename T>
  bool visitAND(unsigned Opc, MachineInstr &MI);
  bool visitORR(MachineInstr &MI);
  bool visitINSERT(MachineInstr &MI);
  bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
  bool visitINSvi64lane(MachineInstr &MI);
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
142349cc55cSDimitry Andric
143349cc55cSDimitry Andric char AArch64MIPeepholeOpt::ID = 0;
144349cc55cSDimitry Andric
145349cc55cSDimitry Andric } // end anonymous namespace
146349cc55cSDimitry Andric
147349cc55cSDimitry Andric INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
148349cc55cSDimitry Andric "AArch64 MI Peephole Optimization", false, false)
149349cc55cSDimitry Andric
150349cc55cSDimitry Andric template <typename T>
splitBitmaskImm(T Imm,unsigned RegSize,T & Imm1Enc,T & Imm2Enc)151349cc55cSDimitry Andric static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
152349cc55cSDimitry Andric T UImm = static_cast<T>(Imm);
153349cc55cSDimitry Andric if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
154349cc55cSDimitry Andric return false;
155349cc55cSDimitry Andric
156349cc55cSDimitry Andric // If this immediate can be handled by one instruction, do not split it.
157349cc55cSDimitry Andric SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
158349cc55cSDimitry Andric AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
159349cc55cSDimitry Andric if (Insn.size() == 1)
160349cc55cSDimitry Andric return false;
161349cc55cSDimitry Andric
162349cc55cSDimitry Andric // The bitmask immediate consists of consecutive ones. Let's say there is
163349cc55cSDimitry Andric // constant 0b00000000001000000000010000000000 which does not consist of
164349cc55cSDimitry Andric // consecutive ones. We can split it in to two bitmask immediate like
165349cc55cSDimitry Andric // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
166349cc55cSDimitry Andric // If we do AND with these two bitmask immediate, we can see original one.
167*06c3fb27SDimitry Andric unsigned LowestBitSet = llvm::countr_zero(UImm);
168349cc55cSDimitry Andric unsigned HighestBitSet = Log2_64(UImm);
169349cc55cSDimitry Andric
170349cc55cSDimitry Andric // Create a mask which is filled with one from the position of lowest bit set
171349cc55cSDimitry Andric // to the position of highest bit set.
172349cc55cSDimitry Andric T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
173349cc55cSDimitry Andric (static_cast<T>(1) << LowestBitSet);
174349cc55cSDimitry Andric // Create a mask which is filled with one outside the position of lowest bit
175349cc55cSDimitry Andric // set and the position of highest bit set.
176349cc55cSDimitry Andric T NewImm2 = UImm | ~NewImm1;
177349cc55cSDimitry Andric
178349cc55cSDimitry Andric // If the split value is not valid bitmask immediate, do not split this
179349cc55cSDimitry Andric // constant.
180349cc55cSDimitry Andric if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
181349cc55cSDimitry Andric return false;
182349cc55cSDimitry Andric
183349cc55cSDimitry Andric Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
184349cc55cSDimitry Andric Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
185349cc55cSDimitry Andric return true;
186349cc55cSDimitry Andric }
187349cc55cSDimitry Andric
188349cc55cSDimitry Andric template <typename T>
visitAND(unsigned Opc,MachineInstr & MI)189349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::visitAND(
19081ad6265SDimitry Andric unsigned Opc, MachineInstr &MI) {
191349cc55cSDimitry Andric // Try below transformation.
192349cc55cSDimitry Andric //
193349cc55cSDimitry Andric // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
194349cc55cSDimitry Andric // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
195349cc55cSDimitry Andric //
196349cc55cSDimitry Andric // The mov pseudo instruction could be expanded to multiple mov instructions
197349cc55cSDimitry Andric // later. Let's try to split the constant operand of mov instruction into two
198349cc55cSDimitry Andric // bitmask immediates. It makes only two AND instructions intead of multiple
199349cc55cSDimitry Andric // mov + and instructions.
200349cc55cSDimitry Andric
20104eeddc0SDimitry Andric return splitTwoPartImm<T>(
20281ad6265SDimitry Andric MI,
203bdd1243dSDimitry Andric [Opc](T Imm, unsigned RegSize, T &Imm0,
204bdd1243dSDimitry Andric T &Imm1) -> std::optional<OpcodePair> {
20504eeddc0SDimitry Andric if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
20681ad6265SDimitry Andric return std::make_pair(Opc, Opc);
207bdd1243dSDimitry Andric return std::nullopt;
20804eeddc0SDimitry Andric },
20981ad6265SDimitry Andric [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
21004eeddc0SDimitry Andric unsigned Imm1, Register SrcReg, Register NewTmpReg,
21104eeddc0SDimitry Andric Register NewDstReg) {
212349cc55cSDimitry Andric DebugLoc DL = MI.getDebugLoc();
21304eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent();
21481ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
215349cc55cSDimitry Andric .addReg(SrcReg)
21604eeddc0SDimitry Andric .addImm(Imm0);
21781ad6265SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
218349cc55cSDimitry Andric .addReg(NewTmpReg)
21904eeddc0SDimitry Andric .addImm(Imm1);
22004eeddc0SDimitry Andric });
221349cc55cSDimitry Andric }
222349cc55cSDimitry Andric
// Remove a redundant zero-extending ORRWrs (pattern 4 in the file header).
// Returns true and erases MI when the ORR's source is already known to have
// its upper 32 bits zeroed by its defining instruction.
bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  // Check this ORR comes from below zero-extend pattern.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  // Operand 3 is the shift amount; the pattern above uses a plain (LSL #0)
  // ORR of WZR with the source.
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of
  // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
  // real AArch64 instruction and if it is not, do not process the opcode
  // conservatively.
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
    // that the upper bits are zero.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc = SrcMI->getOperand(1).getReg();
    // For a 64/128-bit FPR with an ssub subregister access, first extract the
    // 32-bit FPR value into its own virtual register so FMOVSWr can use it.
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    }
    // Replace the generic COPY with an explicit FMOVSWr, which zeroes the
    // upper 32 bits of its destination, making the ORR redundant.
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  }
  else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    // Generic (pre-selection) opcodes give no zeroing guarantee; bail out.
    return false;

  // Forward the ORR's source directly to all users of its destination and
  // delete the now-dead ORR.
  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}
28304eeddc0SDimitry Andric
// Rewrite a zero-extending INSERT_SUBREG into SUBREG_TO_REG (pattern 5 in the
// file header). Returns true and erases MI on success.
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // Check this INSERT_SUBREG comes from below zero-extend pattern.
  //
  // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
  // To   %reg:subidx =  SUBREG_TO_REG 0, %subreg, subidx
  //
  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
  // COPY would destroy the upper part of the register anyway
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If AArch64's 32-bit form of instruction defines the source operand of
  // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
  // real AArch64 instruction and if it is not, do not process the opcode
  // conservatively.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction, forwarding the inserted value and the
  // subregister index operands unchanged.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *SubregMI << "\n");
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}
328bdd1243dSDimitry Andric
32904eeddc0SDimitry Andric template <typename T>
splitAddSubImm(T Imm,unsigned RegSize,T & Imm0,T & Imm1)33004eeddc0SDimitry Andric static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
33104eeddc0SDimitry Andric // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
33204eeddc0SDimitry Andric // imm0 and imm1 are non-zero 12-bit unsigned int.
33304eeddc0SDimitry Andric if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
33404eeddc0SDimitry Andric (Imm & ~static_cast<T>(0xffffff)) != 0)
33504eeddc0SDimitry Andric return false;
33604eeddc0SDimitry Andric
33704eeddc0SDimitry Andric // The immediate can not be composed via a single instruction.
33804eeddc0SDimitry Andric SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
33904eeddc0SDimitry Andric AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
34004eeddc0SDimitry Andric if (Insn.size() == 1)
34104eeddc0SDimitry Andric return false;
34204eeddc0SDimitry Andric
34304eeddc0SDimitry Andric // Split Imm into (Imm0 << 12) + Imm1;
34404eeddc0SDimitry Andric Imm0 = (Imm >> 12) & 0xfff;
34504eeddc0SDimitry Andric Imm1 = Imm & 0xfff;
34604eeddc0SDimitry Andric return true;
34704eeddc0SDimitry Andric }
34804eeddc0SDimitry Andric
// Split the MOV-immediate feeding an ADD/SUB register-register instruction
// into two shifted-immediate instructions (patterns 2 and 3 in the file
// header). PosOpc is used when the immediate itself splits, NegOpc when its
// negation does (turning an ADD into two SUBs or vice versa).
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
  // Try below transformation.
  //
  // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
  // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
  //
  // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
  // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of mov instruction into two
  // legal add/sub immediates. It makes only two ADD/SUB instructions instead
  // of multiple `mov` + `add/sub` instructions.

  // We can sometimes have ADDWrr WZR, MOVi32imm that have not been constant
  // folded. Make sure that we don't generate invalid instructions that use XZR
  // in those cases.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      // Prefer splitting Imm directly; otherwise try -Imm with the negated
      // opcode (e.g. ADD with a negative constant becomes two SUBs).
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      // Emit the two shifted-immediate instructions: the first applies Imm0
      // with LSL #12, the second applies Imm1 with no shift.
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}
39781ad6265SDimitry Andric
// Flag-setting variant of visitADDSUB. Each OpcodePair holds the opcodes for
// the first (non-flag-setting) and second (flag-setting) replacement
// instruction.
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(
    OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
  // Try the same transformation as ADDSUB but with additional requirement
  // that the condition code usages are only for Equal and Not Equal

  // As in visitADDSUB, bail out when the register operand is WZR/XZR since
  // the immediate forms cannot encode the zero register.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;
        // Check conditional uses last since it is expensive for scanning
        // subsequent instructions. Splitting is only sound when the flags
        // consumers never read C or V, which differ between the one-step and
        // two-step computations.
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      // Emit the two shifted-immediate instructions; only the second opcode
      // (Opcode.second) is the flag-setting form.
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}
44304eeddc0SDimitry Andric
44404eeddc0SDimitry Andric // Checks if the corresponding MOV immediate instruction is applicable for
44504eeddc0SDimitry Andric // this peephole optimization.
checkMovImmInstr(MachineInstr & MI,MachineInstr * & MovMI,MachineInstr * & SubregToRegMI)44604eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
44704eeddc0SDimitry Andric MachineInstr *&MovMI,
44804eeddc0SDimitry Andric MachineInstr *&SubregToRegMI) {
44904eeddc0SDimitry Andric // Check whether current MBB is in loop and the AND is loop invariant.
45004eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent();
45104eeddc0SDimitry Andric MachineLoop *L = MLI->getLoopFor(MBB);
45204eeddc0SDimitry Andric if (L && !L->isLoopInvariant(MI))
45304eeddc0SDimitry Andric return false;
45404eeddc0SDimitry Andric
45504eeddc0SDimitry Andric // Check whether current MI's operand is MOV with immediate.
45604eeddc0SDimitry Andric MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
45704eeddc0SDimitry Andric if (!MovMI)
45804eeddc0SDimitry Andric return false;
45904eeddc0SDimitry Andric
46004eeddc0SDimitry Andric // If it is SUBREG_TO_REG, check its operand.
46104eeddc0SDimitry Andric SubregToRegMI = nullptr;
46204eeddc0SDimitry Andric if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
46304eeddc0SDimitry Andric SubregToRegMI = MovMI;
46404eeddc0SDimitry Andric MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
46504eeddc0SDimitry Andric if (!MovMI)
46604eeddc0SDimitry Andric return false;
46704eeddc0SDimitry Andric }
46804eeddc0SDimitry Andric
46904eeddc0SDimitry Andric if (MovMI->getOpcode() != AArch64::MOVi32imm &&
47004eeddc0SDimitry Andric MovMI->getOpcode() != AArch64::MOVi64imm)
47104eeddc0SDimitry Andric return false;
47204eeddc0SDimitry Andric
47304eeddc0SDimitry Andric // If the MOV has multiple uses, do not split the immediate because it causes
47404eeddc0SDimitry Andric // more instructions.
47504eeddc0SDimitry Andric if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
47604eeddc0SDimitry Andric return false;
47704eeddc0SDimitry Andric if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
47804eeddc0SDimitry Andric return false;
47904eeddc0SDimitry Andric
48004eeddc0SDimitry Andric // It is OK to perform this peephole optimization.
48104eeddc0SDimitry Andric return true;
48204eeddc0SDimitry Andric }
48304eeddc0SDimitry Andric
// Split a two-part immediate operation: rewrite `MOVimm + <op>rr` into two
// `<op>ri` instructions whose immediates together encode the original
// constant (see cases 1-3 in the file header).
//
// \p MI is the register-register instruction consuming the constant.
// \p SplitAndOpc decides whether the constant is splittable; on success it
//    fills Imm0/Imm1 and returns the opcode pair (first/second instruction).
// \p BuildInstr materializes the two replacement instructions.
// \returns true (and erases MI, the MOV, and any SUBREG_TO_REG) on success,
//    false with no change otherwise.
template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32 bit form of instruction, the upper 32 bits of the destination
  // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
  // of Imm to zero. This is essential if the Immediate value was a negative
  // number since it was sign extended when we assign to the 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Create new MIs using the first and second opcodes. Opcodes might differ for
  // flag setting operations that should only set flags on second instruction.
  // NewTmpReg = Opcode.first SrcReg Imm0
  // NewDstReg = Opcode.second NewTmpReg Imm1

  // Determine register classes for destinations and register operands.
  // When both opcodes match, reuse the first lookup instead of querying twice.
  MachineFunction *MF = MI.getMF();
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);

  // Get old registers destinations and new register destinations
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
  // reuse that same destination register.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain registers based on their new uses
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating operation to build the instruction
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith changes MI's definition register. Keep it for SSA form until
  // deleting MI. Only if we made a new destination register.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    // replaceRegWith also rewrote MI's own def operand; restore DstReg on the
    // (about-to-die) MI so there is still exactly one def of each vreg until
    // MI is erased below.
    MI.getOperand(0).setReg(DstReg);
  }

  // Record the MIs need to be removed.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}
565349cc55cSDimitry Andric
// Fold an INSvi[X]gpr whose GPR source is really a value copied out of an
// FPR128 register back into the lane-to-lane form, avoiding the FPR->GPR->FPR
// round trip. \p Opc is the INSvi[X]lane opcode to emit. Returns true if MI
// was replaced and erased.
bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
  // Check if this INSvi[X]gpr comes from COPY of a source FPR128
  //
  // From
  //  %intermediate1:gpr64 = COPY %src:fpr128
  //  %intermediate2:gpr32 = COPY %intermediate1:gpr64
  //  %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
  // To
  //  %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
  //  src_index
  // where src_index = 0, X = [8|16|32|64]

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());

  // For a chain of COPY instructions, find the initial source register
  // and check if it's an FPR128. The walk terminates because the chain ends
  // at the first non-COPY def or non-virtual source (the pass asserts SSA
  // form, so each step moves to a distinct defining instruction).
  while (true) {
    // Bail out on anything other than a COPY: we only fold plain copy chains.
    if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
      return false;

    // Physical-register sources can't be traced through MRI; give up.
    if (!SrcMI->getOperand(1).getReg().isVirtual())
      return false;

    if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
        &AArch64::FPR128RegClass) {
      break;
    }
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
  }

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = SrcMI->getOperand(1).getReg();
  MachineInstr *INSvilaneMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
          .add(MI.getOperand(1))
          .add(MI.getOperand(2))
          // Preserve the operand flags (e.g. kill) from the COPY's use of the
          // FPR128 source; lane index 0 selects the low element.
          .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
          .addImm(0);

  LLVM_DEBUG(dbgs() << MI << "  replace by:\n: " << *INSvilaneMI << "\n");
  (void)INSvilaneMI;
  MI.eraseFromParent();
  return true;
}
610*06c3fb27SDimitry Andric
611*06c3fb27SDimitry Andric // All instructions that set a FPR64 will implicitly zero the top bits of the
612*06c3fb27SDimitry Andric // register.
is64bitDefwithZeroHigh64bit(MachineInstr * MI,MachineRegisterInfo * MRI)613*06c3fb27SDimitry Andric static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
614*06c3fb27SDimitry Andric MachineRegisterInfo *MRI) {
615*06c3fb27SDimitry Andric if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
616*06c3fb27SDimitry Andric return false;
617*06c3fb27SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
618*06c3fb27SDimitry Andric if (RC != &AArch64::FPR64RegClass)
619*06c3fb27SDimitry Andric return false;
620*06c3fb27SDimitry Andric return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
621*06c3fb27SDimitry Andric }
622*06c3fb27SDimitry Andric
visitINSvi64lane(MachineInstr & MI)623*06c3fb27SDimitry Andric bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
624*06c3fb27SDimitry Andric // Check the MI for low 64-bits sets zero for high 64-bits implicitly.
625*06c3fb27SDimitry Andric // We are expecting below case.
626*06c3fb27SDimitry Andric //
627*06c3fb27SDimitry Andric // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
628*06c3fb27SDimitry Andric // %6:fpr128 = IMPLICIT_DEF
629*06c3fb27SDimitry Andric // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
630*06c3fb27SDimitry Andric // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
631*06c3fb27SDimitry Andric MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
632*06c3fb27SDimitry Andric if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
633*06c3fb27SDimitry Andric return false;
634*06c3fb27SDimitry Andric Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
635*06c3fb27SDimitry Andric if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
636*06c3fb27SDimitry Andric return false;
637*06c3fb27SDimitry Andric
638*06c3fb27SDimitry Andric // Check there is `mov 0` MI for high 64-bits.
639*06c3fb27SDimitry Andric // We are expecting below cases.
640*06c3fb27SDimitry Andric //
641*06c3fb27SDimitry Andric // %2:fpr64 = MOVID 0
642*06c3fb27SDimitry Andric // %4:fpr128 = IMPLICIT_DEF
643*06c3fb27SDimitry Andric // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
644*06c3fb27SDimitry Andric // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
645*06c3fb27SDimitry Andric // or
646*06c3fb27SDimitry Andric // %5:fpr128 = MOVIv2d_ns 0
647*06c3fb27SDimitry Andric // %6:fpr64 = COPY %5.dsub:fpr128
648*06c3fb27SDimitry Andric // %8:fpr128 = IMPLICIT_DEF
649*06c3fb27SDimitry Andric // %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
650*06c3fb27SDimitry Andric // %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
651*06c3fb27SDimitry Andric MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
652*06c3fb27SDimitry Andric if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
653*06c3fb27SDimitry Andric return false;
654*06c3fb27SDimitry Andric High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
655*06c3fb27SDimitry Andric if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
656*06c3fb27SDimitry Andric High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
657*06c3fb27SDimitry Andric if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
658*06c3fb27SDimitry Andric High64MI->getOpcode() != AArch64::MOVIv2d_ns))
659*06c3fb27SDimitry Andric return false;
660*06c3fb27SDimitry Andric if (High64MI->getOperand(1).getImm() != 0)
661*06c3fb27SDimitry Andric return false;
662*06c3fb27SDimitry Andric
663*06c3fb27SDimitry Andric // Let's remove MIs for high 64-bits.
664*06c3fb27SDimitry Andric Register OldDef = MI.getOperand(0).getReg();
665*06c3fb27SDimitry Andric Register NewDef = MI.getOperand(1).getReg();
666*06c3fb27SDimitry Andric MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
667*06c3fb27SDimitry Andric MRI->replaceRegWith(OldDef, NewDef);
668*06c3fb27SDimitry Andric MI.eraseFromParent();
669*06c3fb27SDimitry Andric
670*06c3fb27SDimitry Andric return true;
671*06c3fb27SDimitry Andric }
672*06c3fb27SDimitry Andric
// Pass entry point: scan every instruction in \p MF and dispatch it to the
// matching peephole visitor. Returns true if any instruction was rewritten.
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  // Cache target hooks and per-function state used by the visitors.
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfo>();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  // Visitors may erase MI (and neighboring instructions), so iterate with
  // an early-increment range to keep the iterator valid.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      // MOVimm + AND/ADD/SUB rr-forms: try to split the constant into two
      // directly-encodable immediates (cases 1-3 in the file header). The
      // second opcode passed to visitADDSUB is the inverse operation —
      // presumably used when the negated constant encodes; confirm in
      // visitADDSUB.
      case AArch64::ANDWrr:
        Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
        break;
      case AArch64::ANDXrr:
        Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
        break;
      // Redundant zero-extend removal (case 4 in the file header).
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      // Flag-setting variants take opcode pairs: a non-flag-setting first
      // instruction and a flag-setting second one.
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      // GPR-source vector inserts: try to fold into lane-to-lane inserts.
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      }
    }
  }

  return Changed;
}
757349cc55cSDimitry Andric
// Factory function used by the AArch64 target to add this pass to the
// codegen pipeline.
FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}
761