1349cc55cSDimitry Andric //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2349cc55cSDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8349cc55cSDimitry Andric //
9349cc55cSDimitry Andric // This pass performs below peephole optimizations on MIR level.
10349cc55cSDimitry Andric //
11349cc55cSDimitry Andric // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12349cc55cSDimitry Andric //    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13349cc55cSDimitry Andric //
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16*04eeddc0SDimitry Andric //
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19*04eeddc0SDimitry Andric //
20349cc55cSDimitry Andric //    The mov pseudo instruction could be expanded to multiple mov instructions
21349cc55cSDimitry Andric //    later. In this case, we could try to split the constant  operand of mov
22*04eeddc0SDimitry Andric //    instruction into two immediates which can be directly encoded into
23*04eeddc0SDimitry Andric //    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
24*04eeddc0SDimitry Andric //    multiple `mov` + `and/add/sub` instructions.
25349cc55cSDimitry Andric //
26*04eeddc0SDimitry Andric // 4. Remove redundant ORRWrs which is generated by zero-extend.
27349cc55cSDimitry Andric //
28349cc55cSDimitry Andric //    %3:gpr32 = ORRWrs $wzr, %2, 0
29349cc55cSDimitry Andric //    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
30349cc55cSDimitry Andric //
31349cc55cSDimitry Andric //    If AArch64's 32-bit form of instruction defines the source operand of
32349cc55cSDimitry Andric //    ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source
33349cc55cSDimitry Andric //    operand are set to zero.
34349cc55cSDimitry Andric //
35349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
36349cc55cSDimitry Andric 
37349cc55cSDimitry Andric #include "AArch64ExpandImm.h"
38349cc55cSDimitry Andric #include "AArch64InstrInfo.h"
39349cc55cSDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h"
40*04eeddc0SDimitry Andric #include "llvm/ADT/Optional.h"
41349cc55cSDimitry Andric #include "llvm/ADT/SetVector.h"
42349cc55cSDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
43349cc55cSDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
44349cc55cSDimitry Andric 
45349cc55cSDimitry Andric using namespace llvm;
46349cc55cSDimitry Andric 
47349cc55cSDimitry Andric #define DEBUG_TYPE "aarch64-mi-peephole-opt"
48349cc55cSDimitry Andric 
49349cc55cSDimitry Andric namespace {
50349cc55cSDimitry Andric 
51349cc55cSDimitry Andric struct AArch64MIPeepholeOpt : public MachineFunctionPass {
52349cc55cSDimitry Andric   static char ID;
53349cc55cSDimitry Andric 
54349cc55cSDimitry Andric   AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
55349cc55cSDimitry Andric     initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
56349cc55cSDimitry Andric   }
57349cc55cSDimitry Andric 
58349cc55cSDimitry Andric   const AArch64InstrInfo *TII;
59*04eeddc0SDimitry Andric   const AArch64RegisterInfo *TRI;
60349cc55cSDimitry Andric   MachineLoopInfo *MLI;
61349cc55cSDimitry Andric   MachineRegisterInfo *MRI;
62349cc55cSDimitry Andric 
63349cc55cSDimitry Andric   template <typename T>
64*04eeddc0SDimitry Andric   using SplitAndOpcFunc =
65*04eeddc0SDimitry Andric       std::function<Optional<unsigned>(T, unsigned, T &, T &)>;
66*04eeddc0SDimitry Andric   using BuildMIFunc =
67*04eeddc0SDimitry Andric       std::function<void(MachineInstr &, unsigned, unsigned, unsigned, Register,
68*04eeddc0SDimitry Andric                          Register, Register)>;
69*04eeddc0SDimitry Andric 
70*04eeddc0SDimitry Andric   /// For instructions where an immediate operand could be split into two
71*04eeddc0SDimitry Andric   /// separate immediate instructions, use the splitTwoPartImm two handle the
72*04eeddc0SDimitry Andric   /// optimization.
73*04eeddc0SDimitry Andric   ///
74*04eeddc0SDimitry Andric   /// To implement, the following function types must be passed to
75*04eeddc0SDimitry Andric   /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
76*04eeddc0SDimitry Andric   /// splitting the immediate is valid and returns the associated new opcode. A
77*04eeddc0SDimitry Andric   /// BuildMIFunc must be implemented to build the two immediate instructions.
78*04eeddc0SDimitry Andric   ///
79*04eeddc0SDimitry Andric   /// Example Pattern (where IMM would require 2+ MOV instructions):
80*04eeddc0SDimitry Andric   ///     %dst = <Instr>rr %src IMM [...]
81*04eeddc0SDimitry Andric   /// becomes:
82*04eeddc0SDimitry Andric   ///     %tmp = <Instr>ri %src (encode half IMM) [...]
83*04eeddc0SDimitry Andric   ///     %dst = <Instr>ri %tmp (encode half IMM) [...]
84*04eeddc0SDimitry Andric   template <typename T>
85*04eeddc0SDimitry Andric   bool splitTwoPartImm(MachineInstr &MI,
86*04eeddc0SDimitry Andric                        SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
87*04eeddc0SDimitry Andric                        SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
88*04eeddc0SDimitry Andric 
89*04eeddc0SDimitry Andric   bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
90*04eeddc0SDimitry Andric                         MachineInstr *&SubregToRegMI);
91*04eeddc0SDimitry Andric 
92*04eeddc0SDimitry Andric   template <typename T>
93*04eeddc0SDimitry Andric   bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
94*04eeddc0SDimitry Andric                    SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
95*04eeddc0SDimitry Andric   template <typename T>
96*04eeddc0SDimitry Andric   bool visitAND(unsigned Opc, MachineInstr &MI,
97349cc55cSDimitry Andric                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
98349cc55cSDimitry Andric   bool visitORR(MachineInstr &MI,
99349cc55cSDimitry Andric                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
100349cc55cSDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override;
101349cc55cSDimitry Andric 
102349cc55cSDimitry Andric   StringRef getPassName() const override {
103349cc55cSDimitry Andric     return "AArch64 MI Peephole Optimization pass";
104349cc55cSDimitry Andric   }
105349cc55cSDimitry Andric 
106349cc55cSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
107349cc55cSDimitry Andric     AU.setPreservesCFG();
108349cc55cSDimitry Andric     AU.addRequired<MachineLoopInfo>();
109349cc55cSDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
110349cc55cSDimitry Andric   }
111349cc55cSDimitry Andric };
112349cc55cSDimitry Andric 
113349cc55cSDimitry Andric char AArch64MIPeepholeOpt::ID = 0;
114349cc55cSDimitry Andric 
115349cc55cSDimitry Andric } // end anonymous namespace
116349cc55cSDimitry Andric 
// Registers the pass under the name "aarch64-mi-peephole-opt" with the
// description "AArch64 MI Peephole Optimization".
// NOTE(review): the trailing `false, false` are presumably the CFG-only and
// is-analysis flags of INITIALIZE_PASS -- confirm against PassSupport.h.
INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)
119349cc55cSDimitry Andric 
120349cc55cSDimitry Andric template <typename T>
121349cc55cSDimitry Andric static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
122349cc55cSDimitry Andric   T UImm = static_cast<T>(Imm);
123349cc55cSDimitry Andric   if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
124349cc55cSDimitry Andric     return false;
125349cc55cSDimitry Andric 
126349cc55cSDimitry Andric   // If this immediate can be handled by one instruction, do not split it.
127349cc55cSDimitry Andric   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
128349cc55cSDimitry Andric   AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
129349cc55cSDimitry Andric   if (Insn.size() == 1)
130349cc55cSDimitry Andric     return false;
131349cc55cSDimitry Andric 
132349cc55cSDimitry Andric   // The bitmask immediate consists of consecutive ones.  Let's say there is
133349cc55cSDimitry Andric   // constant 0b00000000001000000000010000000000 which does not consist of
134349cc55cSDimitry Andric   // consecutive ones. We can split it in to two bitmask immediate like
135349cc55cSDimitry Andric   // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
136349cc55cSDimitry Andric   // If we do AND with these two bitmask immediate, we can see original one.
137349cc55cSDimitry Andric   unsigned LowestBitSet = countTrailingZeros(UImm);
138349cc55cSDimitry Andric   unsigned HighestBitSet = Log2_64(UImm);
139349cc55cSDimitry Andric 
140349cc55cSDimitry Andric   // Create a mask which is filled with one from the position of lowest bit set
141349cc55cSDimitry Andric   // to the position of highest bit set.
142349cc55cSDimitry Andric   T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
143349cc55cSDimitry Andric               (static_cast<T>(1) << LowestBitSet);
144349cc55cSDimitry Andric   // Create a mask which is filled with one outside the position of lowest bit
145349cc55cSDimitry Andric   // set and the position of highest bit set.
146349cc55cSDimitry Andric   T NewImm2 = UImm | ~NewImm1;
147349cc55cSDimitry Andric 
148349cc55cSDimitry Andric   // If the split value is not valid bitmask immediate, do not split this
149349cc55cSDimitry Andric   // constant.
150349cc55cSDimitry Andric   if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
151349cc55cSDimitry Andric     return false;
152349cc55cSDimitry Andric 
153349cc55cSDimitry Andric   Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
154349cc55cSDimitry Andric   Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
155349cc55cSDimitry Andric   return true;
156349cc55cSDimitry Andric }
157349cc55cSDimitry Andric 
158349cc55cSDimitry Andric template <typename T>
159349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::visitAND(
160*04eeddc0SDimitry Andric     unsigned Opc, MachineInstr &MI,
161*04eeddc0SDimitry Andric     SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
162349cc55cSDimitry Andric   // Try below transformation.
163349cc55cSDimitry Andric   //
164349cc55cSDimitry Andric   // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
165349cc55cSDimitry Andric   // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
166349cc55cSDimitry Andric   //
167349cc55cSDimitry Andric   // The mov pseudo instruction could be expanded to multiple mov instructions
168349cc55cSDimitry Andric   // later. Let's try to split the constant operand of mov instruction into two
169349cc55cSDimitry Andric   // bitmask immediates. It makes only two AND instructions intead of multiple
170349cc55cSDimitry Andric   // mov + and instructions.
171349cc55cSDimitry Andric 
172*04eeddc0SDimitry Andric   return splitTwoPartImm<T>(
173*04eeddc0SDimitry Andric       MI, ToBeRemoved,
174*04eeddc0SDimitry Andric       [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> {
175*04eeddc0SDimitry Andric         if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
176*04eeddc0SDimitry Andric           return Opc;
177*04eeddc0SDimitry Andric         return None;
178*04eeddc0SDimitry Andric       },
179*04eeddc0SDimitry Andric       [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
180*04eeddc0SDimitry Andric                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
181*04eeddc0SDimitry Andric                    Register NewDstReg) {
182349cc55cSDimitry Andric         DebugLoc DL = MI.getDebugLoc();
183*04eeddc0SDimitry Andric         MachineBasicBlock *MBB = MI.getParent();
184349cc55cSDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
185349cc55cSDimitry Andric             .addReg(SrcReg)
186*04eeddc0SDimitry Andric             .addImm(Imm0);
187349cc55cSDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
188349cc55cSDimitry Andric             .addReg(NewTmpReg)
189*04eeddc0SDimitry Andric             .addImm(Imm1);
190*04eeddc0SDimitry Andric       });
191349cc55cSDimitry Andric }
192349cc55cSDimitry Andric 
193349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::visitORR(
194349cc55cSDimitry Andric     MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
195349cc55cSDimitry Andric   // Check this ORR comes from below zero-extend pattern.
196349cc55cSDimitry Andric   //
197349cc55cSDimitry Andric   // def : Pat<(i64 (zext GPR32:$src)),
198349cc55cSDimitry Andric   //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
199349cc55cSDimitry Andric   if (MI.getOperand(3).getImm() != 0)
200349cc55cSDimitry Andric     return false;
201349cc55cSDimitry Andric 
202349cc55cSDimitry Andric   if (MI.getOperand(1).getReg() != AArch64::WZR)
203349cc55cSDimitry Andric     return false;
204349cc55cSDimitry Andric 
205349cc55cSDimitry Andric   MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
206349cc55cSDimitry Andric   if (!SrcMI)
207349cc55cSDimitry Andric     return false;
208349cc55cSDimitry Andric 
209349cc55cSDimitry Andric   // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
210349cc55cSDimitry Andric   //
211349cc55cSDimitry Andric   // When you use the 32-bit form of an instruction, the upper 32 bits of the
212349cc55cSDimitry Andric   // source registers are ignored and the upper 32 bits of the destination
213349cc55cSDimitry Andric   // register are set to zero.
214349cc55cSDimitry Andric   //
215349cc55cSDimitry Andric   // If AArch64's 32-bit form of instruction defines the source operand of
216349cc55cSDimitry Andric   // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is
217349cc55cSDimitry Andric   // real AArch64 instruction and if it is not, do not process the opcode
218349cc55cSDimitry Andric   // conservatively.
219349cc55cSDimitry Andric   if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
220349cc55cSDimitry Andric     return false;
221349cc55cSDimitry Andric 
222349cc55cSDimitry Andric   Register DefReg = MI.getOperand(0).getReg();
223349cc55cSDimitry Andric   Register SrcReg = MI.getOperand(2).getReg();
224349cc55cSDimitry Andric   MRI->replaceRegWith(DefReg, SrcReg);
225349cc55cSDimitry Andric   MRI->clearKillFlags(SrcReg);
226349cc55cSDimitry Andric   // replaceRegWith changes MI's definition register. Keep it for SSA form until
227349cc55cSDimitry Andric   // deleting MI.
228349cc55cSDimitry Andric   MI.getOperand(0).setReg(DefReg);
229349cc55cSDimitry Andric   ToBeRemoved.insert(&MI);
230349cc55cSDimitry Andric 
231*04eeddc0SDimitry Andric   LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
232*04eeddc0SDimitry Andric 
233*04eeddc0SDimitry Andric   return true;
234*04eeddc0SDimitry Andric }
235*04eeddc0SDimitry Andric 
236*04eeddc0SDimitry Andric template <typename T>
237*04eeddc0SDimitry Andric static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
238*04eeddc0SDimitry Andric   // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
239*04eeddc0SDimitry Andric   // imm0 and imm1 are non-zero 12-bit unsigned int.
240*04eeddc0SDimitry Andric   if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
241*04eeddc0SDimitry Andric       (Imm & ~static_cast<T>(0xffffff)) != 0)
242*04eeddc0SDimitry Andric     return false;
243*04eeddc0SDimitry Andric 
244*04eeddc0SDimitry Andric   // The immediate can not be composed via a single instruction.
245*04eeddc0SDimitry Andric   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
246*04eeddc0SDimitry Andric   AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
247*04eeddc0SDimitry Andric   if (Insn.size() == 1)
248*04eeddc0SDimitry Andric     return false;
249*04eeddc0SDimitry Andric 
250*04eeddc0SDimitry Andric   // Split Imm into (Imm0 << 12) + Imm1;
251*04eeddc0SDimitry Andric   Imm0 = (Imm >> 12) & 0xfff;
252*04eeddc0SDimitry Andric   Imm1 = Imm & 0xfff;
253*04eeddc0SDimitry Andric   return true;
254*04eeddc0SDimitry Andric }
255*04eeddc0SDimitry Andric 
256*04eeddc0SDimitry Andric template <typename T>
257*04eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::visitADDSUB(
258*04eeddc0SDimitry Andric     unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
259*04eeddc0SDimitry Andric     SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
260*04eeddc0SDimitry Andric   // Try below transformation.
261*04eeddc0SDimitry Andric   //
262*04eeddc0SDimitry Andric   // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
263*04eeddc0SDimitry Andric   // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
264*04eeddc0SDimitry Andric   //
265*04eeddc0SDimitry Andric   // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
266*04eeddc0SDimitry Andric   // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
267*04eeddc0SDimitry Andric   //
268*04eeddc0SDimitry Andric   // The mov pseudo instruction could be expanded to multiple mov instructions
269*04eeddc0SDimitry Andric   // later. Let's try to split the constant operand of mov instruction into two
270*04eeddc0SDimitry Andric   // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
271*04eeddc0SDimitry Andric   // multiple `mov` + `and/sub` instructions.
272*04eeddc0SDimitry Andric 
273*04eeddc0SDimitry Andric   return splitTwoPartImm<T>(
274*04eeddc0SDimitry Andric       MI, ToBeRemoved,
275*04eeddc0SDimitry Andric       [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
276*04eeddc0SDimitry Andric                        T &Imm1) -> Optional<unsigned> {
277*04eeddc0SDimitry Andric         if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
278*04eeddc0SDimitry Andric           return PosOpc;
279*04eeddc0SDimitry Andric         if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
280*04eeddc0SDimitry Andric           return NegOpc;
281*04eeddc0SDimitry Andric         return None;
282*04eeddc0SDimitry Andric       },
283*04eeddc0SDimitry Andric       [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
284*04eeddc0SDimitry Andric                    unsigned Imm1, Register SrcReg, Register NewTmpReg,
285*04eeddc0SDimitry Andric                    Register NewDstReg) {
286*04eeddc0SDimitry Andric         DebugLoc DL = MI.getDebugLoc();
287*04eeddc0SDimitry Andric         MachineBasicBlock *MBB = MI.getParent();
288*04eeddc0SDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
289*04eeddc0SDimitry Andric             .addReg(SrcReg)
290*04eeddc0SDimitry Andric             .addImm(Imm0)
291*04eeddc0SDimitry Andric             .addImm(12);
292*04eeddc0SDimitry Andric         BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
293*04eeddc0SDimitry Andric             .addReg(NewTmpReg)
294*04eeddc0SDimitry Andric             .addImm(Imm1)
295*04eeddc0SDimitry Andric             .addImm(0);
296*04eeddc0SDimitry Andric       });
297*04eeddc0SDimitry Andric }
298*04eeddc0SDimitry Andric 
299*04eeddc0SDimitry Andric // Checks if the corresponding MOV immediate instruction is applicable for
300*04eeddc0SDimitry Andric // this peephole optimization.
301*04eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
302*04eeddc0SDimitry Andric                                             MachineInstr *&MovMI,
303*04eeddc0SDimitry Andric                                             MachineInstr *&SubregToRegMI) {
304*04eeddc0SDimitry Andric   // Check whether current MBB is in loop and the AND is loop invariant.
305*04eeddc0SDimitry Andric   MachineBasicBlock *MBB = MI.getParent();
306*04eeddc0SDimitry Andric   MachineLoop *L = MLI->getLoopFor(MBB);
307*04eeddc0SDimitry Andric   if (L && !L->isLoopInvariant(MI))
308*04eeddc0SDimitry Andric     return false;
309*04eeddc0SDimitry Andric 
310*04eeddc0SDimitry Andric   // Check whether current MI's operand is MOV with immediate.
311*04eeddc0SDimitry Andric   MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
312*04eeddc0SDimitry Andric   if (!MovMI)
313*04eeddc0SDimitry Andric     return false;
314*04eeddc0SDimitry Andric 
315*04eeddc0SDimitry Andric   // If it is SUBREG_TO_REG, check its operand.
316*04eeddc0SDimitry Andric   SubregToRegMI = nullptr;
317*04eeddc0SDimitry Andric   if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
318*04eeddc0SDimitry Andric     SubregToRegMI = MovMI;
319*04eeddc0SDimitry Andric     MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
320*04eeddc0SDimitry Andric     if (!MovMI)
321*04eeddc0SDimitry Andric       return false;
322*04eeddc0SDimitry Andric   }
323*04eeddc0SDimitry Andric 
324*04eeddc0SDimitry Andric   if (MovMI->getOpcode() != AArch64::MOVi32imm &&
325*04eeddc0SDimitry Andric       MovMI->getOpcode() != AArch64::MOVi64imm)
326*04eeddc0SDimitry Andric     return false;
327*04eeddc0SDimitry Andric 
328*04eeddc0SDimitry Andric   // If the MOV has multiple uses, do not split the immediate because it causes
329*04eeddc0SDimitry Andric   // more instructions.
330*04eeddc0SDimitry Andric   if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
331*04eeddc0SDimitry Andric     return false;
332*04eeddc0SDimitry Andric   if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
333*04eeddc0SDimitry Andric     return false;
334*04eeddc0SDimitry Andric 
335*04eeddc0SDimitry Andric   // It is OK to perform this peephole optimization.
336*04eeddc0SDimitry Andric   return true;
337*04eeddc0SDimitry Andric }
338*04eeddc0SDimitry Andric 
339*04eeddc0SDimitry Andric template <typename T>
340*04eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::splitTwoPartImm(
341*04eeddc0SDimitry Andric     MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
342*04eeddc0SDimitry Andric     SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
343*04eeddc0SDimitry Andric   unsigned RegSize = sizeof(T) * 8;
344*04eeddc0SDimitry Andric   assert((RegSize == 32 || RegSize == 64) &&
345*04eeddc0SDimitry Andric          "Invalid RegSize for legal immediate peephole optimization");
346*04eeddc0SDimitry Andric 
347*04eeddc0SDimitry Andric   // Perform several essential checks against current MI.
348*04eeddc0SDimitry Andric   MachineInstr *MovMI, *SubregToRegMI;
349*04eeddc0SDimitry Andric   if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
350*04eeddc0SDimitry Andric     return false;
351*04eeddc0SDimitry Andric 
352*04eeddc0SDimitry Andric   // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
353*04eeddc0SDimitry Andric   T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
354*04eeddc0SDimitry Andric   // For the 32 bit form of instruction, the upper 32 bits of the destination
355*04eeddc0SDimitry Andric   // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
356*04eeddc0SDimitry Andric   // of Imm to zero. This is essential if the Immediate value was a negative
357*04eeddc0SDimitry Andric   // number since it was sign extended when we assign to the 64-bit Imm.
358*04eeddc0SDimitry Andric   if (SubregToRegMI)
359*04eeddc0SDimitry Andric     Imm &= 0xFFFFFFFF;
360*04eeddc0SDimitry Andric   unsigned Opcode;
361*04eeddc0SDimitry Andric   if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
362*04eeddc0SDimitry Andric     Opcode = R.getValue();
363*04eeddc0SDimitry Andric   else
364*04eeddc0SDimitry Andric     return false;
365*04eeddc0SDimitry Andric 
366*04eeddc0SDimitry Andric   // Create new ADD/SUB MIs.
367*04eeddc0SDimitry Andric   MachineFunction *MF = MI.getMF();
368*04eeddc0SDimitry Andric   const TargetRegisterClass *RC =
369*04eeddc0SDimitry Andric       TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
370*04eeddc0SDimitry Andric   const TargetRegisterClass *ORC =
371*04eeddc0SDimitry Andric       TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
372*04eeddc0SDimitry Andric   Register DstReg = MI.getOperand(0).getReg();
373*04eeddc0SDimitry Andric   Register SrcReg = MI.getOperand(1).getReg();
374*04eeddc0SDimitry Andric   Register NewTmpReg = MRI->createVirtualRegister(RC);
375*04eeddc0SDimitry Andric   Register NewDstReg = MRI->createVirtualRegister(RC);
376*04eeddc0SDimitry Andric 
377*04eeddc0SDimitry Andric   MRI->constrainRegClass(SrcReg, RC);
378*04eeddc0SDimitry Andric   MRI->constrainRegClass(NewTmpReg, ORC);
379*04eeddc0SDimitry Andric   MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
380*04eeddc0SDimitry Andric 
381*04eeddc0SDimitry Andric   BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
382*04eeddc0SDimitry Andric 
383*04eeddc0SDimitry Andric   MRI->replaceRegWith(DstReg, NewDstReg);
384*04eeddc0SDimitry Andric   // replaceRegWith changes MI's definition register. Keep it for SSA form until
385*04eeddc0SDimitry Andric   // deleting MI.
386*04eeddc0SDimitry Andric   MI.getOperand(0).setReg(DstReg);
387*04eeddc0SDimitry Andric 
388*04eeddc0SDimitry Andric   // Record the MIs need to be removed.
389*04eeddc0SDimitry Andric   ToBeRemoved.insert(&MI);
390*04eeddc0SDimitry Andric   if (SubregToRegMI)
391*04eeddc0SDimitry Andric     ToBeRemoved.insert(SubregToRegMI);
392*04eeddc0SDimitry Andric   ToBeRemoved.insert(MovMI);
393349cc55cSDimitry Andric 
394349cc55cSDimitry Andric   return true;
395349cc55cSDimitry Andric }
396349cc55cSDimitry Andric 
397349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
398349cc55cSDimitry Andric   if (skipFunction(MF.getFunction()))
399349cc55cSDimitry Andric     return false;
400349cc55cSDimitry Andric 
401349cc55cSDimitry Andric   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
402*04eeddc0SDimitry Andric   TRI = static_cast<const AArch64RegisterInfo *>(
403*04eeddc0SDimitry Andric       MF.getSubtarget().getRegisterInfo());
404349cc55cSDimitry Andric   MLI = &getAnalysis<MachineLoopInfo>();
405349cc55cSDimitry Andric   MRI = &MF.getRegInfo();
406349cc55cSDimitry Andric 
407*04eeddc0SDimitry Andric   assert(MRI->isSSA() && "Expected to be run on SSA form!");
408349cc55cSDimitry Andric 
409349cc55cSDimitry Andric   bool Changed = false;
410349cc55cSDimitry Andric   SmallSetVector<MachineInstr *, 8> ToBeRemoved;
411349cc55cSDimitry Andric 
412349cc55cSDimitry Andric   for (MachineBasicBlock &MBB : MF) {
413349cc55cSDimitry Andric     for (MachineInstr &MI : MBB) {
414349cc55cSDimitry Andric       switch (MI.getOpcode()) {
415349cc55cSDimitry Andric       default:
416349cc55cSDimitry Andric         break;
417349cc55cSDimitry Andric       case AArch64::ANDWrr:
418*04eeddc0SDimitry Andric         Changed = visitAND<uint32_t>(AArch64::ANDWri, MI, ToBeRemoved);
419349cc55cSDimitry Andric         break;
420349cc55cSDimitry Andric       case AArch64::ANDXrr:
421*04eeddc0SDimitry Andric         Changed = visitAND<uint64_t>(AArch64::ANDXri, MI, ToBeRemoved);
422349cc55cSDimitry Andric         break;
423349cc55cSDimitry Andric       case AArch64::ORRWrs:
424349cc55cSDimitry Andric         Changed = visitORR(MI, ToBeRemoved);
425*04eeddc0SDimitry Andric         break;
426*04eeddc0SDimitry Andric       case AArch64::ADDWrr:
427*04eeddc0SDimitry Andric         Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI,
428*04eeddc0SDimitry Andric                                         ToBeRemoved);
429*04eeddc0SDimitry Andric         break;
430*04eeddc0SDimitry Andric       case AArch64::SUBWrr:
431*04eeddc0SDimitry Andric         Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI,
432*04eeddc0SDimitry Andric                                         ToBeRemoved);
433*04eeddc0SDimitry Andric         break;
434*04eeddc0SDimitry Andric       case AArch64::ADDXrr:
435*04eeddc0SDimitry Andric         Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI,
436*04eeddc0SDimitry Andric                                         ToBeRemoved);
437*04eeddc0SDimitry Andric         break;
438*04eeddc0SDimitry Andric       case AArch64::SUBXrr:
439*04eeddc0SDimitry Andric         Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
440*04eeddc0SDimitry Andric                                         ToBeRemoved);
441*04eeddc0SDimitry Andric         break;
442349cc55cSDimitry Andric       }
443349cc55cSDimitry Andric     }
444349cc55cSDimitry Andric   }
445349cc55cSDimitry Andric 
446349cc55cSDimitry Andric   for (MachineInstr *MI : ToBeRemoved)
447349cc55cSDimitry Andric     MI->eraseFromParent();
448349cc55cSDimitry Andric 
449349cc55cSDimitry Andric   return Changed;
450349cc55cSDimitry Andric }
451349cc55cSDimitry Andric 
/// Factory for the AArch64 MI peephole optimization pass: returns a newly
/// allocated AArch64MIPeepholeOpt as a FunctionPass pointer.
FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}
455