//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs below peephole optimizations on MIR level.
//
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
//    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//    The mov pseudo instruction could be expanded to multiple mov instructions
//    later. In this case, we could try to split the constant operand of mov
//    instruction into two immediates which can be directly encoded into
//    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
//    multiple `mov` + `and/add/sub` instructions.
//
// 4. Remove redundant ORRWrs which is generated by zero-extend.
27349cc55cSDimitry Andric // 28349cc55cSDimitry Andric // %3:gpr32 = ORRWrs $wzr, %2, 0 29349cc55cSDimitry Andric // %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32 30349cc55cSDimitry Andric // 31349cc55cSDimitry Andric // If AArch64's 32-bit form of instruction defines the source operand of 32349cc55cSDimitry Andric // ORRWrs, we can remove the ORRWrs because the upper 32 bits of the source 33349cc55cSDimitry Andric // operand are set to zero. 34349cc55cSDimitry Andric // 35349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 36349cc55cSDimitry Andric 37349cc55cSDimitry Andric #include "AArch64ExpandImm.h" 38349cc55cSDimitry Andric #include "AArch64InstrInfo.h" 39349cc55cSDimitry Andric #include "MCTargetDesc/AArch64AddressingModes.h" 40*04eeddc0SDimitry Andric #include "llvm/ADT/Optional.h" 41349cc55cSDimitry Andric #include "llvm/ADT/SetVector.h" 42349cc55cSDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 43349cc55cSDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 44349cc55cSDimitry Andric 45349cc55cSDimitry Andric using namespace llvm; 46349cc55cSDimitry Andric 47349cc55cSDimitry Andric #define DEBUG_TYPE "aarch64-mi-peephole-opt" 48349cc55cSDimitry Andric 49349cc55cSDimitry Andric namespace { 50349cc55cSDimitry Andric 51349cc55cSDimitry Andric struct AArch64MIPeepholeOpt : public MachineFunctionPass { 52349cc55cSDimitry Andric static char ID; 53349cc55cSDimitry Andric 54349cc55cSDimitry Andric AArch64MIPeepholeOpt() : MachineFunctionPass(ID) { 55349cc55cSDimitry Andric initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry()); 56349cc55cSDimitry Andric } 57349cc55cSDimitry Andric 58349cc55cSDimitry Andric const AArch64InstrInfo *TII; 59*04eeddc0SDimitry Andric const AArch64RegisterInfo *TRI; 60349cc55cSDimitry Andric MachineLoopInfo *MLI; 61349cc55cSDimitry Andric MachineRegisterInfo *MRI; 62349cc55cSDimitry Andric 63349cc55cSDimitry Andric template <typename T> 64*04eeddc0SDimitry 
Andric using SplitAndOpcFunc = 65*04eeddc0SDimitry Andric std::function<Optional<unsigned>(T, unsigned, T &, T &)>; 66*04eeddc0SDimitry Andric using BuildMIFunc = 67*04eeddc0SDimitry Andric std::function<void(MachineInstr &, unsigned, unsigned, unsigned, Register, 68*04eeddc0SDimitry Andric Register, Register)>; 69*04eeddc0SDimitry Andric 70*04eeddc0SDimitry Andric /// For instructions where an immediate operand could be split into two 71*04eeddc0SDimitry Andric /// separate immediate instructions, use the splitTwoPartImm two handle the 72*04eeddc0SDimitry Andric /// optimization. 73*04eeddc0SDimitry Andric /// 74*04eeddc0SDimitry Andric /// To implement, the following function types must be passed to 75*04eeddc0SDimitry Andric /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if 76*04eeddc0SDimitry Andric /// splitting the immediate is valid and returns the associated new opcode. A 77*04eeddc0SDimitry Andric /// BuildMIFunc must be implemented to build the two immediate instructions. 78*04eeddc0SDimitry Andric /// 79*04eeddc0SDimitry Andric /// Example Pattern (where IMM would require 2+ MOV instructions): 80*04eeddc0SDimitry Andric /// %dst = <Instr>rr %src IMM [...] 81*04eeddc0SDimitry Andric /// becomes: 82*04eeddc0SDimitry Andric /// %tmp = <Instr>ri %src (encode half IMM) [...] 83*04eeddc0SDimitry Andric /// %dst = <Instr>ri %tmp (encode half IMM) [...] 
84*04eeddc0SDimitry Andric template <typename T> 85*04eeddc0SDimitry Andric bool splitTwoPartImm(MachineInstr &MI, 86*04eeddc0SDimitry Andric SmallSetVector<MachineInstr *, 8> &ToBeRemoved, 87*04eeddc0SDimitry Andric SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr); 88*04eeddc0SDimitry Andric 89*04eeddc0SDimitry Andric bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI, 90*04eeddc0SDimitry Andric MachineInstr *&SubregToRegMI); 91*04eeddc0SDimitry Andric 92*04eeddc0SDimitry Andric template <typename T> 93*04eeddc0SDimitry Andric bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI, 94*04eeddc0SDimitry Andric SmallSetVector<MachineInstr *, 8> &ToBeRemoved); 95*04eeddc0SDimitry Andric template <typename T> 96*04eeddc0SDimitry Andric bool visitAND(unsigned Opc, MachineInstr &MI, 97349cc55cSDimitry Andric SmallSetVector<MachineInstr *, 8> &ToBeRemoved); 98349cc55cSDimitry Andric bool visitORR(MachineInstr &MI, 99349cc55cSDimitry Andric SmallSetVector<MachineInstr *, 8> &ToBeRemoved); 100349cc55cSDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override; 101349cc55cSDimitry Andric 102349cc55cSDimitry Andric StringRef getPassName() const override { 103349cc55cSDimitry Andric return "AArch64 MI Peephole Optimization pass"; 104349cc55cSDimitry Andric } 105349cc55cSDimitry Andric 106349cc55cSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 107349cc55cSDimitry Andric AU.setPreservesCFG(); 108349cc55cSDimitry Andric AU.addRequired<MachineLoopInfo>(); 109349cc55cSDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 110349cc55cSDimitry Andric } 111349cc55cSDimitry Andric }; 112349cc55cSDimitry Andric 113349cc55cSDimitry Andric char AArch64MIPeepholeOpt::ID = 0; 114349cc55cSDimitry Andric 115349cc55cSDimitry Andric } // end anonymous namespace 116349cc55cSDimitry Andric 117349cc55cSDimitry Andric INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt", 118349cc55cSDimitry Andric "AArch64 MI Peephole 
Optimization", false, false) 119349cc55cSDimitry Andric 120349cc55cSDimitry Andric template <typename T> 121349cc55cSDimitry Andric static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { 122349cc55cSDimitry Andric T UImm = static_cast<T>(Imm); 123349cc55cSDimitry Andric if (AArch64_AM::isLogicalImmediate(UImm, RegSize)) 124349cc55cSDimitry Andric return false; 125349cc55cSDimitry Andric 126349cc55cSDimitry Andric // If this immediate can be handled by one instruction, do not split it. 127349cc55cSDimitry Andric SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 128349cc55cSDimitry Andric AArch64_IMM::expandMOVImm(UImm, RegSize, Insn); 129349cc55cSDimitry Andric if (Insn.size() == 1) 130349cc55cSDimitry Andric return false; 131349cc55cSDimitry Andric 132349cc55cSDimitry Andric // The bitmask immediate consists of consecutive ones. Let's say there is 133349cc55cSDimitry Andric // constant 0b00000000001000000000010000000000 which does not consist of 134349cc55cSDimitry Andric // consecutive ones. We can split it in to two bitmask immediate like 135349cc55cSDimitry Andric // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111. 136349cc55cSDimitry Andric // If we do AND with these two bitmask immediate, we can see original one. 137349cc55cSDimitry Andric unsigned LowestBitSet = countTrailingZeros(UImm); 138349cc55cSDimitry Andric unsigned HighestBitSet = Log2_64(UImm); 139349cc55cSDimitry Andric 140349cc55cSDimitry Andric // Create a mask which is filled with one from the position of lowest bit set 141349cc55cSDimitry Andric // to the position of highest bit set. 142349cc55cSDimitry Andric T NewImm1 = (static_cast<T>(2) << HighestBitSet) - 143349cc55cSDimitry Andric (static_cast<T>(1) << LowestBitSet); 144349cc55cSDimitry Andric // Create a mask which is filled with one outside the position of lowest bit 145349cc55cSDimitry Andric // set and the position of highest bit set. 
146349cc55cSDimitry Andric T NewImm2 = UImm | ~NewImm1; 147349cc55cSDimitry Andric 148349cc55cSDimitry Andric // If the split value is not valid bitmask immediate, do not split this 149349cc55cSDimitry Andric // constant. 150349cc55cSDimitry Andric if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) 151349cc55cSDimitry Andric return false; 152349cc55cSDimitry Andric 153349cc55cSDimitry Andric Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize); 154349cc55cSDimitry Andric Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize); 155349cc55cSDimitry Andric return true; 156349cc55cSDimitry Andric } 157349cc55cSDimitry Andric 158349cc55cSDimitry Andric template <typename T> 159349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::visitAND( 160*04eeddc0SDimitry Andric unsigned Opc, MachineInstr &MI, 161*04eeddc0SDimitry Andric SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { 162349cc55cSDimitry Andric // Try below transformation. 163349cc55cSDimitry Andric // 164349cc55cSDimitry Andric // MOVi32imm + ANDWrr ==> ANDWri + ANDWri 165349cc55cSDimitry Andric // MOVi64imm + ANDXrr ==> ANDXri + ANDXri 166349cc55cSDimitry Andric // 167349cc55cSDimitry Andric // The mov pseudo instruction could be expanded to multiple mov instructions 168349cc55cSDimitry Andric // later. Let's try to split the constant operand of mov instruction into two 169349cc55cSDimitry Andric // bitmask immediates. It makes only two AND instructions intead of multiple 170349cc55cSDimitry Andric // mov + and instructions. 
171349cc55cSDimitry Andric 172*04eeddc0SDimitry Andric return splitTwoPartImm<T>( 173*04eeddc0SDimitry Andric MI, ToBeRemoved, 174*04eeddc0SDimitry Andric [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> { 175*04eeddc0SDimitry Andric if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) 176*04eeddc0SDimitry Andric return Opc; 177*04eeddc0SDimitry Andric return None; 178*04eeddc0SDimitry Andric }, 179*04eeddc0SDimitry Andric [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0, 180*04eeddc0SDimitry Andric unsigned Imm1, Register SrcReg, Register NewTmpReg, 181*04eeddc0SDimitry Andric Register NewDstReg) { 182349cc55cSDimitry Andric DebugLoc DL = MI.getDebugLoc(); 183*04eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 184349cc55cSDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg) 185349cc55cSDimitry Andric .addReg(SrcReg) 186*04eeddc0SDimitry Andric .addImm(Imm0); 187349cc55cSDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg) 188349cc55cSDimitry Andric .addReg(NewTmpReg) 189*04eeddc0SDimitry Andric .addImm(Imm1); 190*04eeddc0SDimitry Andric }); 191349cc55cSDimitry Andric } 192349cc55cSDimitry Andric 193349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::visitORR( 194349cc55cSDimitry Andric MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { 195349cc55cSDimitry Andric // Check this ORR comes from below zero-extend pattern. 
196349cc55cSDimitry Andric // 197349cc55cSDimitry Andric // def : Pat<(i64 (zext GPR32:$src)), 198349cc55cSDimitry Andric // (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>; 199349cc55cSDimitry Andric if (MI.getOperand(3).getImm() != 0) 200349cc55cSDimitry Andric return false; 201349cc55cSDimitry Andric 202349cc55cSDimitry Andric if (MI.getOperand(1).getReg() != AArch64::WZR) 203349cc55cSDimitry Andric return false; 204349cc55cSDimitry Andric 205349cc55cSDimitry Andric MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 206349cc55cSDimitry Andric if (!SrcMI) 207349cc55cSDimitry Andric return false; 208349cc55cSDimitry Andric 209349cc55cSDimitry Andric // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC 210349cc55cSDimitry Andric // 211349cc55cSDimitry Andric // When you use the 32-bit form of an instruction, the upper 32 bits of the 212349cc55cSDimitry Andric // source registers are ignored and the upper 32 bits of the destination 213349cc55cSDimitry Andric // register are set to zero. 214349cc55cSDimitry Andric // 215349cc55cSDimitry Andric // If AArch64's 32-bit form of instruction defines the source operand of 216349cc55cSDimitry Andric // zero-extend, we do not need the zero-extend. Let's check the MI's opcode is 217349cc55cSDimitry Andric // real AArch64 instruction and if it is not, do not process the opcode 218349cc55cSDimitry Andric // conservatively. 219349cc55cSDimitry Andric if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) 220349cc55cSDimitry Andric return false; 221349cc55cSDimitry Andric 222349cc55cSDimitry Andric Register DefReg = MI.getOperand(0).getReg(); 223349cc55cSDimitry Andric Register SrcReg = MI.getOperand(2).getReg(); 224349cc55cSDimitry Andric MRI->replaceRegWith(DefReg, SrcReg); 225349cc55cSDimitry Andric MRI->clearKillFlags(SrcReg); 226349cc55cSDimitry Andric // replaceRegWith changes MI's definition register. Keep it for SSA form until 227349cc55cSDimitry Andric // deleting MI. 
228349cc55cSDimitry Andric MI.getOperand(0).setReg(DefReg); 229349cc55cSDimitry Andric ToBeRemoved.insert(&MI); 230349cc55cSDimitry Andric 231*04eeddc0SDimitry Andric LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n"); 232*04eeddc0SDimitry Andric 233*04eeddc0SDimitry Andric return true; 234*04eeddc0SDimitry Andric } 235*04eeddc0SDimitry Andric 236*04eeddc0SDimitry Andric template <typename T> 237*04eeddc0SDimitry Andric static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) { 238*04eeddc0SDimitry Andric // The immediate must be in the form of ((imm0 << 12) + imm1), in which both 239*04eeddc0SDimitry Andric // imm0 and imm1 are non-zero 12-bit unsigned int. 240*04eeddc0SDimitry Andric if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 || 241*04eeddc0SDimitry Andric (Imm & ~static_cast<T>(0xffffff)) != 0) 242*04eeddc0SDimitry Andric return false; 243*04eeddc0SDimitry Andric 244*04eeddc0SDimitry Andric // The immediate can not be composed via a single instruction. 245*04eeddc0SDimitry Andric SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; 246*04eeddc0SDimitry Andric AArch64_IMM::expandMOVImm(Imm, RegSize, Insn); 247*04eeddc0SDimitry Andric if (Insn.size() == 1) 248*04eeddc0SDimitry Andric return false; 249*04eeddc0SDimitry Andric 250*04eeddc0SDimitry Andric // Split Imm into (Imm0 << 12) + Imm1; 251*04eeddc0SDimitry Andric Imm0 = (Imm >> 12) & 0xfff; 252*04eeddc0SDimitry Andric Imm1 = Imm & 0xfff; 253*04eeddc0SDimitry Andric return true; 254*04eeddc0SDimitry Andric } 255*04eeddc0SDimitry Andric 256*04eeddc0SDimitry Andric template <typename T> 257*04eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::visitADDSUB( 258*04eeddc0SDimitry Andric unsigned PosOpc, unsigned NegOpc, MachineInstr &MI, 259*04eeddc0SDimitry Andric SmallSetVector<MachineInstr *, 8> &ToBeRemoved) { 260*04eeddc0SDimitry Andric // Try below transformation. 
261*04eeddc0SDimitry Andric // 262*04eeddc0SDimitry Andric // MOVi32imm + ADDWrr ==> ADDWri + ADDWri 263*04eeddc0SDimitry Andric // MOVi64imm + ADDXrr ==> ADDXri + ADDXri 264*04eeddc0SDimitry Andric // 265*04eeddc0SDimitry Andric // MOVi32imm + SUBWrr ==> SUBWri + SUBWri 266*04eeddc0SDimitry Andric // MOVi64imm + SUBXrr ==> SUBXri + SUBXri 267*04eeddc0SDimitry Andric // 268*04eeddc0SDimitry Andric // The mov pseudo instruction could be expanded to multiple mov instructions 269*04eeddc0SDimitry Andric // later. Let's try to split the constant operand of mov instruction into two 270*04eeddc0SDimitry Andric // legal add/sub immediates. It makes only two ADD/SUB instructions intead of 271*04eeddc0SDimitry Andric // multiple `mov` + `and/sub` instructions. 272*04eeddc0SDimitry Andric 273*04eeddc0SDimitry Andric return splitTwoPartImm<T>( 274*04eeddc0SDimitry Andric MI, ToBeRemoved, 275*04eeddc0SDimitry Andric [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0, 276*04eeddc0SDimitry Andric T &Imm1) -> Optional<unsigned> { 277*04eeddc0SDimitry Andric if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) 278*04eeddc0SDimitry Andric return PosOpc; 279*04eeddc0SDimitry Andric if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) 280*04eeddc0SDimitry Andric return NegOpc; 281*04eeddc0SDimitry Andric return None; 282*04eeddc0SDimitry Andric }, 283*04eeddc0SDimitry Andric [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0, 284*04eeddc0SDimitry Andric unsigned Imm1, Register SrcReg, Register NewTmpReg, 285*04eeddc0SDimitry Andric Register NewDstReg) { 286*04eeddc0SDimitry Andric DebugLoc DL = MI.getDebugLoc(); 287*04eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 288*04eeddc0SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg) 289*04eeddc0SDimitry Andric .addReg(SrcReg) 290*04eeddc0SDimitry Andric .addImm(Imm0) 291*04eeddc0SDimitry Andric .addImm(12); 292*04eeddc0SDimitry Andric BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg) 293*04eeddc0SDimitry Andric 
.addReg(NewTmpReg) 294*04eeddc0SDimitry Andric .addImm(Imm1) 295*04eeddc0SDimitry Andric .addImm(0); 296*04eeddc0SDimitry Andric }); 297*04eeddc0SDimitry Andric } 298*04eeddc0SDimitry Andric 299*04eeddc0SDimitry Andric // Checks if the corresponding MOV immediate instruction is applicable for 300*04eeddc0SDimitry Andric // this peephole optimization. 301*04eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI, 302*04eeddc0SDimitry Andric MachineInstr *&MovMI, 303*04eeddc0SDimitry Andric MachineInstr *&SubregToRegMI) { 304*04eeddc0SDimitry Andric // Check whether current MBB is in loop and the AND is loop invariant. 305*04eeddc0SDimitry Andric MachineBasicBlock *MBB = MI.getParent(); 306*04eeddc0SDimitry Andric MachineLoop *L = MLI->getLoopFor(MBB); 307*04eeddc0SDimitry Andric if (L && !L->isLoopInvariant(MI)) 308*04eeddc0SDimitry Andric return false; 309*04eeddc0SDimitry Andric 310*04eeddc0SDimitry Andric // Check whether current MI's operand is MOV with immediate. 311*04eeddc0SDimitry Andric MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg()); 312*04eeddc0SDimitry Andric if (!MovMI) 313*04eeddc0SDimitry Andric return false; 314*04eeddc0SDimitry Andric 315*04eeddc0SDimitry Andric // If it is SUBREG_TO_REG, check its operand. 
316*04eeddc0SDimitry Andric SubregToRegMI = nullptr; 317*04eeddc0SDimitry Andric if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) { 318*04eeddc0SDimitry Andric SubregToRegMI = MovMI; 319*04eeddc0SDimitry Andric MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg()); 320*04eeddc0SDimitry Andric if (!MovMI) 321*04eeddc0SDimitry Andric return false; 322*04eeddc0SDimitry Andric } 323*04eeddc0SDimitry Andric 324*04eeddc0SDimitry Andric if (MovMI->getOpcode() != AArch64::MOVi32imm && 325*04eeddc0SDimitry Andric MovMI->getOpcode() != AArch64::MOVi64imm) 326*04eeddc0SDimitry Andric return false; 327*04eeddc0SDimitry Andric 328*04eeddc0SDimitry Andric // If the MOV has multiple uses, do not split the immediate because it causes 329*04eeddc0SDimitry Andric // more instructions. 330*04eeddc0SDimitry Andric if (!MRI->hasOneUse(MovMI->getOperand(0).getReg())) 331*04eeddc0SDimitry Andric return false; 332*04eeddc0SDimitry Andric if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg())) 333*04eeddc0SDimitry Andric return false; 334*04eeddc0SDimitry Andric 335*04eeddc0SDimitry Andric // It is OK to perform this peephole optimization. 336*04eeddc0SDimitry Andric return true; 337*04eeddc0SDimitry Andric } 338*04eeddc0SDimitry Andric 339*04eeddc0SDimitry Andric template <typename T> 340*04eeddc0SDimitry Andric bool AArch64MIPeepholeOpt::splitTwoPartImm( 341*04eeddc0SDimitry Andric MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved, 342*04eeddc0SDimitry Andric SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) { 343*04eeddc0SDimitry Andric unsigned RegSize = sizeof(T) * 8; 344*04eeddc0SDimitry Andric assert((RegSize == 32 || RegSize == 64) && 345*04eeddc0SDimitry Andric "Invalid RegSize for legal immediate peephole optimization"); 346*04eeddc0SDimitry Andric 347*04eeddc0SDimitry Andric // Perform several essential checks against current MI. 
348*04eeddc0SDimitry Andric MachineInstr *MovMI, *SubregToRegMI; 349*04eeddc0SDimitry Andric if (!checkMovImmInstr(MI, MovMI, SubregToRegMI)) 350*04eeddc0SDimitry Andric return false; 351*04eeddc0SDimitry Andric 352*04eeddc0SDimitry Andric // Split the immediate to Imm0 and Imm1, and calculate the Opcode. 353*04eeddc0SDimitry Andric T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1; 354*04eeddc0SDimitry Andric // For the 32 bit form of instruction, the upper 32 bits of the destination 355*04eeddc0SDimitry Andric // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits 356*04eeddc0SDimitry Andric // of Imm to zero. This is essential if the Immediate value was a negative 357*04eeddc0SDimitry Andric // number since it was sign extended when we assign to the 64-bit Imm. 358*04eeddc0SDimitry Andric if (SubregToRegMI) 359*04eeddc0SDimitry Andric Imm &= 0xFFFFFFFF; 360*04eeddc0SDimitry Andric unsigned Opcode; 361*04eeddc0SDimitry Andric if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1)) 362*04eeddc0SDimitry Andric Opcode = R.getValue(); 363*04eeddc0SDimitry Andric else 364*04eeddc0SDimitry Andric return false; 365*04eeddc0SDimitry Andric 366*04eeddc0SDimitry Andric // Create new ADD/SUB MIs. 
367*04eeddc0SDimitry Andric MachineFunction *MF = MI.getMF(); 368*04eeddc0SDimitry Andric const TargetRegisterClass *RC = 369*04eeddc0SDimitry Andric TII->getRegClass(TII->get(Opcode), 0, TRI, *MF); 370*04eeddc0SDimitry Andric const TargetRegisterClass *ORC = 371*04eeddc0SDimitry Andric TII->getRegClass(TII->get(Opcode), 1, TRI, *MF); 372*04eeddc0SDimitry Andric Register DstReg = MI.getOperand(0).getReg(); 373*04eeddc0SDimitry Andric Register SrcReg = MI.getOperand(1).getReg(); 374*04eeddc0SDimitry Andric Register NewTmpReg = MRI->createVirtualRegister(RC); 375*04eeddc0SDimitry Andric Register NewDstReg = MRI->createVirtualRegister(RC); 376*04eeddc0SDimitry Andric 377*04eeddc0SDimitry Andric MRI->constrainRegClass(SrcReg, RC); 378*04eeddc0SDimitry Andric MRI->constrainRegClass(NewTmpReg, ORC); 379*04eeddc0SDimitry Andric MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg)); 380*04eeddc0SDimitry Andric 381*04eeddc0SDimitry Andric BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg); 382*04eeddc0SDimitry Andric 383*04eeddc0SDimitry Andric MRI->replaceRegWith(DstReg, NewDstReg); 384*04eeddc0SDimitry Andric // replaceRegWith changes MI's definition register. Keep it for SSA form until 385*04eeddc0SDimitry Andric // deleting MI. 386*04eeddc0SDimitry Andric MI.getOperand(0).setReg(DstReg); 387*04eeddc0SDimitry Andric 388*04eeddc0SDimitry Andric // Record the MIs need to be removed. 
389*04eeddc0SDimitry Andric ToBeRemoved.insert(&MI); 390*04eeddc0SDimitry Andric if (SubregToRegMI) 391*04eeddc0SDimitry Andric ToBeRemoved.insert(SubregToRegMI); 392*04eeddc0SDimitry Andric ToBeRemoved.insert(MovMI); 393349cc55cSDimitry Andric 394349cc55cSDimitry Andric return true; 395349cc55cSDimitry Andric } 396349cc55cSDimitry Andric 397349cc55cSDimitry Andric bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { 398349cc55cSDimitry Andric if (skipFunction(MF.getFunction())) 399349cc55cSDimitry Andric return false; 400349cc55cSDimitry Andric 401349cc55cSDimitry Andric TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 402*04eeddc0SDimitry Andric TRI = static_cast<const AArch64RegisterInfo *>( 403*04eeddc0SDimitry Andric MF.getSubtarget().getRegisterInfo()); 404349cc55cSDimitry Andric MLI = &getAnalysis<MachineLoopInfo>(); 405349cc55cSDimitry Andric MRI = &MF.getRegInfo(); 406349cc55cSDimitry Andric 407*04eeddc0SDimitry Andric assert(MRI->isSSA() && "Expected to be run on SSA form!"); 408349cc55cSDimitry Andric 409349cc55cSDimitry Andric bool Changed = false; 410349cc55cSDimitry Andric SmallSetVector<MachineInstr *, 8> ToBeRemoved; 411349cc55cSDimitry Andric 412349cc55cSDimitry Andric for (MachineBasicBlock &MBB : MF) { 413349cc55cSDimitry Andric for (MachineInstr &MI : MBB) { 414349cc55cSDimitry Andric switch (MI.getOpcode()) { 415349cc55cSDimitry Andric default: 416349cc55cSDimitry Andric break; 417349cc55cSDimitry Andric case AArch64::ANDWrr: 418*04eeddc0SDimitry Andric Changed = visitAND<uint32_t>(AArch64::ANDWri, MI, ToBeRemoved); 419349cc55cSDimitry Andric break; 420349cc55cSDimitry Andric case AArch64::ANDXrr: 421*04eeddc0SDimitry Andric Changed = visitAND<uint64_t>(AArch64::ANDXri, MI, ToBeRemoved); 422349cc55cSDimitry Andric break; 423349cc55cSDimitry Andric case AArch64::ORRWrs: 424349cc55cSDimitry Andric Changed = visitORR(MI, ToBeRemoved); 425*04eeddc0SDimitry Andric break; 426*04eeddc0SDimitry Andric 
case AArch64::ADDWrr: 427*04eeddc0SDimitry Andric Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI, 428*04eeddc0SDimitry Andric ToBeRemoved); 429*04eeddc0SDimitry Andric break; 430*04eeddc0SDimitry Andric case AArch64::SUBWrr: 431*04eeddc0SDimitry Andric Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI, 432*04eeddc0SDimitry Andric ToBeRemoved); 433*04eeddc0SDimitry Andric break; 434*04eeddc0SDimitry Andric case AArch64::ADDXrr: 435*04eeddc0SDimitry Andric Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI, 436*04eeddc0SDimitry Andric ToBeRemoved); 437*04eeddc0SDimitry Andric break; 438*04eeddc0SDimitry Andric case AArch64::SUBXrr: 439*04eeddc0SDimitry Andric Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI, 440*04eeddc0SDimitry Andric ToBeRemoved); 441*04eeddc0SDimitry Andric break; 442349cc55cSDimitry Andric } 443349cc55cSDimitry Andric } 444349cc55cSDimitry Andric } 445349cc55cSDimitry Andric 446349cc55cSDimitry Andric for (MachineInstr *MI : ToBeRemoved) 447349cc55cSDimitry Andric MI->eraseFromParent(); 448349cc55cSDimitry Andric 449349cc55cSDimitry Andric return Changed; 450349cc55cSDimitry Andric } 451349cc55cSDimitry Andric 452349cc55cSDimitry Andric FunctionPass *llvm::createAArch64MIPeepholeOptPass() { 453349cc55cSDimitry Andric return new AArch64MIPeepholeOpt(); 454349cc55cSDimitry Andric } 455