1 //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs below peephole optimizations on MIR level.
10 //
11 // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12 //    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13 //
14 //    The mov pseudo instruction could be expanded to multiple mov instructions
15 //    later. In this case, we could try to split the constant  operand of mov
16 //    instruction into two bitmask immediates. It makes two AND instructions
17 //    intead of multiple `mov` + `and` instructions.
18 //===----------------------------------------------------------------------===//
19 
20 #include "AArch64ExpandImm.h"
21 #include "AArch64InstrInfo.h"
22 #include "MCTargetDesc/AArch64AddressingModes.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/MachineLoopInfo.h"
26 
27 using namespace llvm;
28 
29 #define DEBUG_TYPE "aarch64-mi-peephole-opt"
30 
31 namespace {
32 
33 struct AArch64MIPeepholeOpt : public MachineFunctionPass {
34   static char ID;
35 
AArch64MIPeepholeOpt__anon0c3b953b0111::AArch64MIPeepholeOpt36   AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
37     initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
38   }
39 
40   const AArch64InstrInfo *TII;
41   MachineLoopInfo *MLI;
42   MachineRegisterInfo *MRI;
43 
44   template <typename T>
45   bool visitAND(MachineInstr &MI,
46                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
47   bool runOnMachineFunction(MachineFunction &MF) override;
48 
getPassName__anon0c3b953b0111::AArch64MIPeepholeOpt49   StringRef getPassName() const override {
50     return "AArch64 MI Peephole Optimization pass";
51   }
52 
getAnalysisUsage__anon0c3b953b0111::AArch64MIPeepholeOpt53   void getAnalysisUsage(AnalysisUsage &AU) const override {
54     AU.setPreservesCFG();
55     AU.addRequired<MachineLoopInfo>();
56     MachineFunctionPass::getAnalysisUsage(AU);
57   }
58 };
59 
60 char AArch64MIPeepholeOpt::ID = 0;
61 
62 } // end anonymous namespace
63 
64 INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
65                 "AArch64 MI Peephole Optimization", false, false)
66 
67 template <typename T>
splitBitmaskImm(T Imm,unsigned RegSize,T & Imm1Enc,T & Imm2Enc)68 static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
69   T UImm = static_cast<T>(Imm);
70   if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
71     return false;
72 
73   // If this immediate can be handled by one instruction, do not split it.
74   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
75   AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
76   if (Insn.size() == 1)
77     return false;
78 
79   // The bitmask immediate consists of consecutive ones.  Let's say there is
80   // constant 0b00000000001000000000010000000000 which does not consist of
81   // consecutive ones. We can split it in to two bitmask immediate like
82   // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
83   // If we do AND with these two bitmask immediate, we can see original one.
84   unsigned LowestBitSet = countTrailingZeros(UImm);
85   unsigned HighestBitSet = Log2_64(UImm);
86 
87   // Create a mask which is filled with one from the position of lowest bit set
88   // to the position of highest bit set.
89   T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
90               (static_cast<T>(1) << LowestBitSet);
91   // Create a mask which is filled with one outside the position of lowest bit
92   // set and the position of highest bit set.
93   T NewImm2 = UImm | ~NewImm1;
94 
95   // If the split value is not valid bitmask immediate, do not split this
96   // constant.
97   if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
98     return false;
99 
100   Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
101   Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
102   return true;
103 }
104 
105 template <typename T>
visitAND(MachineInstr & MI,SmallSetVector<MachineInstr *,8> & ToBeRemoved)106 bool AArch64MIPeepholeOpt::visitAND(
107     MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
108   // Try below transformation.
109   //
110   // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
111   // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
112   //
113   // The mov pseudo instruction could be expanded to multiple mov instructions
114   // later. Let's try to split the constant operand of mov instruction into two
115   // bitmask immediates. It makes only two AND instructions intead of multiple
116   // mov + and instructions.
117 
118   unsigned RegSize = sizeof(T) * 8;
119   assert((RegSize == 32 || RegSize == 64) &&
120          "Invalid RegSize for AND bitmask peephole optimization");
121 
122   // Check whether AND's MBB is in loop and the AND is loop invariant.
123   MachineBasicBlock *MBB = MI.getParent();
124   MachineLoop *L = MLI->getLoopFor(MBB);
125   if (L && !L->isLoopInvariant(MI))
126     return false;
127 
128   // Check whether AND's operand is MOV with immediate.
129   MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
130   MachineInstr *SubregToRegMI = nullptr;
131   // If it is SUBREG_TO_REG, check its operand.
132   if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
133     SubregToRegMI = MovMI;
134     MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
135   }
136 
137   if (MovMI->getOpcode() != AArch64::MOVi32imm &&
138       MovMI->getOpcode() != AArch64::MOVi64imm)
139     return false;
140 
141   // If the MOV has multiple uses, do not split the immediate because it causes
142   // more instructions.
143   if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
144     return false;
145 
146   if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
147     return false;
148 
149   // Split the bitmask immediate into two.
150   T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
151   // For the 32 bit form of instruction, the upper 32 bits of the destination
152   // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
153   // of UImm to zero.
154   if (SubregToRegMI)
155     UImm &= 0xFFFFFFFF;
156   T Imm1Enc;
157   T Imm2Enc;
158   if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
159     return false;
160 
161   // Create new AND MIs.
162   DebugLoc DL = MI.getDebugLoc();
163   const TargetRegisterClass *ANDImmRC =
164       (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
165   Register DstReg = MI.getOperand(0).getReg();
166   Register SrcReg = MI.getOperand(1).getReg();
167   Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
168   unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
169 
170   MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
171   BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
172       .addReg(SrcReg)
173       .addImm(Imm1Enc);
174 
175   MRI->constrainRegClass(DstReg, ANDImmRC);
176   BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
177       .addReg(NewTmpReg)
178       .addImm(Imm2Enc);
179 
180   ToBeRemoved.insert(&MI);
181   if (SubregToRegMI)
182     ToBeRemoved.insert(SubregToRegMI);
183   ToBeRemoved.insert(MovMI);
184 
185   return true;
186 }
187 
runOnMachineFunction(MachineFunction & MF)188 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
189   if (skipFunction(MF.getFunction()))
190     return false;
191 
192   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
193   MLI = &getAnalysis<MachineLoopInfo>();
194   MRI = &MF.getRegInfo();
195 
196   if (!MRI->isSSA())
197     return false;
198 
199   bool Changed = false;
200   SmallSetVector<MachineInstr *, 8> ToBeRemoved;
201 
202   for (MachineBasicBlock &MBB : MF) {
203     for (MachineInstr &MI : MBB) {
204       switch (MI.getOpcode()) {
205       default:
206         break;
207       case AArch64::ANDWrr:
208         Changed = visitAND<uint32_t>(MI, ToBeRemoved);
209         break;
210       case AArch64::ANDXrr:
211         Changed = visitAND<uint64_t>(MI, ToBeRemoved);
212         break;
213       }
214     }
215   }
216 
217   for (MachineInstr *MI : ToBeRemoved)
218     MI->eraseFromParent();
219 
220   return Changed;
221 }
222 
createAArch64MIPeepholeOptPass()223 FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
224   return new AArch64MIPeepholeOpt();
225 }
226