1 //===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs below peephole optimizations on MIR level.
10 //
11 // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12 // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13 //
14 // The mov pseudo instruction could be expanded to multiple mov instructions
15 // later. In this case, we could try to split the constant operand of mov
16 // instruction into two bitmask immediates. It makes two AND instructions
17 // intead of multiple `mov` + `and` instructions.
18 //===----------------------------------------------------------------------===//
19
20 #include "AArch64ExpandImm.h"
21 #include "AArch64InstrInfo.h"
22 #include "MCTargetDesc/AArch64AddressingModes.h"
23 #include "llvm/ADT/SetVector.h"
24 #include "llvm/CodeGen/MachineDominators.h"
25 #include "llvm/CodeGen/MachineLoopInfo.h"
26
27 using namespace llvm;
28
29 #define DEBUG_TYPE "aarch64-mi-peephole-opt"
30
31 namespace {
32
33 struct AArch64MIPeepholeOpt : public MachineFunctionPass {
34 static char ID;
35
AArch64MIPeepholeOpt__anon0c3b953b0111::AArch64MIPeepholeOpt36 AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {
37 initializeAArch64MIPeepholeOptPass(*PassRegistry::getPassRegistry());
38 }
39
40 const AArch64InstrInfo *TII;
41 MachineLoopInfo *MLI;
42 MachineRegisterInfo *MRI;
43
44 template <typename T>
45 bool visitAND(MachineInstr &MI,
46 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
47 bool runOnMachineFunction(MachineFunction &MF) override;
48
getPassName__anon0c3b953b0111::AArch64MIPeepholeOpt49 StringRef getPassName() const override {
50 return "AArch64 MI Peephole Optimization pass";
51 }
52
getAnalysisUsage__anon0c3b953b0111::AArch64MIPeepholeOpt53 void getAnalysisUsage(AnalysisUsage &AU) const override {
54 AU.setPreservesCFG();
55 AU.addRequired<MachineLoopInfo>();
56 MachineFunctionPass::getAnalysisUsage(AU);
57 }
58 };
59
60 char AArch64MIPeepholeOpt::ID = 0;
61
62 } // end anonymous namespace
63
64 INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
65 "AArch64 MI Peephole Optimization", false, false)
66
67 template <typename T>
splitBitmaskImm(T Imm,unsigned RegSize,T & Imm1Enc,T & Imm2Enc)68 static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
69 T UImm = static_cast<T>(Imm);
70 if (AArch64_AM::isLogicalImmediate(UImm, RegSize))
71 return false;
72
73 // If this immediate can be handled by one instruction, do not split it.
74 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
75 AArch64_IMM::expandMOVImm(UImm, RegSize, Insn);
76 if (Insn.size() == 1)
77 return false;
78
79 // The bitmask immediate consists of consecutive ones. Let's say there is
80 // constant 0b00000000001000000000010000000000 which does not consist of
81 // consecutive ones. We can split it in to two bitmask immediate like
82 // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
83 // If we do AND with these two bitmask immediate, we can see original one.
84 unsigned LowestBitSet = countTrailingZeros(UImm);
85 unsigned HighestBitSet = Log2_64(UImm);
86
87 // Create a mask which is filled with one from the position of lowest bit set
88 // to the position of highest bit set.
89 T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
90 (static_cast<T>(1) << LowestBitSet);
91 // Create a mask which is filled with one outside the position of lowest bit
92 // set and the position of highest bit set.
93 T NewImm2 = UImm | ~NewImm1;
94
95 // If the split value is not valid bitmask immediate, do not split this
96 // constant.
97 if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
98 return false;
99
100 Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
101 Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
102 return true;
103 }
104
105 template <typename T>
visitAND(MachineInstr & MI,SmallSetVector<MachineInstr *,8> & ToBeRemoved)106 bool AArch64MIPeepholeOpt::visitAND(
107 MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
108 // Try below transformation.
109 //
110 // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
111 // MOVi64imm + ANDXrr ==> ANDXri + ANDXri
112 //
113 // The mov pseudo instruction could be expanded to multiple mov instructions
114 // later. Let's try to split the constant operand of mov instruction into two
115 // bitmask immediates. It makes only two AND instructions intead of multiple
116 // mov + and instructions.
117
118 unsigned RegSize = sizeof(T) * 8;
119 assert((RegSize == 32 || RegSize == 64) &&
120 "Invalid RegSize for AND bitmask peephole optimization");
121
122 // Check whether AND's MBB is in loop and the AND is loop invariant.
123 MachineBasicBlock *MBB = MI.getParent();
124 MachineLoop *L = MLI->getLoopFor(MBB);
125 if (L && !L->isLoopInvariant(MI))
126 return false;
127
128 // Check whether AND's operand is MOV with immediate.
129 MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
130 MachineInstr *SubregToRegMI = nullptr;
131 // If it is SUBREG_TO_REG, check its operand.
132 if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
133 SubregToRegMI = MovMI;
134 MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
135 }
136
137 if (MovMI->getOpcode() != AArch64::MOVi32imm &&
138 MovMI->getOpcode() != AArch64::MOVi64imm)
139 return false;
140
141 // If the MOV has multiple uses, do not split the immediate because it causes
142 // more instructions.
143 if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
144 return false;
145
146 if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
147 return false;
148
149 // Split the bitmask immediate into two.
150 T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
151 // For the 32 bit form of instruction, the upper 32 bits of the destination
152 // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
153 // of UImm to zero.
154 if (SubregToRegMI)
155 UImm &= 0xFFFFFFFF;
156 T Imm1Enc;
157 T Imm2Enc;
158 if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
159 return false;
160
161 // Create new AND MIs.
162 DebugLoc DL = MI.getDebugLoc();
163 const TargetRegisterClass *ANDImmRC =
164 (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
165 Register DstReg = MI.getOperand(0).getReg();
166 Register SrcReg = MI.getOperand(1).getReg();
167 Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
168 unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
169
170 MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
171 BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
172 .addReg(SrcReg)
173 .addImm(Imm1Enc);
174
175 MRI->constrainRegClass(DstReg, ANDImmRC);
176 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
177 .addReg(NewTmpReg)
178 .addImm(Imm2Enc);
179
180 ToBeRemoved.insert(&MI);
181 if (SubregToRegMI)
182 ToBeRemoved.insert(SubregToRegMI);
183 ToBeRemoved.insert(MovMI);
184
185 return true;
186 }
187
runOnMachineFunction(MachineFunction & MF)188 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
189 if (skipFunction(MF.getFunction()))
190 return false;
191
192 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
193 MLI = &getAnalysis<MachineLoopInfo>();
194 MRI = &MF.getRegInfo();
195
196 if (!MRI->isSSA())
197 return false;
198
199 bool Changed = false;
200 SmallSetVector<MachineInstr *, 8> ToBeRemoved;
201
202 for (MachineBasicBlock &MBB : MF) {
203 for (MachineInstr &MI : MBB) {
204 switch (MI.getOpcode()) {
205 default:
206 break;
207 case AArch64::ANDWrr:
208 Changed = visitAND<uint32_t>(MI, ToBeRemoved);
209 break;
210 case AArch64::ANDXrr:
211 Changed = visitAND<uint64_t>(MI, ToBeRemoved);
212 break;
213 }
214 }
215 }
216
217 for (MachineInstr *MI : ToBeRemoved)
218 MI->eraseFromParent();
219
220 return Changed;
221 }
222
createAArch64MIPeepholeOptPass()223 FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
224 return new AArch64MIPeepholeOpt();
225 }
226