1 //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups  -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs peephole optimizations to cleanup ugly code sequences at
10 // MachineInstruction layer.
11 //
12 // Currently, there are two optimizations implemented:
13 //  - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
14 //    zero extend 32-bit subregisters to 64-bit registers, if the compiler
15 //    could prove the subregisters is defined by 32-bit operations in which
16 //    case the upper half of the underlying 64-bit registers were zeroed
17 //    implicitly.
18 //
19 //  - One post-RA PreEmit pass to do final cleanup on some redundant
20 //    instructions generated due to bad RA on subregister.
21 //===----------------------------------------------------------------------===//
22 
23 #include "BPF.h"
24 #include "BPFInstrInfo.h"
25 #include "BPFTargetMachine.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/CodeGen/MachineInstrBuilder.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/Support/Debug.h"
30 
31 using namespace llvm;
32 
33 #define DEBUG_TYPE "bpf-mi-zext-elim"
34 
35 STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
36 
37 namespace {
38 
39 struct BPFMIPeephole : public MachineFunctionPass {
40 
41   static char ID;
42   const BPFInstrInfo *TII;
43   MachineFunction *MF;
44   MachineRegisterInfo *MRI;
45 
46   BPFMIPeephole() : MachineFunctionPass(ID) {
47     initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
48   }
49 
50 private:
51   // Initialize class variables.
52   void initialize(MachineFunction &MFParm);
53 
54   bool isCopyFrom32Def(MachineInstr *CopyMI);
55   bool isInsnFrom32Def(MachineInstr *DefInsn);
56   bool isPhiFrom32Def(MachineInstr *MovMI);
57   bool isMovFrom32Def(MachineInstr *MovMI);
58   bool eliminateZExtSeq(void);
59 
60   std::set<MachineInstr *> PhiInsns;
61 
62 public:
63 
64   // Main entry point for this pass.
65   bool runOnMachineFunction(MachineFunction &MF) override {
66     if (skipFunction(MF.getFunction()))
67       return false;
68 
69     initialize(MF);
70 
71     return eliminateZExtSeq();
72   }
73 };
74 
75 // Initialize class variables.
76 void BPFMIPeephole::initialize(MachineFunction &MFParm) {
77   MF = &MFParm;
78   MRI = &MF->getRegInfo();
79   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
80   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
81 }
82 
83 bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
84 {
85   MachineOperand &opnd = CopyMI->getOperand(1);
86 
87   if (!opnd.isReg())
88     return false;
89 
90   // Return false if getting value from a 32bit physical register.
91   // Most likely, this physical register is aliased to
92   // function call return value or current function parameters.
93   Register Reg = opnd.getReg();
94   if (!Register::isVirtualRegister(Reg))
95     return false;
96 
97   if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
98     return false;
99 
100   MachineInstr *DefInsn = MRI->getVRegDef(Reg);
101   if (!isInsnFrom32Def(DefInsn))
102     return false;
103 
104   return true;
105 }
106 
107 bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
108 {
109   for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
110     MachineOperand &opnd = PhiMI->getOperand(i);
111 
112     if (!opnd.isReg())
113       return false;
114 
115     MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
116     if (!PhiDef)
117       return false;
118     if (PhiDef->isPHI()) {
119       if (PhiInsns.find(PhiDef) != PhiInsns.end())
120         return false;
121       PhiInsns.insert(PhiDef);
122       if (!isPhiFrom32Def(PhiDef))
123         return false;
124     }
125     if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
126       return false;
127   }
128 
129   return true;
130 }
131 
132 // The \p DefInsn instruction defines a virtual register.
133 bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
134 {
135   if (!DefInsn)
136     return false;
137 
138   if (DefInsn->isPHI()) {
139     if (PhiInsns.find(DefInsn) != PhiInsns.end())
140       return false;
141     PhiInsns.insert(DefInsn);
142     if (!isPhiFrom32Def(DefInsn))
143       return false;
144   } else if (DefInsn->getOpcode() == BPF::COPY) {
145     if (!isCopyFrom32Def(DefInsn))
146       return false;
147   }
148 
149   return true;
150 }
151 
152 bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
153 {
154   MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
155 
156   LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
157   LLVM_DEBUG(DefInsn->dump());
158 
159   PhiInsns.clear();
160   if (!isInsnFrom32Def(DefInsn))
161     return false;
162 
163   LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");
164 
165   return true;
166 }
167 
168 bool BPFMIPeephole::eliminateZExtSeq(void) {
169   MachineInstr* ToErase = nullptr;
170   bool Eliminated = false;
171 
172   for (MachineBasicBlock &MBB : *MF) {
173     for (MachineInstr &MI : MBB) {
174       // If the previous instruction was marked for elimination, remove it now.
175       if (ToErase) {
176         ToErase->eraseFromParent();
177         ToErase = nullptr;
178       }
179 
180       // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
181       //
182       //   MOV_32_64 rB, wA
183       //   SLL_ri    rB, rB, 32
184       //   SRL_ri    rB, rB, 32
185       if (MI.getOpcode() == BPF::SRL_ri &&
186           MI.getOperand(2).getImm() == 32) {
187         Register DstReg = MI.getOperand(0).getReg();
188         Register ShfReg = MI.getOperand(1).getReg();
189         MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
190 
191         LLVM_DEBUG(dbgs() << "Starting SRL found:");
192         LLVM_DEBUG(MI.dump());
193 
194         if (!SllMI ||
195             SllMI->isPHI() ||
196             SllMI->getOpcode() != BPF::SLL_ri ||
197             SllMI->getOperand(2).getImm() != 32)
198           continue;
199 
200         LLVM_DEBUG(dbgs() << "  SLL found:");
201         LLVM_DEBUG(SllMI->dump());
202 
203         MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
204         if (!MovMI ||
205             MovMI->isPHI() ||
206             MovMI->getOpcode() != BPF::MOV_32_64)
207           continue;
208 
209         LLVM_DEBUG(dbgs() << "  Type cast Mov found:");
210         LLVM_DEBUG(MovMI->dump());
211 
212         Register SubReg = MovMI->getOperand(1).getReg();
213         if (!isMovFrom32Def(MovMI)) {
214           LLVM_DEBUG(dbgs()
215                      << "  One ZExt elim sequence failed qualifying elim.\n");
216           continue;
217         }
218 
219         BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
220           .addImm(0).addReg(SubReg).addImm(BPF::sub_32);
221 
222         SllMI->eraseFromParent();
223         MovMI->eraseFromParent();
224         // MI is the right shift, we can't erase it in it's own iteration.
225         // Mark it to ToErase, and erase in the next iteration.
226         ToErase = &MI;
227         ZExtElemNum++;
228         Eliminated = true;
229       }
230     }
231   }
232 
233   return Eliminated;
234 }
235 
236 } // end default namespace
237 
238 INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
239                 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
240                 false, false)
241 
242 char BPFMIPeephole::ID = 0;
243 FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
244 
245 STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
246 
247 namespace {
248 
249 struct BPFMIPreEmitPeephole : public MachineFunctionPass {
250 
251   static char ID;
252   MachineFunction *MF;
253   const TargetRegisterInfo *TRI;
254 
255   BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
256     initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
257   }
258 
259 private:
260   // Initialize class variables.
261   void initialize(MachineFunction &MFParm);
262 
263   bool eliminateRedundantMov(void);
264 
265 public:
266 
267   // Main entry point for this pass.
268   bool runOnMachineFunction(MachineFunction &MF) override {
269     if (skipFunction(MF.getFunction()))
270       return false;
271 
272     initialize(MF);
273 
274     return eliminateRedundantMov();
275   }
276 };
277 
278 // Initialize class variables.
279 void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
280   MF = &MFParm;
281   TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
282   LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
283 }
284 
285 bool BPFMIPreEmitPeephole::eliminateRedundantMov(void) {
286   MachineInstr* ToErase = nullptr;
287   bool Eliminated = false;
288 
289   for (MachineBasicBlock &MBB : *MF) {
290     for (MachineInstr &MI : MBB) {
291       // If the previous instruction was marked for elimination, remove it now.
292       if (ToErase) {
293         LLVM_DEBUG(dbgs() << "  Redundant Mov Eliminated:");
294         LLVM_DEBUG(ToErase->dump());
295         ToErase->eraseFromParent();
296         ToErase = nullptr;
297       }
298 
299       // Eliminate identical move:
300       //
301       //   MOV rA, rA
302       //
303       // This is particularly possible to happen when sub-register support
304       // enabled. The special type cast insn MOV_32_64 involves different
305       // register class on src (i32) and dst (i64), RA could generate useless
306       // instruction due to this.
307       unsigned Opcode = MI.getOpcode();
308       if (Opcode == BPF::MOV_32_64 ||
309           Opcode == BPF::MOV_rr || Opcode == BPF::MOV_rr_32) {
310         Register dst = MI.getOperand(0).getReg();
311         Register src = MI.getOperand(1).getReg();
312 
313         if (Opcode == BPF::MOV_32_64)
314           dst = TRI->getSubReg(dst, BPF::sub_32);
315 
316         if (dst != src)
317           continue;
318 
319         ToErase = &MI;
320         RedundantMovElemNum++;
321         Eliminated = true;
322       }
323     }
324   }
325 
326   return Eliminated;
327 }
328 
329 } // end default namespace
330 
331 INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
332                 "BPF PreEmit Peephole Optimization", false, false)
333 
334 char BPFMIPreEmitPeephole::ID = 0;
335 FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
336 {
337   return new BPFMIPreEmitPeephole();
338 }
339 
340 STATISTIC(TruncElemNum, "Number of truncation eliminated");
341 
342 namespace {
343 
344 struct BPFMIPeepholeTruncElim : public MachineFunctionPass {
345 
346   static char ID;
347   const BPFInstrInfo *TII;
348   MachineFunction *MF;
349   MachineRegisterInfo *MRI;
350 
351   BPFMIPeepholeTruncElim() : MachineFunctionPass(ID) {
352     initializeBPFMIPeepholeTruncElimPass(*PassRegistry::getPassRegistry());
353   }
354 
355 private:
356   // Initialize class variables.
357   void initialize(MachineFunction &MFParm);
358 
359   bool eliminateTruncSeq(void);
360 
361 public:
362 
363   // Main entry point for this pass.
364   bool runOnMachineFunction(MachineFunction &MF) override {
365     if (skipFunction(MF.getFunction()))
366       return false;
367 
368     initialize(MF);
369 
370     return eliminateTruncSeq();
371   }
372 };
373 
374 static bool TruncSizeCompatible(int TruncSize, unsigned opcode)
375 {
376   if (TruncSize == 1)
377     return opcode == BPF::LDB || opcode == BPF::LDB32;
378 
379   if (TruncSize == 2)
380     return opcode == BPF::LDH || opcode == BPF::LDH32;
381 
382   if (TruncSize == 4)
383     return opcode == BPF::LDW || opcode == BPF::LDW32;
384 
385   return false;
386 }
387 
388 // Initialize class variables.
389 void BPFMIPeepholeTruncElim::initialize(MachineFunction &MFParm) {
390   MF = &MFParm;
391   MRI = &MF->getRegInfo();
392   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
393   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA TRUNC Elim peephole pass ***\n\n");
394 }
395 
396 // Reg truncating is often the result of 8/16/32bit->64bit or
397 // 8/16bit->32bit conversion. If the reg value is loaded with
398 // masked byte width, the AND operation can be removed since
399 // BPF LOAD already has zero extension.
400 //
401 // This also solved a correctness issue.
402 // In BPF socket-related program, e.g., __sk_buff->{data, data_end}
403 // are 32-bit registers, but later on, kernel verifier will rewrite
404 // it with 64-bit value. Therefore, truncating the value after the
405 // load will result in incorrect code.
406 bool BPFMIPeepholeTruncElim::eliminateTruncSeq(void) {
407   MachineInstr* ToErase = nullptr;
408   bool Eliminated = false;
409 
410   for (MachineBasicBlock &MBB : *MF) {
411     for (MachineInstr &MI : MBB) {
412       // The second insn to remove if the eliminate candidate is a pair.
413       MachineInstr *MI2 = nullptr;
414       Register DstReg, SrcReg;
415       MachineInstr *DefMI;
416       int TruncSize = -1;
417 
418       // If the previous instruction was marked for elimination, remove it now.
419       if (ToErase) {
420         ToErase->eraseFromParent();
421         ToErase = nullptr;
422       }
423 
424       // AND A, 0xFFFFFFFF will be turned into SLL/SRL pair due to immediate
425       // for BPF ANDI is i32, and this case only happens on ALU64.
426       if (MI.getOpcode() == BPF::SRL_ri &&
427           MI.getOperand(2).getImm() == 32) {
428         SrcReg = MI.getOperand(1).getReg();
429         MI2 = MRI->getVRegDef(SrcReg);
430         DstReg = MI.getOperand(0).getReg();
431 
432         if (!MI2 ||
433             MI2->getOpcode() != BPF::SLL_ri ||
434             MI2->getOperand(2).getImm() != 32)
435           continue;
436 
437         // Update SrcReg.
438         SrcReg = MI2->getOperand(1).getReg();
439         DefMI = MRI->getVRegDef(SrcReg);
440         if (DefMI)
441           TruncSize = 4;
442       } else if (MI.getOpcode() == BPF::AND_ri ||
443                  MI.getOpcode() == BPF::AND_ri_32) {
444         SrcReg = MI.getOperand(1).getReg();
445         DstReg = MI.getOperand(0).getReg();
446         DefMI = MRI->getVRegDef(SrcReg);
447 
448         if (!DefMI)
449           continue;
450 
451         int64_t imm = MI.getOperand(2).getImm();
452         if (imm == 0xff)
453           TruncSize = 1;
454         else if (imm == 0xffff)
455           TruncSize = 2;
456       }
457 
458       if (TruncSize == -1)
459         continue;
460 
461       // The definition is PHI node, check all inputs.
462       if (DefMI->isPHI()) {
463         bool CheckFail = false;
464 
465         for (unsigned i = 1, e = DefMI->getNumOperands(); i < e; i += 2) {
466           MachineOperand &opnd = DefMI->getOperand(i);
467           if (!opnd.isReg()) {
468             CheckFail = true;
469             break;
470           }
471 
472           MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
473           if (!PhiDef || PhiDef->isPHI() ||
474               !TruncSizeCompatible(TruncSize, PhiDef->getOpcode())) {
475             CheckFail = true;
476             break;
477           }
478         }
479 
480         if (CheckFail)
481           continue;
482       } else if (!TruncSizeCompatible(TruncSize, DefMI->getOpcode())) {
483         continue;
484       }
485 
486       BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::MOV_rr), DstReg)
487               .addReg(SrcReg);
488 
489       if (MI2)
490         MI2->eraseFromParent();
491 
492       // Mark it to ToErase, and erase in the next iteration.
493       ToErase = &MI;
494       TruncElemNum++;
495       Eliminated = true;
496     }
497   }
498 
499   return Eliminated;
500 }
501 
502 } // end default namespace
503 
504 INITIALIZE_PASS(BPFMIPeepholeTruncElim, "bpf-mi-trunc-elim",
505                 "BPF MachineSSA Peephole Optimization For TRUNC Eliminate",
506                 false, false)
507 
508 char BPFMIPeepholeTruncElim::ID = 0;
509 FunctionPass* llvm::createBPFMIPeepholeTruncElimPass()
510 {
511   return new BPFMIPeepholeTruncElim();
512 }
513