1 //===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups  -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs peephole optimizations to cleanup ugly code sequences at
10 // MachineInstruction layer.
11 //
12 // Currently, there are two optimizations implemented:
13 //  - One pre-RA MachineSSA pass to eliminate type promotion sequences, those
14 //    zero extend 32-bit subregisters to 64-bit registers, if the compiler
15 //    could prove the subregisters is defined by 32-bit operations in which
16 //    case the upper half of the underlying 64-bit registers were zeroed
17 //    implicitly.
18 //
19 //  - One post-RA PreEmit pass to do final cleanup on some redundant
20 //    instructions generated due to bad RA on subregister.
21 //===----------------------------------------------------------------------===//
22 
23 #include "BPF.h"
24 #include "BPFInstrInfo.h"
25 #include "BPFTargetMachine.h"
26 #include "llvm/ADT/Statistic.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/Support/Debug.h"
31 #include <set>
32 
33 using namespace llvm;
34 
35 #define DEBUG_TYPE "bpf-mi-zext-elim"
36 
37 static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,
38   cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));
39 
40 STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");
41 
42 namespace {
43 
44 struct BPFMIPeephole : public MachineFunctionPass {
45 
46   static char ID;
47   const BPFInstrInfo *TII;
48   MachineFunction *MF;
49   MachineRegisterInfo *MRI;
50 
51   BPFMIPeephole() : MachineFunctionPass(ID) {
52     initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());
53   }
54 
55 private:
56   // Initialize class variables.
57   void initialize(MachineFunction &MFParm);
58 
59   bool isCopyFrom32Def(MachineInstr *CopyMI);
60   bool isInsnFrom32Def(MachineInstr *DefInsn);
61   bool isPhiFrom32Def(MachineInstr *MovMI);
62   bool isMovFrom32Def(MachineInstr *MovMI);
63   bool eliminateZExtSeq();
64   bool eliminateZExt();
65 
66   std::set<MachineInstr *> PhiInsns;
67 
68 public:
69 
70   // Main entry point for this pass.
71   bool runOnMachineFunction(MachineFunction &MF) override {
72     if (skipFunction(MF.getFunction()))
73       return false;
74 
75     initialize(MF);
76 
77     // First try to eliminate (zext, lshift, rshift) and then
78     // try to eliminate zext.
79     bool ZExtSeqExist, ZExtExist;
80     ZExtSeqExist = eliminateZExtSeq();
81     ZExtExist = eliminateZExt();
82     return ZExtSeqExist || ZExtExist;
83   }
84 };
85 
86 // Initialize class variables.
87 void BPFMIPeephole::initialize(MachineFunction &MFParm) {
88   MF = &MFParm;
89   MRI = &MF->getRegInfo();
90   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
91   LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");
92 }
93 
94 bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)
95 {
96   MachineOperand &opnd = CopyMI->getOperand(1);
97 
98   if (!opnd.isReg())
99     return false;
100 
101   // Return false if getting value from a 32bit physical register.
102   // Most likely, this physical register is aliased to
103   // function call return value or current function parameters.
104   Register Reg = opnd.getReg();
105   if (!Reg.isVirtual())
106     return false;
107 
108   if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)
109     return false;
110 
111   MachineInstr *DefInsn = MRI->getVRegDef(Reg);
112   if (!isInsnFrom32Def(DefInsn))
113     return false;
114 
115   return true;
116 }
117 
118 bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)
119 {
120   for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {
121     MachineOperand &opnd = PhiMI->getOperand(i);
122 
123     if (!opnd.isReg())
124       return false;
125 
126     MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());
127     if (!PhiDef)
128       return false;
129     if (PhiDef->isPHI()) {
130       if (!PhiInsns.insert(PhiDef).second)
131         return false;
132       if (!isPhiFrom32Def(PhiDef))
133         return false;
134     }
135     if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))
136       return false;
137   }
138 
139   return true;
140 }
141 
142 // The \p DefInsn instruction defines a virtual register.
143 bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)
144 {
145   if (!DefInsn)
146     return false;
147 
148   if (DefInsn->isPHI()) {
149     if (!PhiInsns.insert(DefInsn).second)
150       return false;
151     if (!isPhiFrom32Def(DefInsn))
152       return false;
153   } else if (DefInsn->getOpcode() == BPF::COPY) {
154     if (!isCopyFrom32Def(DefInsn))
155       return false;
156   }
157 
158   return true;
159 }
160 
161 bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)
162 {
163   MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());
164 
165   LLVM_DEBUG(dbgs() << "  Def of Mov Src:");
166   LLVM_DEBUG(DefInsn->dump());
167 
168   PhiInsns.clear();
169   if (!isInsnFrom32Def(DefInsn))
170     return false;
171 
172   LLVM_DEBUG(dbgs() << "  One ZExt elim sequence identified.\n");
173 
174   return true;
175 }
176 
177 bool BPFMIPeephole::eliminateZExtSeq() {
178   MachineInstr* ToErase = nullptr;
179   bool Eliminated = false;
180 
181   for (MachineBasicBlock &MBB : *MF) {
182     for (MachineInstr &MI : MBB) {
183       // If the previous instruction was marked for elimination, remove it now.
184       if (ToErase) {
185         ToErase->eraseFromParent();
186         ToErase = nullptr;
187       }
188 
189       // Eliminate the 32-bit to 64-bit zero extension sequence when possible.
190       //
191       //   MOV_32_64 rB, wA
192       //   SLL_ri    rB, rB, 32
193       //   SRL_ri    rB, rB, 32
194       if (MI.getOpcode() == BPF::SRL_ri &&
195           MI.getOperand(2).getImm() == 32) {
196         Register DstReg = MI.getOperand(0).getReg();
197         Register ShfReg = MI.getOperand(1).getReg();
198         MachineInstr *SllMI = MRI->getVRegDef(ShfReg);
199 
200         LLVM_DEBUG(dbgs() << "Starting SRL found:");
201         LLVM_DEBUG(MI.dump());
202 
203         if (!SllMI ||
204             SllMI->isPHI() ||
205             SllMI->getOpcode() != BPF::SLL_ri ||
206             SllMI->getOperand(2).getImm() != 32)
207           continue;
208 
209         LLVM_DEBUG(dbgs() << "  SLL found:");
210         LLVM_DEBUG(SllMI->dump());
211 
212         MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());
213         if (!MovMI ||
214             MovMI->isPHI() ||
215             MovMI->getOpcode() != BPF::MOV_32_64)
216           continue;
217 
218         LLVM_DEBUG(dbgs() << "  Type cast Mov found:");
219         LLVM_DEBUG(MovMI->dump());
220 
221         Register SubReg = MovMI->getOperand(1).getReg();
222         if (!isMovFrom32Def(MovMI)) {
223           LLVM_DEBUG(dbgs()
224                      << "  One ZExt elim sequence failed qualifying elim.\n");
225           continue;
226         }
227 
228         BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)
229           .addImm(0).addReg(SubReg).addImm(BPF::sub_32);
230 
231         SllMI->eraseFromParent();
232         MovMI->eraseFromParent();
233         // MI is the right shift, we can't erase it in it's own iteration.
234         // Mark it to ToErase, and erase in the next iteration.
235         ToErase = &MI;
236         ZExtElemNum++;
237         Eliminated = true;
238       }
239     }
240   }
241 
242   return Eliminated;
243 }
244 
245 bool BPFMIPeephole::eliminateZExt() {
246   MachineInstr* ToErase = nullptr;
247   bool Eliminated = false;
248 
249   for (MachineBasicBlock &MBB : *MF) {
250     for (MachineInstr &MI : MBB) {
251       // If the previous instruction was marked for elimination, remove it now.
252       if (ToErase) {
253         ToErase->eraseFromParent();
254         ToErase = nullptr;
255       }
256 
257       if (MI.getOpcode() != BPF::MOV_32_64)
258         continue;
259 
260       // Eliminate MOV_32_64 if possible.
261       //   MOV_32_64 rA, wB
262       //
263       // If wB has been zero extended, replace it with a SUBREG_TO_REG.
264       // This is to workaround BPF programs where pkt->{data, data_end}
265       // is encoded as u32, but actually the verifier populates them
266       // as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.
267       LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");
268       LLVM_DEBUG(MI.dump());
269 
270       if (!isMovFrom32Def(&MI))
271         continue;
272 
273       LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");
274 
275       Register dst = MI.getOperand(0).getReg();
276       Register src = MI.getOperand(1).getReg();
277 
278       // Build a SUBREG_TO_REG instruction.
279       BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)
280         .addImm(0).addReg(src).addImm(BPF::sub_32);
281 
282       ToErase = &MI;
283       Eliminated = true;
284     }
285   }
286 
287   return Eliminated;
288 }
289 
290 } // end default namespace
291 
292 INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,
293                 "BPF MachineSSA Peephole Optimization For ZEXT Eliminate",
294                 false, false)
295 
296 char BPFMIPeephole::ID = 0;
297 FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }
298 
299 STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");
300 
301 namespace {
302 
303 struct BPFMIPreEmitPeephole : public MachineFunctionPass {
304 
305   static char ID;
306   MachineFunction *MF;
307   const TargetRegisterInfo *TRI;
308   const BPFInstrInfo *TII;
309   bool SupportGotol;
310 
311   BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {
312     initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
313   }
314 
315 private:
316   // Initialize class variables.
317   void initialize(MachineFunction &MFParm);
318 
319   bool in16BitRange(int Num);
320   bool eliminateRedundantMov();
321   bool adjustBranch();
322 
323 public:
324 
325   // Main entry point for this pass.
326   bool runOnMachineFunction(MachineFunction &MF) override {
327     if (skipFunction(MF.getFunction()))
328       return false;
329 
330     initialize(MF);
331 
332     bool Changed;
333     Changed = eliminateRedundantMov();
334     if (SupportGotol)
335       Changed = adjustBranch() || Changed;
336     return Changed;
337   }
338 };
339 
340 // Initialize class variables.
341 void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {
342   MF = &MFParm;
343   TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
344   TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();
345   SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();
346   LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");
347 }
348 
349 bool BPFMIPreEmitPeephole::eliminateRedundantMov() {
350   MachineInstr* ToErase = nullptr;
351   bool Eliminated = false;
352 
353   for (MachineBasicBlock &MBB : *MF) {
354     for (MachineInstr &MI : MBB) {
355       // If the previous instruction was marked for elimination, remove it now.
356       if (ToErase) {
357         LLVM_DEBUG(dbgs() << "  Redundant Mov Eliminated:");
358         LLVM_DEBUG(ToErase->dump());
359         ToErase->eraseFromParent();
360         ToErase = nullptr;
361       }
362 
363       // Eliminate identical move:
364       //
365       //   MOV rA, rA
366       //
367       // Note that we cannot remove
368       //   MOV_32_64  rA, wA
369       //   MOV_rr_32  wA, wA
370       // as these two instructions having side effects, zeroing out
371       // top 32 bits of rA.
372       unsigned Opcode = MI.getOpcode();
373       if (Opcode == BPF::MOV_rr) {
374         Register dst = MI.getOperand(0).getReg();
375         Register src = MI.getOperand(1).getReg();
376 
377         if (dst != src)
378           continue;
379 
380         ToErase = &MI;
381         RedundantMovElemNum++;
382         Eliminated = true;
383       }
384     }
385   }
386 
387   return Eliminated;
388 }
389 
390 bool BPFMIPreEmitPeephole::in16BitRange(int Num) {
391   // Well, the cut-off is not precisely at 16bit range since
392   // new codes are added during the transformation. So let us
393   // a little bit conservative.
394   return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;
395 }
396 
397 // Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff)
398 // is supported for both unconditional (JMP) and condition (JEQ, JSGT,
399 // etc.) branches. In certain cases, e.g., full unrolling, the branch
400 // target offset might exceed 16bit range. If this happens, the llvm
401 // will generate incorrect code as the offset is truncated to 16bit.
402 //
403 // To fix this rare case, a new insn JMPL is introduced. This new
404 // insn supports supports 32bit branch target offset. The compiler
405 // does not use this insn during insn selection. Rather, BPF backend
406 // will estimate the branch target offset and do JMP -> JMPL and
407 // JEQ -> JEQ + JMPL conversion if the estimated branch target offset
408 // is beyond 16bit.
409 bool BPFMIPreEmitPeephole::adjustBranch() {
410   bool Changed = false;
411   int CurrNumInsns = 0;
412   DenseMap<MachineBasicBlock *, int> SoFarNumInsns;
413   DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;
414   std::vector<MachineBasicBlock *> MBBs;
415 
416   MachineBasicBlock *PrevBB = nullptr;
417   for (MachineBasicBlock &MBB : *MF) {
418     // MBB.size() is the number of insns in this basic block, including some
419     // debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.
420     // Typically we have way more normal insns than DEBUG_VALUE insns.
421     // Also, if we indeed need to convert conditional branch like JEQ to
422     // JEQ + JMPL, we actually introduced some new insns like below.
423     CurrNumInsns += (int)MBB.size();
424     SoFarNumInsns[&MBB] = CurrNumInsns;
425     if (PrevBB != nullptr)
426       FollowThroughBB[PrevBB] = &MBB;
427     PrevBB = &MBB;
428     // A list of original BBs to make later traveral easier.
429     MBBs.push_back(&MBB);
430   }
431   FollowThroughBB[PrevBB] = nullptr;
432 
433   for (unsigned i = 0; i < MBBs.size(); i++) {
434     // We have four cases here:
435     //  (1). no terminator, simple follow through.
436     //  (2). jmp to another bb.
437     //  (3). conditional jmp to another bb or follow through.
438     //  (4). conditional jmp followed by an unconditional jmp.
439     MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;
440 
441     MachineBasicBlock *MBB = MBBs[i];
442     for (MachineInstr &Term : MBB->terminators()) {
443       if (Term.isConditionalBranch()) {
444         assert(CondJmp == nullptr);
445         CondJmp = &Term;
446       } else if (Term.isUnconditionalBranch()) {
447         assert(UncondJmp == nullptr);
448         UncondJmp = &Term;
449       }
450     }
451 
452     // (1). no terminator, simple follow through.
453     if (!CondJmp && !UncondJmp)
454       continue;
455 
456     MachineBasicBlock *CondTargetBB, *JmpBB;
457     CurrNumInsns = SoFarNumInsns[MBB];
458 
459     // (2). jmp to another bb.
460     if (!CondJmp && UncondJmp) {
461       JmpBB = UncondJmp->getOperand(0).getMBB();
462       if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))
463         continue;
464 
465       // replace this insn as a JMPL.
466       BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
467       UncondJmp->eraseFromParent();
468       Changed = true;
469       continue;
470     }
471 
472     const BasicBlock *TermBB = MBB->getBasicBlock();
473     int Dist;
474 
475     // (3). conditional jmp to another bb or follow through.
476     if (!UncondJmp) {
477       CondTargetBB = CondJmp->getOperand(2).getMBB();
478       MachineBasicBlock *FollowBB = FollowThroughBB[MBB];
479       Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
480       if (in16BitRange(Dist))
481         continue;
482 
483       // We have
484       //   B2: ...
485       //       if (cond) goto B5
486       //   B3: ...
487       // where B2 -> B5 is beyond 16bit range.
488       //
489       // We do not have 32bit cond jmp insn. So we try to do
490       // the following.
491       //   B2:     ...
492       //           if (cond) goto New_B1
493       //   New_B0  goto B3
494       //   New_B1: gotol B5
495       //   B3: ...
496       // Basically two new basic blocks are created.
497       MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);
498       MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB);
499 
500       // Insert New_B0 and New_B1 into function block list.
501       MachineFunction::iterator MBB_I  = ++MBB->getIterator();
502       MF->insert(MBB_I, New_B0);
503       MF->insert(MBB_I, New_B1);
504 
505       // replace B2 cond jump
506       if (CondJmp->getOperand(1).isReg())
507         BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
508             .addReg(CondJmp->getOperand(0).getReg())
509             .addReg(CondJmp->getOperand(1).getReg())
510             .addMBB(New_B1);
511       else
512         BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
513             .addReg(CondJmp->getOperand(0).getReg())
514             .addImm(CondJmp->getOperand(1).getImm())
515             .addMBB(New_B1);
516 
517       // it is possible that CondTargetBB and FollowBB are the same. But the
518       // above Dist checking should already filtered this case.
519       MBB->removeSuccessor(CondTargetBB);
520       MBB->removeSuccessor(FollowBB);
521       MBB->addSuccessor(New_B0);
522       MBB->addSuccessor(New_B1);
523 
524       // Populate insns in New_B0 and New_B1.
525       BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB);
526       BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))
527           .addMBB(CondTargetBB);
528 
529       New_B0->addSuccessor(FollowBB);
530       New_B1->addSuccessor(CondTargetBB);
531       CondJmp->eraseFromParent();
532       Changed = true;
533       continue;
534     }
535 
536     //  (4). conditional jmp followed by an unconditional jmp.
537     CondTargetBB = CondJmp->getOperand(2).getMBB();
538     JmpBB = UncondJmp->getOperand(0).getMBB();
539 
540     // We have
541     //   B2: ...
542     //       if (cond) goto B5
543     //       JMP B7
544     //   B3: ...
545     //
546     // If only B2->B5 is out of 16bit range, we can do
547     //   B2: ...
548     //       if (cond) goto new_B
549     //       JMP B7
550     //   New_B: gotol B5
551     //   B3: ...
552     //
553     // If only 'JMP B7' is out of 16bit range, we can replace
554     // 'JMP B7' with 'JMPL B7'.
555     //
556     // If both B2->B5 and 'JMP B7' is out of range, just do
557     // both the above transformations.
558     Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;
559     if (!in16BitRange(Dist)) {
560       MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);
561 
562       // Insert New_B0 into function block list.
563       MF->insert(++MBB->getIterator(), New_B);
564 
565       // replace B2 cond jump
566       if (CondJmp->getOperand(1).isReg())
567         BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
568             .addReg(CondJmp->getOperand(0).getReg())
569             .addReg(CondJmp->getOperand(1).getReg())
570             .addMBB(New_B);
571       else
572         BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))
573             .addReg(CondJmp->getOperand(0).getReg())
574             .addImm(CondJmp->getOperand(1).getImm())
575             .addMBB(New_B);
576 
577       if (CondTargetBB != JmpBB)
578         MBB->removeSuccessor(CondTargetBB);
579       MBB->addSuccessor(New_B);
580 
581       // Populate insn in New_B.
582       BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);
583 
584       New_B->addSuccessor(CondTargetBB);
585       CondJmp->eraseFromParent();
586       Changed = true;
587     }
588 
589     if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {
590       BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);
591       UncondJmp->eraseFromParent();
592       Changed = true;
593     }
594   }
595 
596   return Changed;
597 }
598 
599 } // end default namespace
600 
601 INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",
602                 "BPF PreEmit Peephole Optimization", false, false)
603 
604 char BPFMIPreEmitPeephole::ID = 0;
605 FunctionPass* llvm::createBPFMIPreEmitPeepholePass()
606 {
607   return new BPFMIPreEmitPeephole();
608 }
609