1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
// move defs closer to their use in order to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
27 #include "llvm/ADT/PostOrderIterator.h"
28 #include "llvm/ADT/STLExtras.h"
29 #include "llvm/CodeGen/MachineFunctionPass.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/Passes.h"
33 #include "llvm/InitializePasses.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/raw_ostream.h"
36 
37 #include <queue>
38 
39 using namespace llvm;
40 
41 namespace llvm {
42 extern char &MIRCanonicalizerID;
43 } // namespace llvm
44 
45 #define DEBUG_TYPE "mir-canonicalizer"
46 
47 static cl::opt<unsigned>
48     CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
49                                cl::value_desc("N"),
50                                cl::desc("Function number to canonicalize."));
51 
52 namespace {
53 
54 class MIRCanonicalizer : public MachineFunctionPass {
55 public:
56   static char ID;
MIRCanonicalizer()57   MIRCanonicalizer() : MachineFunctionPass(ID) {}
58 
getPassName() const59   StringRef getPassName() const override {
60     return "Rename register operands in a canonical ordering.";
61   }
62 
getAnalysisUsage(AnalysisUsage & AU) const63   void getAnalysisUsage(AnalysisUsage &AU) const override {
64     AU.setPreservesCFG();
65     MachineFunctionPass::getAnalysisUsage(AU);
66   }
67 
68   bool runOnMachineFunction(MachineFunction &MF) override;
69 };
70 
71 } // end anonymous namespace
72 
73 char MIRCanonicalizer::ID;
74 
75 char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
76 
77 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
78                       "Rename Register Operands Canonically", false, false)
79 
80 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
81                     "Rename Register Operands Canonically", false, false)
82 
GetRPOList(MachineFunction & MF)83 static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
84   if (MF.empty())
85     return {};
86   ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
87   std::vector<MachineBasicBlock *> RPOList;
88   append_range(RPOList, RPOT);
89 
90   return RPOList;
91 }
92 
93 static bool
rescheduleLexographically(std::vector<MachineInstr * > instructions,MachineBasicBlock * MBB,std::function<MachineBasicBlock::iterator ()> getPos)94 rescheduleLexographically(std::vector<MachineInstr *> instructions,
95                           MachineBasicBlock *MBB,
96                           std::function<MachineBasicBlock::iterator()> getPos) {
97 
98   bool Changed = false;
99   using StringInstrPair = std::pair<std::string, MachineInstr *>;
100   std::vector<StringInstrPair> StringInstrMap;
101 
102   for (auto *II : instructions) {
103     std::string S;
104     raw_string_ostream OS(S);
105     II->print(OS);
106     OS.flush();
107 
108     // Trim the assignment, or start from the beginning in the case of a store.
109     const size_t i = S.find('=');
110     StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
111   }
112 
113   llvm::sort(StringInstrMap,
114              [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
115                return (a.first < b.first);
116              });
117 
118   for (auto &II : StringInstrMap) {
119 
120     LLVM_DEBUG({
121       dbgs() << "Splicing ";
122       II.second->dump();
123       dbgs() << " right before: ";
124       getPos()->dump();
125     });
126 
127     Changed = true;
128     MBB->splice(getPos(), MBB, II.second);
129   }
130 
131   return Changed;
132 }
133 
rescheduleCanonically(unsigned & PseudoIdempotentInstCount,MachineBasicBlock * MBB)134 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
135                                   MachineBasicBlock *MBB) {
136 
137   bool Changed = false;
138 
139   // Calculates the distance of MI from the beginning of its parent BB.
140   auto getInstrIdx = [](const MachineInstr &MI) {
141     unsigned i = 0;
142     for (auto &CurMI : *MI.getParent()) {
143       if (&CurMI == &MI)
144         return i;
145       i++;
146     }
147     return ~0U;
148   };
149 
150   // Pre-Populate vector of instructions to reschedule so that we don't
151   // clobber the iterator.
152   std::vector<MachineInstr *> Instructions;
153   for (auto &MI : *MBB) {
154     Instructions.push_back(&MI);
155   }
156 
157   std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
158   std::map<unsigned, MachineInstr *> MultiUserLookup;
159   unsigned UseToBringDefCloserToCount = 0;
160   std::vector<MachineInstr *> PseudoIdempotentInstructions;
161   std::vector<unsigned> PhysRegDefs;
162   for (auto *II : Instructions) {
163     for (unsigned i = 1; i < II->getNumOperands(); i++) {
164       MachineOperand &MO = II->getOperand(i);
165       if (!MO.isReg())
166         continue;
167 
168       if (Register::isVirtualRegister(MO.getReg()))
169         continue;
170 
171       if (!MO.isDef())
172         continue;
173 
174       PhysRegDefs.push_back(MO.getReg());
175     }
176   }
177 
178   for (auto *II : Instructions) {
179     if (II->getNumOperands() == 0)
180       continue;
181     if (II->mayLoadOrStore())
182       continue;
183 
184     MachineOperand &MO = II->getOperand(0);
185     if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
186       continue;
187     if (!MO.isDef())
188       continue;
189 
190     bool IsPseudoIdempotent = true;
191     for (unsigned i = 1; i < II->getNumOperands(); i++) {
192 
193       if (II->getOperand(i).isImm()) {
194         continue;
195       }
196 
197       if (II->getOperand(i).isReg()) {
198         if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
199           if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
200             continue;
201           }
202       }
203 
204       IsPseudoIdempotent = false;
205       break;
206     }
207 
208     if (IsPseudoIdempotent) {
209       PseudoIdempotentInstructions.push_back(II);
210       continue;
211     }
212 
213     LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
214 
215     MachineInstr *Def = II;
216     unsigned Distance = ~0U;
217     MachineInstr *UseToBringDefCloserTo = nullptr;
218     MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
219     for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
220       MachineInstr *UseInst = UO.getParent();
221 
222       const unsigned DefLoc = getInstrIdx(*Def);
223       const unsigned UseLoc = getInstrIdx(*UseInst);
224       const unsigned Delta = (UseLoc - DefLoc);
225 
226       if (UseInst->getParent() != Def->getParent())
227         continue;
228       if (DefLoc >= UseLoc)
229         continue;
230 
231       if (Delta < Distance) {
232         Distance = Delta;
233         UseToBringDefCloserTo = UseInst;
234         MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
235       }
236     }
237 
238     const auto BBE = MBB->instr_end();
239     MachineBasicBlock::iterator DefI = BBE;
240     MachineBasicBlock::iterator UseI = BBE;
241 
242     for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
243 
244       if (DefI != BBE && UseI != BBE)
245         break;
246 
247       if (&*BBI == Def) {
248         DefI = BBI;
249         continue;
250       }
251 
252       if (&*BBI == UseToBringDefCloserTo) {
253         UseI = BBI;
254         continue;
255       }
256     }
257 
258     if (DefI == BBE || UseI == BBE)
259       continue;
260 
261     LLVM_DEBUG({
262       dbgs() << "Splicing ";
263       DefI->dump();
264       dbgs() << " right before: ";
265       UseI->dump();
266     });
267 
268     MultiUsers[UseToBringDefCloserTo].push_back(Def);
269     Changed = true;
270     MBB->splice(UseI, MBB, DefI);
271   }
272 
273   // Sort the defs for users of multiple defs lexographically.
274   for (const auto &E : MultiUserLookup) {
275 
276     auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
277       return &MI == E.second;
278     });
279 
280     if (UseI == MBB->instr_end())
281       continue;
282 
283     LLVM_DEBUG(
284         dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
285     Changed |= rescheduleLexographically(
286         MultiUsers[E.second], MBB,
287         [&]() -> MachineBasicBlock::iterator { return UseI; });
288   }
289 
290   PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
291   LLVM_DEBUG(
292       dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
293   Changed |= rescheduleLexographically(
294       PseudoIdempotentInstructions, MBB,
295       [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
296 
297   return Changed;
298 }
299 
propagateLocalCopies(MachineBasicBlock * MBB)300 static bool propagateLocalCopies(MachineBasicBlock *MBB) {
301   bool Changed = false;
302   MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
303 
304   std::vector<MachineInstr *> Copies;
305   for (MachineInstr &MI : MBB->instrs()) {
306     if (MI.isCopy())
307       Copies.push_back(&MI);
308   }
309 
310   for (MachineInstr *MI : Copies) {
311 
312     if (!MI->getOperand(0).isReg())
313       continue;
314     if (!MI->getOperand(1).isReg())
315       continue;
316 
317     const Register Dst = MI->getOperand(0).getReg();
318     const Register Src = MI->getOperand(1).getReg();
319 
320     if (!Register::isVirtualRegister(Dst))
321       continue;
322     if (!Register::isVirtualRegister(Src))
323       continue;
324     // Not folding COPY instructions if regbankselect has not set the RCs.
325     // Why are we only considering Register Classes? Because the verifier
326     // sometimes gets upset if the register classes don't match even if the
327     // types do. A future patch might add COPY folding for matching types in
328     // pre-registerbankselect code.
329     if (!MRI.getRegClassOrNull(Dst))
330       continue;
331     if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
332       continue;
333 
334     std::vector<MachineOperand *> Uses;
335     for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
336       Uses.push_back(&*UI);
337     for (auto *MO : Uses)
338       MO->setReg(Src);
339 
340     Changed = true;
341     MI->eraseFromParent();
342   }
343 
344   return Changed;
345 }
346 
doDefKillClear(MachineBasicBlock * MBB)347 static bool doDefKillClear(MachineBasicBlock *MBB) {
348   bool Changed = false;
349 
350   for (auto &MI : *MBB) {
351     for (auto &MO : MI.operands()) {
352       if (!MO.isReg())
353         continue;
354       if (!MO.isDef() && MO.isKill()) {
355         Changed = true;
356         MO.setIsKill(false);
357       }
358 
359       if (MO.isDef() && MO.isDead()) {
360         Changed = true;
361         MO.setIsDead(false);
362       }
363     }
364   }
365 
366   return Changed;
367 }
368 
runOnBasicBlock(MachineBasicBlock * MBB,unsigned BasicBlockNum,VRegRenamer & Renamer)369 static bool runOnBasicBlock(MachineBasicBlock *MBB,
370                             unsigned BasicBlockNum, VRegRenamer &Renamer) {
371   LLVM_DEBUG({
372     dbgs() << "\n\n  NEW BASIC BLOCK: " << MBB->getName() << "  \n\n";
373     dbgs() << "\n\n================================================\n\n";
374   });
375 
376   bool Changed = false;
377 
378   LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
379 
380   LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
381              MBB->dump(););
382   Changed |= propagateLocalCopies(MBB);
383   LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
384 
385   LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
386   unsigned IdempotentInstCount = 0;
387   Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
388   LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
389 
390   Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
391 
392   // TODO: Consider dropping this. Dropping kill defs is probably not
393   // semantically sound.
394   Changed |= doDefKillClear(MBB);
395 
396   LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
397              dbgs() << "\n";);
398   LLVM_DEBUG(
399       dbgs() << "\n\n================================================\n\n");
400   return Changed;
401 }
402 
runOnMachineFunction(MachineFunction & MF)403 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
404 
405   static unsigned functionNum = 0;
406   if (CanonicalizeFunctionNumber != ~0U) {
407     if (CanonicalizeFunctionNumber != functionNum++)
408       return false;
409     LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
410                       << "\n";);
411   }
412 
413   // we need a valid vreg to create a vreg type for skipping all those
414   // stray vreg numbers so reach alignment/canonical vreg values.
415   std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
416 
417   LLVM_DEBUG(
418       dbgs() << "\n\n  NEW MACHINE FUNCTION: " << MF.getName() << "  \n\n";
419       dbgs() << "\n\n================================================\n\n";
420       dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
421       for (auto MBB
422            : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
423       << "\n\n================================================\n\n";);
424 
425   unsigned BBNum = 0;
426   bool Changed = false;
427   MachineRegisterInfo &MRI = MF.getRegInfo();
428   VRegRenamer Renamer(MRI);
429   for (auto MBB : RPOList)
430     Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
431 
432   return Changed;
433 }
434