1 //===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the Localizer class.
10 //===----------------------------------------------------------------------===//
11 
12 #include "llvm/CodeGen/GlobalISel/Localizer.h"
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/Analysis/TargetTransformInfo.h"
15 #include "llvm/CodeGen/MachineRegisterInfo.h"
16 #include "llvm/InitializePasses.h"
17 #include "llvm/Support/Debug.h"
18 
19 #define DEBUG_TYPE "localizer"
20 
21 using namespace llvm;
22 
23 char Localizer::ID = 0;
24 INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
25                       "Move/duplicate certain instructions close to their use",
26                       false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)27 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
28 INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
29                     "Move/duplicate certain instructions close to their use",
30                     false, false)
31 
32 Localizer::Localizer(std::function<bool(const MachineFunction &)> F)
33     : MachineFunctionPass(ID), DoNotRunPass(F) {}
34 
Localizer()35 Localizer::Localizer()
36     : Localizer([](const MachineFunction &) { return false; }) {}
37 
init(MachineFunction & MF)38 void Localizer::init(MachineFunction &MF) {
39   MRI = &MF.getRegInfo();
40   TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
41 }
42 
shouldLocalize(const MachineInstr & MI)43 bool Localizer::shouldLocalize(const MachineInstr &MI) {
44   // Assuming a spill and reload of a value has a cost of 1 instruction each,
45   // this helper function computes the maximum number of uses we should consider
46   // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
47   // break even in terms of code size when the original MI has 2 users vs
48   // choosing to potentially spill. Any more than 2 users we we have a net code
49   // size increase. This doesn't take into account register pressure though.
50   auto maxUses = [](unsigned RematCost) {
51     // A cost of 1 means remats are basically free.
52     if (RematCost == 1)
53       return UINT_MAX;
54     if (RematCost == 2)
55       return 2U;
56 
57     // Remat is too expensive, only sink if there's one user.
58     if (RematCost > 2)
59       return 1U;
60     llvm_unreachable("Unexpected remat cost");
61   };
62 
63   // Helper to walk through uses and terminate if we've reached a limit. Saves
64   // us spending time traversing uses if all we want to know is if it's >= min.
65   auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
66     unsigned NumUses = 0;
67     auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
68     for (; UI != UE && NumUses < MaxUses; ++UI) {
69       NumUses++;
70     }
71     // If we haven't reached the end yet then there are more than MaxUses users.
72     return UI == UE;
73   };
74 
75   switch (MI.getOpcode()) {
76   default:
77     return false;
78   // Constants-like instructions should be close to their users.
79   // We don't want long live-ranges for them.
80   case TargetOpcode::G_CONSTANT:
81   case TargetOpcode::G_FCONSTANT:
82   case TargetOpcode::G_FRAME_INDEX:
83   case TargetOpcode::G_INTTOPTR:
84     return true;
85   case TargetOpcode::G_GLOBAL_VALUE: {
86     unsigned RematCost = TTI->getGISelRematGlobalCost();
87     Register Reg = MI.getOperand(0).getReg();
88     unsigned MaxUses = maxUses(RematCost);
89     if (MaxUses == UINT_MAX)
90       return true; // Remats are "free" so always localize.
91     bool B = isUsesAtMost(Reg, MaxUses);
92     return B;
93   }
94   }
95 }
96 
getAnalysisUsage(AnalysisUsage & AU) const97 void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
98   AU.addRequired<TargetTransformInfoWrapperPass>();
99   getSelectionDAGFallbackAnalysisUsage(AU);
100   MachineFunctionPass::getAnalysisUsage(AU);
101 }
102 
isLocalUse(MachineOperand & MOUse,const MachineInstr & Def,MachineBasicBlock * & InsertMBB)103 bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
104                            MachineBasicBlock *&InsertMBB) {
105   MachineInstr &MIUse = *MOUse.getParent();
106   InsertMBB = MIUse.getParent();
107   if (MIUse.isPHI())
108     InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB();
109   return InsertMBB == Def.getParent();
110 }
111 
localizeInterBlock(MachineFunction & MF,LocalizedSetVecT & LocalizedInstrs)112 bool Localizer::localizeInterBlock(MachineFunction &MF,
113                                    LocalizedSetVecT &LocalizedInstrs) {
114   bool Changed = false;
115   DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
116 
117   // Since the IRTranslator only emits constants into the entry block, and the
118   // rest of the GISel pipeline generally emits constants close to their users,
119   // we only localize instructions in the entry block here. This might change if
120   // we start doing CSE across blocks.
121   auto &MBB = MF.front();
122   for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) {
123     MachineInstr &MI = *RI;
124     if (!shouldLocalize(MI))
125       continue;
126     LLVM_DEBUG(dbgs() << "Should localize: " << MI);
127     assert(MI.getDesc().getNumDefs() == 1 &&
128            "More than one definition not supported yet");
129     Register Reg = MI.getOperand(0).getReg();
130     // Check if all the users of MI are local.
131     // We are going to invalidation the list of use operands, so we
132     // can't use range iterator.
133     for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
134          MOIt != MOItEnd;) {
135       MachineOperand &MOUse = *MOIt++;
136       // Check if the use is already local.
137       MachineBasicBlock *InsertMBB;
138       LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
139                  dbgs() << "Checking use: " << MIUse
140                         << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
141       if (isLocalUse(MOUse, MI, InsertMBB))
142         continue;
143       LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
144       Changed = true;
145       auto MBBAndReg = std::make_pair(InsertMBB, Reg);
146       auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
147       if (NewVRegIt == MBBWithLocalDef.end()) {
148         // Create the localized instruction.
149         MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
150         LocalizedInstrs.insert(LocalizedMI);
151         MachineInstr &UseMI = *MOUse.getParent();
152         if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
153           InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
154         else
155           InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
156                             LocalizedMI);
157 
158         // Set a new register for the definition.
159         Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
160         MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
161         LocalizedMI->getOperand(0).setReg(NewReg);
162         NewVRegIt =
163             MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
164         LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
165       }
166       LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
167                         << '\n');
168       // Update the user reg.
169       MOUse.setReg(NewVRegIt->second);
170     }
171   }
172   return Changed;
173 }
174 
localizeIntraBlock(LocalizedSetVecT & LocalizedInstrs)175 bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
176   bool Changed = false;
177 
178   // For each already-localized instruction which has multiple users, then we
179   // scan the block top down from the current position until we hit one of them.
180 
181   // FIXME: Consider doing inst duplication if live ranges are very long due to
182   // many users, but this case may be better served by regalloc improvements.
183 
184   for (MachineInstr *MI : LocalizedInstrs) {
185     Register Reg = MI->getOperand(0).getReg();
186     MachineBasicBlock &MBB = *MI->getParent();
187     // All of the user MIs of this reg.
188     SmallPtrSet<MachineInstr *, 32> Users;
189     for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
190       if (!UseMI.isPHI())
191         Users.insert(&UseMI);
192     }
193     // If all the users were PHIs then they're not going to be in our block,
194     // don't try to move this instruction.
195     if (Users.empty())
196       continue;
197 
198     MachineBasicBlock::iterator II(MI);
199     ++II;
200     while (II != MBB.end() && !Users.count(&*II))
201       ++II;
202 
203     LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II
204                       << "\n");
205     assert(II != MBB.end() && "Didn't find the user in the MBB");
206     MI->removeFromParent();
207     MBB.insert(II, MI);
208     Changed = true;
209   }
210   return Changed;
211 }
212 
runOnMachineFunction(MachineFunction & MF)213 bool Localizer::runOnMachineFunction(MachineFunction &MF) {
214   // If the ISel pipeline failed, do not bother running that pass.
215   if (MF.getProperties().hasProperty(
216           MachineFunctionProperties::Property::FailedISel))
217     return false;
218 
219   // Don't run the pass if the target asked so.
220   if (DoNotRunPass(MF))
221     return false;
222 
223   LLVM_DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n');
224 
225   init(MF);
226 
227   // Keep track of the instructions we localized. We'll do a second pass of
228   // intra-block localization to further reduce live ranges.
229   LocalizedSetVecT LocalizedInstrs;
230 
231   bool Changed = localizeInterBlock(MF, LocalizedInstrs);
232   Changed |= localizeIntraBlock(LocalizedInstrs);
233   return Changed;
234 }
235