1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attributes are propagated from the
13 /// caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "AMDGPUSubtarget.h"
31 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
32 #include "Utils/AMDGPUBaseInfo.h"
33 #include "llvm/ADT/SmallSet.h"
34 #include "llvm/ADT/SmallVector.h"
35 #include "llvm/IR/Function.h"
36 #include "llvm/IR/Module.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/Transforms/Utils/Cloning.h"
39 #include <string>
40 
41 #define DEBUG_TYPE "amdgpu-propagate-attributes"
42 
43 using namespace llvm;
44 
45 namespace llvm {
46 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
47 }
48 
49 namespace {
50 
51 class AMDGPUPropagateAttributes {
52   const FeatureBitset TargetFeatures = {
53     AMDGPU::FeatureWavefrontSize16,
54     AMDGPU::FeatureWavefrontSize32,
55     AMDGPU::FeatureWavefrontSize64
56   };
57 
58   class Clone{
59   public:
Clone(FeatureBitset FeatureMask,Function * OrigF,Function * NewF)60     Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
61       FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
62 
63     FeatureBitset FeatureMask;
64     Function *OrigF;
65     Function *NewF;
66   };
67 
68   const TargetMachine *TM;
69 
70   // Clone functions as needed or just set attributes.
71   bool AllowClone;
72 
73   // Option propagation roots.
74   SmallSet<Function *, 32> Roots;
75 
76   // Clones of functions with their attributes.
77   SmallVector<Clone, 32> Clones;
78 
79   // Find a clone with required features.
80   Function *findFunction(const FeatureBitset &FeaturesNeeded,
81                          Function *OrigF);
82 
83   // Clone function F and set NewFeatures on the clone.
84   // Cole takes the name of original function.
85   Function *cloneWithFeatures(Function &F,
86                               const FeatureBitset &NewFeatures);
87 
88   // Set new function's features in place.
89   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
90 
91   std::string getFeatureString(const FeatureBitset &Features) const;
92 
93   // Propagate attributes from Roots.
94   bool process();
95 
96 public:
AMDGPUPropagateAttributes(const TargetMachine * TM,bool AllowClone)97   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
98     TM(TM), AllowClone(AllowClone) {}
99 
100   // Use F as a root and propagate its attributes.
101   bool process(Function &F);
102 
103   // Propagate attributes starting from kernel functions.
104   bool process(Module &M);
105 };
106 
107 // Allows propagating attributes early, but no cloning is allowed as it must
108 // be a function pass to run before any optimizations.
109 // TODO: We shall only need one instance of a module pass, but that needs to
110 // be in the linker pipeline which is currently not possible.
111 class AMDGPUPropagateAttributesEarly : public FunctionPass {
112   const TargetMachine *TM;
113 
114 public:
115   static char ID; // Pass identification
116 
AMDGPUPropagateAttributesEarly(const TargetMachine * TM=nullptr)117   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
118     FunctionPass(ID), TM(TM) {
119     initializeAMDGPUPropagateAttributesEarlyPass(
120       *PassRegistry::getPassRegistry());
121   }
122 
123   bool runOnFunction(Function &F) override;
124 };
125 
126 // Allows propagating attributes with cloning, but does so late in the
127 // pipeline.
128 class AMDGPUPropagateAttributesLate : public ModulePass {
129   const TargetMachine *TM;
130 
131 public:
132   static char ID; // Pass identification
133 
AMDGPUPropagateAttributesLate(const TargetMachine * TM=nullptr)134   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
135     ModulePass(ID), TM(TM) {
136     initializeAMDGPUPropagateAttributesLatePass(
137       *PassRegistry::getPassRegistry());
138   }
139 
140   bool runOnModule(Module &M) override;
141 };
142 
143 }  // end anonymous namespace.
144 
145 char AMDGPUPropagateAttributesEarly::ID = 0;
146 char AMDGPUPropagateAttributesLate::ID = 0;
147 
148 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
149                 "amdgpu-propagate-attributes-early",
150                 "Early propagate attributes from kernels to functions",
151                 false, false)
152 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
153                 "amdgpu-propagate-attributes-late",
154                 "Late propagate attributes from kernels to functions",
155                 false, false)
156 
157 Function *
findFunction(const FeatureBitset & FeaturesNeeded,Function * OrigF)158 AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
159                                         Function *OrigF) {
160   // TODO: search for clone's clones.
161   for (Clone &C : Clones)
162     if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
163       return C.NewF;
164 
165   return nullptr;
166 }
167 
process(Module & M)168 bool AMDGPUPropagateAttributes::process(Module &M) {
169   for (auto &F : M.functions())
170     if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
171       Roots.insert(&F);
172 
173   return process();
174 }
175 
process(Function & F)176 bool AMDGPUPropagateAttributes::process(Function &F) {
177   Roots.insert(&F);
178   return process();
179 }
180 
process()181 bool AMDGPUPropagateAttributes::process() {
182   bool Changed = false;
183   SmallSet<Function *, 32> NewRoots;
184   SmallSet<Function *, 32> Replaced;
185 
186   if (Roots.empty())
187     return false;
188   Module &M = *(*Roots.begin())->getParent();
189 
190   do {
191     Roots.insert(NewRoots.begin(), NewRoots.end());
192     NewRoots.clear();
193 
194     for (auto &F : M.functions()) {
195       if (F.isDeclaration() || Roots.count(&F) || Roots.count(&F))
196         continue;
197 
198       const FeatureBitset &CalleeBits =
199         TM->getSubtargetImpl(F)->getFeatureBits();
200       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
201 
202       for (User *U : F.users()) {
203         Instruction *I = dyn_cast<Instruction>(U);
204         if (!I)
205           continue;
206         CallBase *CI = dyn_cast<CallBase>(I);
207         if (!CI)
208           continue;
209         Function *Caller = CI->getCaller();
210         if (!Caller)
211           continue;
212         if (!Roots.count(Caller))
213           continue;
214 
215         const FeatureBitset &CallerBits =
216           TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
217 
218         if (CallerBits == (CalleeBits  & TargetFeatures)) {
219           NewRoots.insert(&F);
220           continue;
221         }
222 
223         Function *NewF = findFunction(CallerBits, &F);
224         if (!NewF) {
225           FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
226                                     CallerBits);
227           if (!AllowClone) {
228             // This may set different features on different iteartions if
229             // there is a contradiction in callers' attributes. In this case
230             // we rely on a second pass running on Module, which is allowed
231             // to clone.
232             setFeatures(F, NewFeatures);
233             NewRoots.insert(&F);
234             Changed = true;
235             break;
236           }
237 
238           NewF = cloneWithFeatures(F, NewFeatures);
239           Clones.push_back(Clone(CallerBits, &F, NewF));
240           NewRoots.insert(NewF);
241         }
242 
243         ToReplace.push_back(std::make_pair(CI, NewF));
244         Replaced.insert(&F);
245 
246         Changed = true;
247       }
248 
249       while (!ToReplace.empty()) {
250         auto R = ToReplace.pop_back_val();
251         R.first->setCalledFunction(R.second);
252       }
253     }
254   } while (!NewRoots.empty());
255 
256   for (Function *F : Replaced) {
257     if (F->use_empty())
258       F->eraseFromParent();
259   }
260 
261   return Changed;
262 }
263 
264 Function *
cloneWithFeatures(Function & F,const FeatureBitset & NewFeatures)265 AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
266                                              const FeatureBitset &NewFeatures) {
267   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
268 
269   ValueToValueMapTy dummy;
270   Function *NewF = CloneFunction(&F, dummy);
271   setFeatures(*NewF, NewFeatures);
272 
273   // Swap names. If that is the only clone it will retain the name of now
274   // dead value.
275   if (F.hasName()) {
276     std::string NewName = NewF->getName();
277     NewF->takeName(&F);
278     F.setName(NewName);
279 
280     // Name has changed, it does not need an external symbol.
281     F.setVisibility(GlobalValue::DefaultVisibility);
282     F.setLinkage(GlobalValue::InternalLinkage);
283   }
284 
285   return NewF;
286 }
287 
setFeatures(Function & F,const FeatureBitset & NewFeatures)288 void AMDGPUPropagateAttributes::setFeatures(Function &F,
289                                             const FeatureBitset &NewFeatures) {
290   std::string NewFeatureStr = getFeatureString(NewFeatures);
291 
292   LLVM_DEBUG(dbgs() << "Set features "
293                     << getFeatureString(NewFeatures & TargetFeatures)
294                     << " on " << F.getName() << '\n');
295 
296   F.removeFnAttr("target-features");
297   F.addFnAttr("target-features", NewFeatureStr);
298 }
299 
300 std::string
getFeatureString(const FeatureBitset & Features) const301 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
302 {
303   std::string Ret;
304   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
305     if (Features[KV.Value])
306       Ret += (StringRef("+") + KV.Key + ",").str();
307     else if (TargetFeatures[KV.Value])
308       Ret += (StringRef("-") + KV.Key + ",").str();
309   }
310   Ret.pop_back(); // Remove last comma.
311   return Ret;
312 }
313 
runOnFunction(Function & F)314 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
315   if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
316     return false;
317 
318   return AMDGPUPropagateAttributes(TM, false).process(F);
319 }
320 
runOnModule(Module & M)321 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
322   if (!TM)
323     return false;
324 
325   return AMDGPUPropagateAttributes(TM, true).process(M);
326 }
327 
328 FunctionPass
createAMDGPUPropagateAttributesEarlyPass(const TargetMachine * TM)329 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
330   return new AMDGPUPropagateAttributesEarly(TM);
331 }
332 
333 ModulePass
createAMDGPUPropagateAttributesLatePass(const TargetMachine * TM)334 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
335   return new AMDGPUPropagateAttributesLate(TM);
336 }
337