1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// \brief This pass propagates attributes from kernels to the non-entry
11 /// functions. Most of the library functions were not compiled for specific ABI,
12 /// yet will be correctly compiled if proper attributes are propagated from the
13 /// caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
18 /// It can run in two modes: as a function or module pass. A function pass
19 /// simply propagates attributes. A module pass clones functions if there are
20 /// callers with different ABI. If a function is cloned all call sites will
21 /// be updated to use a correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28 
29 #include "AMDGPU.h"
30 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
31 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/CodeGen/TargetPassConfig.h"
34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
35 #include "llvm/IR/InstrTypes.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Transforms/Utils/Cloning.h"
38 
39 #define DEBUG_TYPE "amdgpu-propagate-attributes"
40 
41 using namespace llvm;
42 
namespace llvm {
// Table of AMDGPU subtarget features, defined outside this file. This file
// iterates it in getFeatureString() and reads each entry's Key (feature name)
// and Value (feature bit index) to turn a FeatureBitset into a string.
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
}
46 
47 namespace {
48 
49 // Target features to propagate.
50 static constexpr const FeatureBitset TargetFeatures = {
51   AMDGPU::FeatureWavefrontSize16,
52   AMDGPU::FeatureWavefrontSize32,
53   AMDGPU::FeatureWavefrontSize64
54 };
55 
// Names of the string function attributes that are propagated from callers to
// callees together with the target features.
// TODO: Support conservative min/max merging instead of cloning.
static constexpr const char *AttributeNames[] = {
    "amdgpu-waves-per-eu",
    "amdgpu-flat-work-group-size",
};

// Number of entries in AttributeNames.
static constexpr unsigned NumAttr =
    sizeof(AttributeNames) / sizeof(*AttributeNames);
65 
66 class AMDGPUPropagateAttributes {
67 
68   class FnProperties {
69   private:
FnProperties(const FeatureBitset && FB)70     explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
71 
72   public:
FnProperties(const TargetMachine & TM,const Function & F)73     explicit FnProperties(const TargetMachine &TM, const Function &F) {
74       Features = TM.getSubtargetImpl(F)->getFeatureBits();
75 
76       for (unsigned I = 0; I < NumAttr; ++I)
77         if (F.hasFnAttribute(AttributeNames[I]))
78           Attributes[I] = F.getFnAttribute(AttributeNames[I]);
79     }
80 
operator ==(const FnProperties & Other) const81     bool operator == (const FnProperties &Other) const {
82       if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
83         return false;
84       for (unsigned I = 0; I < NumAttr; ++I)
85         if (Attributes[I] != Other.Attributes[I])
86           return false;
87       return true;
88     }
89 
adjustToCaller(const FnProperties & CallerProps) const90     FnProperties adjustToCaller(const FnProperties &CallerProps) const {
91       FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
92       for (unsigned I = 0; I < NumAttr; ++I)
93         New.Attributes[I] = CallerProps.Attributes[I];
94       return New;
95     }
96 
97     FeatureBitset Features;
98     Optional<Attribute> Attributes[NumAttr];
99   };
100 
101   class Clone {
102   public:
Clone(const FnProperties & Props,Function * OrigF,Function * NewF)103     Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
104       Properties(Props), OrigF(OrigF), NewF(NewF) {}
105 
106     FnProperties Properties;
107     Function *OrigF;
108     Function *NewF;
109   };
110 
111   const TargetMachine *TM;
112 
113   // Clone functions as needed or just set attributes.
114   bool AllowClone;
115 
116   // Option propagation roots.
117   SmallSet<Function *, 32> Roots;
118 
119   // Clones of functions with their attributes.
120   SmallVector<Clone, 32> Clones;
121 
122   // Find a clone with required features.
123   Function *findFunction(const FnProperties &PropsNeeded,
124                          Function *OrigF);
125 
126   // Clone function \p F and set \p NewProps on the clone.
127   // Cole takes the name of original function.
128   Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
129 
130   // Set new function's features in place.
131   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
132 
133   // Set new function's attributes in place.
134   void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
135 
136   std::string getFeatureString(const FeatureBitset &Features) const;
137 
138   // Propagate attributes from Roots.
139   bool process();
140 
141 public:
AMDGPUPropagateAttributes(const TargetMachine * TM,bool AllowClone)142   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
143     TM(TM), AllowClone(AllowClone) {}
144 
145   // Use F as a root and propagate its attributes.
146   bool process(Function &F);
147 
148   // Propagate attributes starting from kernel functions.
149   bool process(Module &M);
150 };
151 
152 // Allows to propagate attributes early, but no cloning is allowed as it must
153 // be a function pass to run before any optimizations.
154 // TODO: We shall only need a one instance of module pass, but that needs to be
155 // in the linker pipeline which is currently not possible.
156 class AMDGPUPropagateAttributesEarly : public FunctionPass {
157   const TargetMachine *TM;
158 
159 public:
160   static char ID; // Pass identification
161 
AMDGPUPropagateAttributesEarly(const TargetMachine * TM=nullptr)162   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
163     FunctionPass(ID), TM(TM) {
164     initializeAMDGPUPropagateAttributesEarlyPass(
165       *PassRegistry::getPassRegistry());
166   }
167 
168   bool runOnFunction(Function &F) override;
169 };
170 
171 // Allows to propagate attributes with cloning but does that late in the
172 // pipeline.
173 class AMDGPUPropagateAttributesLate : public ModulePass {
174   const TargetMachine *TM;
175 
176 public:
177   static char ID; // Pass identification
178 
AMDGPUPropagateAttributesLate(const TargetMachine * TM=nullptr)179   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
180     ModulePass(ID), TM(TM) {
181     initializeAMDGPUPropagateAttributesLatePass(
182       *PassRegistry::getPassRegistry());
183   }
184 
185   bool runOnModule(Module &M) override;
186 };
187 
188 }  // end anonymous namespace.
189 
// Out-of-class definitions of the static pass ID members declared in the two
// legacy pass classes.
char AMDGPUPropagateAttributesEarly::ID = 0;
char AMDGPUPropagateAttributesLate::ID = 0;

// Register the two legacy passes under the names
// "amdgpu-propagate-attributes-early" and "amdgpu-propagate-attributes-late".
// NOTE(review): the trailing `false, false` are presumably INITIALIZE_PASS's
// CFG-only and is-analysis flags — confirm against llvm/PassSupport.h.
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
                "amdgpu-propagate-attributes-early",
                "Early propagate attributes from kernels to functions",
                false, false)
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
                "amdgpu-propagate-attributes-late",
                "Late propagate attributes from kernels to functions",
                false, false)
201 
202 Function *
findFunction(const FnProperties & PropsNeeded,Function * OrigF)203 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
204                                         Function *OrigF) {
205   // TODO: search for clone's clones.
206   for (Clone &C : Clones)
207     if (C.OrigF == OrigF && PropsNeeded == C.Properties)
208       return C.NewF;
209 
210   return nullptr;
211 }
212 
process(Module & M)213 bool AMDGPUPropagateAttributes::process(Module &M) {
214   for (auto &F : M.functions())
215     if (AMDGPU::isKernel(F.getCallingConv()))
216       Roots.insert(&F);
217 
218   return Roots.empty() ? false : process();
219 }
220 
process(Function & F)221 bool AMDGPUPropagateAttributes::process(Function &F) {
222   Roots.insert(&F);
223   return process();
224 }
225 
process()226 bool AMDGPUPropagateAttributes::process() {
227   bool Changed = false;
228   SmallSet<Function *, 32> NewRoots;
229   SmallSet<Function *, 32> Replaced;
230 
231   assert(!Roots.empty());
232   Module &M = *(*Roots.begin())->getParent();
233 
234   do {
235     Roots.insert(NewRoots.begin(), NewRoots.end());
236     NewRoots.clear();
237 
238     for (auto &F : M.functions()) {
239       if (F.isDeclaration())
240         continue;
241 
242       const FnProperties CalleeProps(*TM, F);
243       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
244       SmallSet<CallBase *, 32> Visited;
245 
246       for (User *U : F.users()) {
247         Instruction *I = dyn_cast<Instruction>(U);
248         if (!I)
249           continue;
250         CallBase *CI = dyn_cast<CallBase>(I);
251         // Only propagate attributes if F is the called function. Specifically,
252         // do not propagate attributes if F is passed as an argument.
253         // FIXME: handle bitcasted callee, e.g.
254         // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
255         if (!CI || CI->getCalledOperand() != &F)
256           continue;
257         Function *Caller = CI->getCaller();
258         if (!Caller || !Visited.insert(CI).second)
259           continue;
260         if (!Roots.count(Caller) && !NewRoots.count(Caller))
261           continue;
262 
263         const FnProperties CallerProps(*TM, *Caller);
264 
265         if (CalleeProps == CallerProps) {
266           if (!Roots.count(&F))
267             NewRoots.insert(&F);
268           continue;
269         }
270 
271         Function *NewF = findFunction(CallerProps, &F);
272         if (!NewF) {
273           const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
274           if (!AllowClone) {
275             // This may set different features on different iterations if
276             // there is a contradiction in callers' attributes. In this case
277             // we rely on a second pass running on Module, which is allowed
278             // to clone.
279             setFeatures(F, NewProps.Features);
280             setAttributes(F, NewProps.Attributes);
281             NewRoots.insert(&F);
282             Changed = true;
283             break;
284           }
285 
286           NewF = cloneWithProperties(F, NewProps);
287           Clones.push_back(Clone(CallerProps, &F, NewF));
288           NewRoots.insert(NewF);
289         }
290 
291         ToReplace.push_back(std::make_pair(CI, NewF));
292         Replaced.insert(&F);
293 
294         Changed = true;
295       }
296 
297       while (!ToReplace.empty()) {
298         auto R = ToReplace.pop_back_val();
299         R.first->setCalledFunction(R.second);
300       }
301     }
302   } while (!NewRoots.empty());
303 
304   for (Function *F : Replaced) {
305     if (F->use_empty())
306       F->eraseFromParent();
307   }
308 
309   Roots.clear();
310   Clones.clear();
311 
312   return Changed;
313 }
314 
315 Function *
cloneWithProperties(Function & F,const FnProperties & NewProps)316 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
317                                                const FnProperties &NewProps) {
318   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
319 
320   ValueToValueMapTy dummy;
321   Function *NewF = CloneFunction(&F, dummy);
322   setFeatures(*NewF, NewProps.Features);
323   setAttributes(*NewF, NewProps.Attributes);
324   NewF->setVisibility(GlobalValue::DefaultVisibility);
325   NewF->setLinkage(GlobalValue::InternalLinkage);
326 
327   // Swap names. If that is the only clone it will retain the name of now
328   // dead value. Preserve original name for externally visible functions.
329   if (F.hasName() && F.hasLocalLinkage()) {
330     std::string NewName = std::string(NewF->getName());
331     NewF->takeName(&F);
332     F.setName(NewName);
333   }
334 
335   return NewF;
336 }
337 
setFeatures(Function & F,const FeatureBitset & NewFeatures)338 void AMDGPUPropagateAttributes::setFeatures(Function &F,
339                                             const FeatureBitset &NewFeatures) {
340   std::string NewFeatureStr = getFeatureString(NewFeatures);
341 
342   LLVM_DEBUG(dbgs() << "Set features "
343                     << getFeatureString(NewFeatures & TargetFeatures)
344                     << " on " << F.getName() << '\n');
345 
346   F.removeFnAttr("target-features");
347   F.addFnAttr("target-features", NewFeatureStr);
348 }
349 
setAttributes(Function & F,const ArrayRef<Optional<Attribute>> NewAttrs)350 void AMDGPUPropagateAttributes::setAttributes(Function &F,
351     const ArrayRef<Optional<Attribute>> NewAttrs) {
352   LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
353   for (unsigned I = 0; I < NumAttr; ++I) {
354     F.removeFnAttr(AttributeNames[I]);
355     if (NewAttrs[I]) {
356       LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
357       F.addFnAttr(*NewAttrs[I]);
358     }
359   }
360 }
361 
362 std::string
getFeatureString(const FeatureBitset & Features) const363 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
364 {
365   std::string Ret;
366   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
367     if (Features[KV.Value])
368       Ret += (StringRef("+") + KV.Key + ",").str();
369     else if (TargetFeatures[KV.Value])
370       Ret += (StringRef("-") + KV.Key + ",").str();
371   }
372   Ret.pop_back(); // Remove last comma.
373   return Ret;
374 }
375 
runOnFunction(Function & F)376 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
377   if (!TM) {
378     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
379     if (!TPC)
380       return false;
381 
382     TM = &TPC->getTM<TargetMachine>();
383   }
384 
385   if (!AMDGPU::isKernel(F.getCallingConv()))
386     return false;
387 
388   return AMDGPUPropagateAttributes(TM, false).process(F);
389 }
390 
runOnModule(Module & M)391 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
392   if (!TM) {
393     auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
394     if (!TPC)
395       return false;
396 
397     TM = &TPC->getTM<TargetMachine>();
398   }
399 
400   return AMDGPUPropagateAttributes(TM, true).process(M);
401 }
402 
// Factory for the legacy early (function) pass. Returns a pass object
// allocated with `new` and constructed with \p TM.
// NOTE(review): ownership is presumably taken by the pass manager the result
// is added to — confirm at the call sites.
FunctionPass
*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
  return new AMDGPUPropagateAttributesEarly(TM);
}
407 
// Factory for the legacy late (module) pass. Returns a pass object allocated
// with `new` and constructed with \p TM.
// NOTE(review): ownership is presumably taken by the pass manager the result
// is added to — confirm at the call sites.
ModulePass
*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
  return new AMDGPUPropagateAttributesLate(TM);
}
412 
413 PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)414 AMDGPUPropagateAttributesEarlyPass::run(Function &F,
415                                         FunctionAnalysisManager &AM) {
416   if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
417     return PreservedAnalyses::all();
418 
419   return AMDGPUPropagateAttributes(&TM, false).process(F)
420              ? PreservedAnalyses::none()
421              : PreservedAnalyses::all();
422 }
423 
424 PreservedAnalyses
run(Module & M,ModuleAnalysisManager & AM)425 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
426   return AMDGPUPropagateAttributes(&TM, true).process(M)
427              ? PreservedAnalyses::none()
428              : PreservedAnalyses::all();
429 }
430