109467b48Spatrick //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick /// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated
/// from the caller.
1409467b48Spatrick ///
1509467b48Spatrick /// The pass analyzes call graph and propagates ABI target features through the
1609467b48Spatrick /// call graph.
1709467b48Spatrick ///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
2209467b48Spatrick ///
2309467b48Spatrick /// A function pass is limited in functionality but can run early in the
2409467b48Spatrick /// pipeline. A module pass is more powerful but has to run late, so misses
2509467b48Spatrick /// library folding opportunities.
2609467b48Spatrick //
2709467b48Spatrick //===----------------------------------------------------------------------===//
2809467b48Spatrick 
2909467b48Spatrick #include "AMDGPU.h"
3009467b48Spatrick #include "AMDGPUSubtarget.h"
3109467b48Spatrick #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
3209467b48Spatrick #include "Utils/AMDGPUBaseInfo.h"
3309467b48Spatrick #include "llvm/ADT/SmallSet.h"
3409467b48Spatrick #include "llvm/ADT/SmallVector.h"
3509467b48Spatrick #include "llvm/IR/Function.h"
3609467b48Spatrick #include "llvm/IR/Module.h"
3709467b48Spatrick #include "llvm/Target/TargetMachine.h"
3809467b48Spatrick #include "llvm/Transforms/Utils/Cloning.h"
3909467b48Spatrick #include <string>
4009467b48Spatrick 
4109467b48Spatrick #define DEBUG_TYPE "amdgpu-propagate-attributes"
4209467b48Spatrick 
4309467b48Spatrick using namespace llvm;
4409467b48Spatrick 
4509467b48Spatrick namespace llvm {
4609467b48Spatrick extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
4709467b48Spatrick }
4809467b48Spatrick 
4909467b48Spatrick namespace {
5009467b48Spatrick 
51*097a140dSpatrick // Target features to propagate.
52*097a140dSpatrick static constexpr const FeatureBitset TargetFeatures = {
5309467b48Spatrick   AMDGPU::FeatureWavefrontSize16,
5409467b48Spatrick   AMDGPU::FeatureWavefrontSize32,
5509467b48Spatrick   AMDGPU::FeatureWavefrontSize64
5609467b48Spatrick };
5709467b48Spatrick 
// Names of the string function attributes that are propagated alongside the
// target features.
constexpr const char *AttributeNames[] = {
    "amdgpu-waves-per-eu",
};

// Number of entries in AttributeNames.
constexpr unsigned NumAttr = sizeof(AttributeNames) / sizeof(*AttributeNames);
65*097a140dSpatrick 
66*097a140dSpatrick class AMDGPUPropagateAttributes {
67*097a140dSpatrick 
68*097a140dSpatrick   class FnProperties {
69*097a140dSpatrick   private:
70*097a140dSpatrick     explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
71*097a140dSpatrick 
72*097a140dSpatrick   public:
73*097a140dSpatrick     explicit FnProperties(const TargetMachine &TM, const Function &F) {
74*097a140dSpatrick       Features = TM.getSubtargetImpl(F)->getFeatureBits();
75*097a140dSpatrick 
76*097a140dSpatrick       for (unsigned I = 0; I < NumAttr; ++I)
77*097a140dSpatrick         if (F.hasFnAttribute(AttributeNames[I]))
78*097a140dSpatrick           Attributes[I] = F.getFnAttribute(AttributeNames[I]);
79*097a140dSpatrick     }
80*097a140dSpatrick 
81*097a140dSpatrick     bool operator == (const FnProperties &Other) const {
82*097a140dSpatrick       if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
83*097a140dSpatrick         return false;
84*097a140dSpatrick       for (unsigned I = 0; I < NumAttr; ++I)
85*097a140dSpatrick         if (Attributes[I] != Other.Attributes[I])
86*097a140dSpatrick           return false;
87*097a140dSpatrick       return true;
88*097a140dSpatrick     }
89*097a140dSpatrick 
90*097a140dSpatrick     FnProperties adjustToCaller(const FnProperties &CallerProps) const {
91*097a140dSpatrick       FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
92*097a140dSpatrick       for (unsigned I = 0; I < NumAttr; ++I)
93*097a140dSpatrick         New.Attributes[I] = CallerProps.Attributes[I];
94*097a140dSpatrick       return New;
95*097a140dSpatrick     }
96*097a140dSpatrick 
97*097a140dSpatrick     FeatureBitset Features;
98*097a140dSpatrick     Optional<Attribute> Attributes[NumAttr];
99*097a140dSpatrick   };
100*097a140dSpatrick 
10109467b48Spatrick   class Clone {
10209467b48Spatrick   public:
103*097a140dSpatrick     Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
104*097a140dSpatrick       Properties(Props), OrigF(OrigF), NewF(NewF) {}
10509467b48Spatrick 
106*097a140dSpatrick     FnProperties Properties;
10709467b48Spatrick     Function *OrigF;
10809467b48Spatrick     Function *NewF;
10909467b48Spatrick   };
11009467b48Spatrick 
11109467b48Spatrick   const TargetMachine *TM;
11209467b48Spatrick 
11309467b48Spatrick   // Clone functions as needed or just set attributes.
11409467b48Spatrick   bool AllowClone;
11509467b48Spatrick 
11609467b48Spatrick   // Option propagation roots.
11709467b48Spatrick   SmallSet<Function *, 32> Roots;
11809467b48Spatrick 
11909467b48Spatrick   // Clones of functions with their attributes.
12009467b48Spatrick   SmallVector<Clone, 32> Clones;
12109467b48Spatrick 
12209467b48Spatrick   // Find a clone with required features.
123*097a140dSpatrick   Function *findFunction(const FnProperties &PropsNeeded,
12409467b48Spatrick                          Function *OrigF);
12509467b48Spatrick 
126*097a140dSpatrick   // Clone function \p F and set \p NewProps on the clone.
12709467b48Spatrick   // Cole takes the name of original function.
128*097a140dSpatrick   Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
12909467b48Spatrick 
13009467b48Spatrick   // Set new function's features in place.
13109467b48Spatrick   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
13209467b48Spatrick 
133*097a140dSpatrick   // Set new function's attributes in place.
134*097a140dSpatrick   void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
135*097a140dSpatrick 
13609467b48Spatrick   std::string getFeatureString(const FeatureBitset &Features) const;
13709467b48Spatrick 
13809467b48Spatrick   // Propagate attributes from Roots.
13909467b48Spatrick   bool process();
14009467b48Spatrick 
14109467b48Spatrick public:
14209467b48Spatrick   AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
14309467b48Spatrick     TM(TM), AllowClone(AllowClone) {}
14409467b48Spatrick 
14509467b48Spatrick   // Use F as a root and propagate its attributes.
14609467b48Spatrick   bool process(Function &F);
14709467b48Spatrick 
14809467b48Spatrick   // Propagate attributes starting from kernel functions.
14909467b48Spatrick   bool process(Module &M);
15009467b48Spatrick };
15109467b48Spatrick 
15209467b48Spatrick // Allows to propagate attributes early, but no clonning is allowed as it must
15309467b48Spatrick // be a function pass to run before any optimizations.
15409467b48Spatrick // TODO: We shall only need a one instance of module pass, but that needs to be
15509467b48Spatrick // in the linker pipeline which is currently not possible.
15609467b48Spatrick class AMDGPUPropagateAttributesEarly : public FunctionPass {
15709467b48Spatrick   const TargetMachine *TM;
15809467b48Spatrick 
15909467b48Spatrick public:
16009467b48Spatrick   static char ID; // Pass identification
16109467b48Spatrick 
16209467b48Spatrick   AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
16309467b48Spatrick     FunctionPass(ID), TM(TM) {
16409467b48Spatrick     initializeAMDGPUPropagateAttributesEarlyPass(
16509467b48Spatrick       *PassRegistry::getPassRegistry());
16609467b48Spatrick   }
16709467b48Spatrick 
16809467b48Spatrick   bool runOnFunction(Function &F) override;
16909467b48Spatrick };
17009467b48Spatrick 
17109467b48Spatrick // Allows to propagate attributes with clonning but does that late in the
17209467b48Spatrick // pipeline.
17309467b48Spatrick class AMDGPUPropagateAttributesLate : public ModulePass {
17409467b48Spatrick   const TargetMachine *TM;
17509467b48Spatrick 
17609467b48Spatrick public:
17709467b48Spatrick   static char ID; // Pass identification
17809467b48Spatrick 
17909467b48Spatrick   AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
18009467b48Spatrick     ModulePass(ID), TM(TM) {
18109467b48Spatrick     initializeAMDGPUPropagateAttributesLatePass(
18209467b48Spatrick       *PassRegistry::getPassRegistry());
18309467b48Spatrick   }
18409467b48Spatrick 
18509467b48Spatrick   bool runOnModule(Module &M) override;
18609467b48Spatrick };
18709467b48Spatrick 
18809467b48Spatrick }  // end anonymous namespace.
18909467b48Spatrick 
19009467b48Spatrick char AMDGPUPropagateAttributesEarly::ID = 0;
19109467b48Spatrick char AMDGPUPropagateAttributesLate::ID = 0;
19209467b48Spatrick 
19309467b48Spatrick INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
19409467b48Spatrick                 "amdgpu-propagate-attributes-early",
19509467b48Spatrick                 "Early propagate attributes from kernels to functions",
19609467b48Spatrick                 false, false)
19709467b48Spatrick INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
19809467b48Spatrick                 "amdgpu-propagate-attributes-late",
19909467b48Spatrick                 "Late propagate attributes from kernels to functions",
20009467b48Spatrick                 false, false)
20109467b48Spatrick 
20209467b48Spatrick Function *
203*097a140dSpatrick AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
20409467b48Spatrick                                         Function *OrigF) {
20509467b48Spatrick   // TODO: search for clone's clones.
20609467b48Spatrick   for (Clone &C : Clones)
207*097a140dSpatrick     if (C.OrigF == OrigF && PropsNeeded == C.Properties)
20809467b48Spatrick       return C.NewF;
20909467b48Spatrick 
21009467b48Spatrick   return nullptr;
21109467b48Spatrick }
21209467b48Spatrick 
21309467b48Spatrick bool AMDGPUPropagateAttributes::process(Module &M) {
21409467b48Spatrick   for (auto &F : M.functions())
21509467b48Spatrick     if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
21609467b48Spatrick       Roots.insert(&F);
21709467b48Spatrick 
21809467b48Spatrick   return process();
21909467b48Spatrick }
22009467b48Spatrick 
22109467b48Spatrick bool AMDGPUPropagateAttributes::process(Function &F) {
22209467b48Spatrick   Roots.insert(&F);
22309467b48Spatrick   return process();
22409467b48Spatrick }
22509467b48Spatrick 
22609467b48Spatrick bool AMDGPUPropagateAttributes::process() {
22709467b48Spatrick   bool Changed = false;
22809467b48Spatrick   SmallSet<Function *, 32> NewRoots;
22909467b48Spatrick   SmallSet<Function *, 32> Replaced;
23009467b48Spatrick 
23109467b48Spatrick   if (Roots.empty())
23209467b48Spatrick     return false;
23309467b48Spatrick   Module &M = *(*Roots.begin())->getParent();
23409467b48Spatrick 
23509467b48Spatrick   do {
23609467b48Spatrick     Roots.insert(NewRoots.begin(), NewRoots.end());
23709467b48Spatrick     NewRoots.clear();
23809467b48Spatrick 
23909467b48Spatrick     for (auto &F : M.functions()) {
240*097a140dSpatrick       if (F.isDeclaration())
24109467b48Spatrick         continue;
24209467b48Spatrick 
243*097a140dSpatrick       const FnProperties CalleeProps(*TM, F);
24409467b48Spatrick       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
245*097a140dSpatrick       SmallSet<CallBase *, 32> Visited;
24609467b48Spatrick 
24709467b48Spatrick       for (User *U : F.users()) {
24809467b48Spatrick         Instruction *I = dyn_cast<Instruction>(U);
24909467b48Spatrick         if (!I)
25009467b48Spatrick           continue;
25109467b48Spatrick         CallBase *CI = dyn_cast<CallBase>(I);
25209467b48Spatrick         if (!CI)
25309467b48Spatrick           continue;
25409467b48Spatrick         Function *Caller = CI->getCaller();
255*097a140dSpatrick         if (!Caller || !Visited.insert(CI).second)
25609467b48Spatrick           continue;
257*097a140dSpatrick         if (!Roots.count(Caller) && !NewRoots.count(Caller))
25809467b48Spatrick           continue;
25909467b48Spatrick 
260*097a140dSpatrick         const FnProperties CallerProps(*TM, *Caller);
26109467b48Spatrick 
262*097a140dSpatrick         if (CalleeProps == CallerProps) {
263*097a140dSpatrick           if (!Roots.count(&F))
26409467b48Spatrick             NewRoots.insert(&F);
26509467b48Spatrick           continue;
26609467b48Spatrick         }
26709467b48Spatrick 
268*097a140dSpatrick         Function *NewF = findFunction(CallerProps, &F);
26909467b48Spatrick         if (!NewF) {
270*097a140dSpatrick           const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
27109467b48Spatrick           if (!AllowClone) {
27209467b48Spatrick             // This may set different features on different iteartions if
27309467b48Spatrick             // there is a contradiction in callers' attributes. In this case
27409467b48Spatrick             // we rely on a second pass running on Module, which is allowed
27509467b48Spatrick             // to clone.
276*097a140dSpatrick             setFeatures(F, NewProps.Features);
277*097a140dSpatrick             setAttributes(F, NewProps.Attributes);
27809467b48Spatrick             NewRoots.insert(&F);
27909467b48Spatrick             Changed = true;
28009467b48Spatrick             break;
28109467b48Spatrick           }
28209467b48Spatrick 
283*097a140dSpatrick           NewF = cloneWithProperties(F, NewProps);
284*097a140dSpatrick           Clones.push_back(Clone(CallerProps, &F, NewF));
28509467b48Spatrick           NewRoots.insert(NewF);
28609467b48Spatrick         }
28709467b48Spatrick 
28809467b48Spatrick         ToReplace.push_back(std::make_pair(CI, NewF));
28909467b48Spatrick         Replaced.insert(&F);
29009467b48Spatrick 
29109467b48Spatrick         Changed = true;
29209467b48Spatrick       }
29309467b48Spatrick 
29409467b48Spatrick       while (!ToReplace.empty()) {
29509467b48Spatrick         auto R = ToReplace.pop_back_val();
29609467b48Spatrick         R.first->setCalledFunction(R.second);
29709467b48Spatrick       }
29809467b48Spatrick     }
29909467b48Spatrick   } while (!NewRoots.empty());
30009467b48Spatrick 
30109467b48Spatrick   for (Function *F : Replaced) {
30209467b48Spatrick     if (F->use_empty())
30309467b48Spatrick       F->eraseFromParent();
30409467b48Spatrick   }
30509467b48Spatrick 
306*097a140dSpatrick   Roots.clear();
307*097a140dSpatrick   Clones.clear();
308*097a140dSpatrick 
30909467b48Spatrick   return Changed;
31009467b48Spatrick }
31109467b48Spatrick 
31209467b48Spatrick Function *
313*097a140dSpatrick AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
314*097a140dSpatrick                                                const FnProperties &NewProps) {
31509467b48Spatrick   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
31609467b48Spatrick 
31709467b48Spatrick   ValueToValueMapTy dummy;
31809467b48Spatrick   Function *NewF = CloneFunction(&F, dummy);
319*097a140dSpatrick   setFeatures(*NewF, NewProps.Features);
320*097a140dSpatrick   setAttributes(*NewF, NewProps.Attributes);
321*097a140dSpatrick   NewF->setVisibility(GlobalValue::DefaultVisibility);
322*097a140dSpatrick   NewF->setLinkage(GlobalValue::InternalLinkage);
32309467b48Spatrick 
32409467b48Spatrick   // Swap names. If that is the only clone it will retain the name of now
325*097a140dSpatrick   // dead value. Preserve original name for externally visible functions.
326*097a140dSpatrick   if (F.hasName() && F.hasLocalLinkage()) {
327*097a140dSpatrick     std::string NewName = std::string(NewF->getName());
32809467b48Spatrick     NewF->takeName(&F);
32909467b48Spatrick     F.setName(NewName);
33009467b48Spatrick   }
33109467b48Spatrick 
33209467b48Spatrick   return NewF;
33309467b48Spatrick }
33409467b48Spatrick 
33509467b48Spatrick void AMDGPUPropagateAttributes::setFeatures(Function &F,
33609467b48Spatrick                                             const FeatureBitset &NewFeatures) {
33709467b48Spatrick   std::string NewFeatureStr = getFeatureString(NewFeatures);
33809467b48Spatrick 
33909467b48Spatrick   LLVM_DEBUG(dbgs() << "Set features "
34009467b48Spatrick                     << getFeatureString(NewFeatures & TargetFeatures)
34109467b48Spatrick                     << " on " << F.getName() << '\n');
34209467b48Spatrick 
34309467b48Spatrick   F.removeFnAttr("target-features");
34409467b48Spatrick   F.addFnAttr("target-features", NewFeatureStr);
34509467b48Spatrick }
34609467b48Spatrick 
347*097a140dSpatrick void AMDGPUPropagateAttributes::setAttributes(Function &F,
348*097a140dSpatrick     const ArrayRef<Optional<Attribute>> NewAttrs) {
349*097a140dSpatrick   LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
350*097a140dSpatrick   for (unsigned I = 0; I < NumAttr; ++I) {
351*097a140dSpatrick     F.removeFnAttr(AttributeNames[I]);
352*097a140dSpatrick     if (NewAttrs[I]) {
353*097a140dSpatrick       LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
354*097a140dSpatrick       F.addFnAttr(*NewAttrs[I]);
355*097a140dSpatrick     }
356*097a140dSpatrick   }
357*097a140dSpatrick }
358*097a140dSpatrick 
35909467b48Spatrick std::string
36009467b48Spatrick AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
36109467b48Spatrick {
36209467b48Spatrick   std::string Ret;
36309467b48Spatrick   for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
36409467b48Spatrick     if (Features[KV.Value])
36509467b48Spatrick       Ret += (StringRef("+") + KV.Key + ",").str();
36609467b48Spatrick     else if (TargetFeatures[KV.Value])
36709467b48Spatrick       Ret += (StringRef("-") + KV.Key + ",").str();
36809467b48Spatrick   }
36909467b48Spatrick   Ret.pop_back(); // Remove last comma.
37009467b48Spatrick   return Ret;
37109467b48Spatrick }
37209467b48Spatrick 
37309467b48Spatrick bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
37409467b48Spatrick   if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
37509467b48Spatrick     return false;
37609467b48Spatrick 
37709467b48Spatrick   return AMDGPUPropagateAttributes(TM, false).process(F);
37809467b48Spatrick }
37909467b48Spatrick 
38009467b48Spatrick bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
38109467b48Spatrick   if (!TM)
38209467b48Spatrick     return false;
38309467b48Spatrick 
38409467b48Spatrick   return AMDGPUPropagateAttributes(TM, true).process(M);
38509467b48Spatrick }
38609467b48Spatrick 
38709467b48Spatrick FunctionPass
38809467b48Spatrick *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
38909467b48Spatrick   return new AMDGPUPropagateAttributesEarly(TM);
39009467b48Spatrick }
39109467b48Spatrick 
39209467b48Spatrick ModulePass
39309467b48Spatrick *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
39409467b48Spatrick   return new AMDGPUPropagateAttributesLate(TM);
39509467b48Spatrick }
396