//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for specific ABI,
/// yet will be correctly compiled if proper attributes are propagated from the
/// caller.
///
/// The pass analyzes call graph and propagates ABI target features through the
/// call graph.
///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned all call sites will
/// be updated to use a correct clone.
///
/// A function pass is limited in functionality but can run early in the
/// pipeline. A module pass is more powerful but has to run late, so misses
/// library folding opportunities.
2609467b48Spatrick // 2709467b48Spatrick //===----------------------------------------------------------------------===// 2809467b48Spatrick 2909467b48Spatrick #include "AMDGPU.h" 3009467b48Spatrick #include "AMDGPUSubtarget.h" 3109467b48Spatrick #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 3209467b48Spatrick #include "Utils/AMDGPUBaseInfo.h" 3309467b48Spatrick #include "llvm/ADT/SmallSet.h" 3409467b48Spatrick #include "llvm/ADT/SmallVector.h" 3509467b48Spatrick #include "llvm/IR/Function.h" 3609467b48Spatrick #include "llvm/IR/Module.h" 3709467b48Spatrick #include "llvm/Target/TargetMachine.h" 3809467b48Spatrick #include "llvm/Transforms/Utils/Cloning.h" 3909467b48Spatrick #include <string> 4009467b48Spatrick 4109467b48Spatrick #define DEBUG_TYPE "amdgpu-propagate-attributes" 4209467b48Spatrick 4309467b48Spatrick using namespace llvm; 4409467b48Spatrick 4509467b48Spatrick namespace llvm { 4609467b48Spatrick extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1]; 4709467b48Spatrick } 4809467b48Spatrick 4909467b48Spatrick namespace { 5009467b48Spatrick 51*097a140dSpatrick // Target features to propagate. 52*097a140dSpatrick static constexpr const FeatureBitset TargetFeatures = { 5309467b48Spatrick AMDGPU::FeatureWavefrontSize16, 5409467b48Spatrick AMDGPU::FeatureWavefrontSize32, 5509467b48Spatrick AMDGPU::FeatureWavefrontSize64 5609467b48Spatrick }; 5709467b48Spatrick 58*097a140dSpatrick // Attributes to propagate. 
59*097a140dSpatrick static constexpr const char* AttributeNames[] = { 60*097a140dSpatrick "amdgpu-waves-per-eu" 61*097a140dSpatrick }; 62*097a140dSpatrick 63*097a140dSpatrick static constexpr unsigned NumAttr = 64*097a140dSpatrick sizeof(AttributeNames) / sizeof(AttributeNames[0]); 65*097a140dSpatrick 66*097a140dSpatrick class AMDGPUPropagateAttributes { 67*097a140dSpatrick 68*097a140dSpatrick class FnProperties { 69*097a140dSpatrick private: 70*097a140dSpatrick explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {} 71*097a140dSpatrick 72*097a140dSpatrick public: 73*097a140dSpatrick explicit FnProperties(const TargetMachine &TM, const Function &F) { 74*097a140dSpatrick Features = TM.getSubtargetImpl(F)->getFeatureBits(); 75*097a140dSpatrick 76*097a140dSpatrick for (unsigned I = 0; I < NumAttr; ++I) 77*097a140dSpatrick if (F.hasFnAttribute(AttributeNames[I])) 78*097a140dSpatrick Attributes[I] = F.getFnAttribute(AttributeNames[I]); 79*097a140dSpatrick } 80*097a140dSpatrick 81*097a140dSpatrick bool operator == (const FnProperties &Other) const { 82*097a140dSpatrick if ((Features & TargetFeatures) != (Other.Features & TargetFeatures)) 83*097a140dSpatrick return false; 84*097a140dSpatrick for (unsigned I = 0; I < NumAttr; ++I) 85*097a140dSpatrick if (Attributes[I] != Other.Attributes[I]) 86*097a140dSpatrick return false; 87*097a140dSpatrick return true; 88*097a140dSpatrick } 89*097a140dSpatrick 90*097a140dSpatrick FnProperties adjustToCaller(const FnProperties &CallerProps) const { 91*097a140dSpatrick FnProperties New((Features & ~TargetFeatures) | CallerProps.Features); 92*097a140dSpatrick for (unsigned I = 0; I < NumAttr; ++I) 93*097a140dSpatrick New.Attributes[I] = CallerProps.Attributes[I]; 94*097a140dSpatrick return New; 95*097a140dSpatrick } 96*097a140dSpatrick 97*097a140dSpatrick FeatureBitset Features; 98*097a140dSpatrick Optional<Attribute> Attributes[NumAttr]; 99*097a140dSpatrick }; 100*097a140dSpatrick 10109467b48Spatrick class Clone { 
10209467b48Spatrick public: 103*097a140dSpatrick Clone(const FnProperties &Props, Function *OrigF, Function *NewF) : 104*097a140dSpatrick Properties(Props), OrigF(OrigF), NewF(NewF) {} 10509467b48Spatrick 106*097a140dSpatrick FnProperties Properties; 10709467b48Spatrick Function *OrigF; 10809467b48Spatrick Function *NewF; 10909467b48Spatrick }; 11009467b48Spatrick 11109467b48Spatrick const TargetMachine *TM; 11209467b48Spatrick 11309467b48Spatrick // Clone functions as needed or just set attributes. 11409467b48Spatrick bool AllowClone; 11509467b48Spatrick 11609467b48Spatrick // Option propagation roots. 11709467b48Spatrick SmallSet<Function *, 32> Roots; 11809467b48Spatrick 11909467b48Spatrick // Clones of functions with their attributes. 12009467b48Spatrick SmallVector<Clone, 32> Clones; 12109467b48Spatrick 12209467b48Spatrick // Find a clone with required features. 123*097a140dSpatrick Function *findFunction(const FnProperties &PropsNeeded, 12409467b48Spatrick Function *OrigF); 12509467b48Spatrick 126*097a140dSpatrick // Clone function \p F and set \p NewProps on the clone. 12709467b48Spatrick // Cole takes the name of original function. 128*097a140dSpatrick Function *cloneWithProperties(Function &F, const FnProperties &NewProps); 12909467b48Spatrick 13009467b48Spatrick // Set new function's features in place. 13109467b48Spatrick void setFeatures(Function &F, const FeatureBitset &NewFeatures); 13209467b48Spatrick 133*097a140dSpatrick // Set new function's attributes in place. 134*097a140dSpatrick void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs); 135*097a140dSpatrick 13609467b48Spatrick std::string getFeatureString(const FeatureBitset &Features) const; 13709467b48Spatrick 13809467b48Spatrick // Propagate attributes from Roots. 
13909467b48Spatrick bool process(); 14009467b48Spatrick 14109467b48Spatrick public: 14209467b48Spatrick AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) : 14309467b48Spatrick TM(TM), AllowClone(AllowClone) {} 14409467b48Spatrick 14509467b48Spatrick // Use F as a root and propagate its attributes. 14609467b48Spatrick bool process(Function &F); 14709467b48Spatrick 14809467b48Spatrick // Propagate attributes starting from kernel functions. 14909467b48Spatrick bool process(Module &M); 15009467b48Spatrick }; 15109467b48Spatrick 15209467b48Spatrick // Allows to propagate attributes early, but no clonning is allowed as it must 15309467b48Spatrick // be a function pass to run before any optimizations. 15409467b48Spatrick // TODO: We shall only need a one instance of module pass, but that needs to be 15509467b48Spatrick // in the linker pipeline which is currently not possible. 15609467b48Spatrick class AMDGPUPropagateAttributesEarly : public FunctionPass { 15709467b48Spatrick const TargetMachine *TM; 15809467b48Spatrick 15909467b48Spatrick public: 16009467b48Spatrick static char ID; // Pass identification 16109467b48Spatrick 16209467b48Spatrick AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) : 16309467b48Spatrick FunctionPass(ID), TM(TM) { 16409467b48Spatrick initializeAMDGPUPropagateAttributesEarlyPass( 16509467b48Spatrick *PassRegistry::getPassRegistry()); 16609467b48Spatrick } 16709467b48Spatrick 16809467b48Spatrick bool runOnFunction(Function &F) override; 16909467b48Spatrick }; 17009467b48Spatrick 17109467b48Spatrick // Allows to propagate attributes with clonning but does that late in the 17209467b48Spatrick // pipeline. 
17309467b48Spatrick class AMDGPUPropagateAttributesLate : public ModulePass { 17409467b48Spatrick const TargetMachine *TM; 17509467b48Spatrick 17609467b48Spatrick public: 17709467b48Spatrick static char ID; // Pass identification 17809467b48Spatrick 17909467b48Spatrick AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) : 18009467b48Spatrick ModulePass(ID), TM(TM) { 18109467b48Spatrick initializeAMDGPUPropagateAttributesLatePass( 18209467b48Spatrick *PassRegistry::getPassRegistry()); 18309467b48Spatrick } 18409467b48Spatrick 18509467b48Spatrick bool runOnModule(Module &M) override; 18609467b48Spatrick }; 18709467b48Spatrick 18809467b48Spatrick } // end anonymous namespace. 18909467b48Spatrick 19009467b48Spatrick char AMDGPUPropagateAttributesEarly::ID = 0; 19109467b48Spatrick char AMDGPUPropagateAttributesLate::ID = 0; 19209467b48Spatrick 19309467b48Spatrick INITIALIZE_PASS(AMDGPUPropagateAttributesEarly, 19409467b48Spatrick "amdgpu-propagate-attributes-early", 19509467b48Spatrick "Early propagate attributes from kernels to functions", 19609467b48Spatrick false, false) 19709467b48Spatrick INITIALIZE_PASS(AMDGPUPropagateAttributesLate, 19809467b48Spatrick "amdgpu-propagate-attributes-late", 19909467b48Spatrick "Late propagate attributes from kernels to functions", 20009467b48Spatrick false, false) 20109467b48Spatrick 20209467b48Spatrick Function * 203*097a140dSpatrick AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded, 20409467b48Spatrick Function *OrigF) { 20509467b48Spatrick // TODO: search for clone's clones. 
20609467b48Spatrick for (Clone &C : Clones) 207*097a140dSpatrick if (C.OrigF == OrigF && PropsNeeded == C.Properties) 20809467b48Spatrick return C.NewF; 20909467b48Spatrick 21009467b48Spatrick return nullptr; 21109467b48Spatrick } 21209467b48Spatrick 21309467b48Spatrick bool AMDGPUPropagateAttributes::process(Module &M) { 21409467b48Spatrick for (auto &F : M.functions()) 21509467b48Spatrick if (AMDGPU::isEntryFunctionCC(F.getCallingConv())) 21609467b48Spatrick Roots.insert(&F); 21709467b48Spatrick 21809467b48Spatrick return process(); 21909467b48Spatrick } 22009467b48Spatrick 22109467b48Spatrick bool AMDGPUPropagateAttributes::process(Function &F) { 22209467b48Spatrick Roots.insert(&F); 22309467b48Spatrick return process(); 22409467b48Spatrick } 22509467b48Spatrick 22609467b48Spatrick bool AMDGPUPropagateAttributes::process() { 22709467b48Spatrick bool Changed = false; 22809467b48Spatrick SmallSet<Function *, 32> NewRoots; 22909467b48Spatrick SmallSet<Function *, 32> Replaced; 23009467b48Spatrick 23109467b48Spatrick if (Roots.empty()) 23209467b48Spatrick return false; 23309467b48Spatrick Module &M = *(*Roots.begin())->getParent(); 23409467b48Spatrick 23509467b48Spatrick do { 23609467b48Spatrick Roots.insert(NewRoots.begin(), NewRoots.end()); 23709467b48Spatrick NewRoots.clear(); 23809467b48Spatrick 23909467b48Spatrick for (auto &F : M.functions()) { 240*097a140dSpatrick if (F.isDeclaration()) 24109467b48Spatrick continue; 24209467b48Spatrick 243*097a140dSpatrick const FnProperties CalleeProps(*TM, F); 24409467b48Spatrick SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace; 245*097a140dSpatrick SmallSet<CallBase *, 32> Visited; 24609467b48Spatrick 24709467b48Spatrick for (User *U : F.users()) { 24809467b48Spatrick Instruction *I = dyn_cast<Instruction>(U); 24909467b48Spatrick if (!I) 25009467b48Spatrick continue; 25109467b48Spatrick CallBase *CI = dyn_cast<CallBase>(I); 25209467b48Spatrick if (!CI) 25309467b48Spatrick continue; 25409467b48Spatrick Function 
*Caller = CI->getCaller(); 255*097a140dSpatrick if (!Caller || !Visited.insert(CI).second) 25609467b48Spatrick continue; 257*097a140dSpatrick if (!Roots.count(Caller) && !NewRoots.count(Caller)) 25809467b48Spatrick continue; 25909467b48Spatrick 260*097a140dSpatrick const FnProperties CallerProps(*TM, *Caller); 26109467b48Spatrick 262*097a140dSpatrick if (CalleeProps == CallerProps) { 263*097a140dSpatrick if (!Roots.count(&F)) 26409467b48Spatrick NewRoots.insert(&F); 26509467b48Spatrick continue; 26609467b48Spatrick } 26709467b48Spatrick 268*097a140dSpatrick Function *NewF = findFunction(CallerProps, &F); 26909467b48Spatrick if (!NewF) { 270*097a140dSpatrick const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps); 27109467b48Spatrick if (!AllowClone) { 27209467b48Spatrick // This may set different features on different iteartions if 27309467b48Spatrick // there is a contradiction in callers' attributes. In this case 27409467b48Spatrick // we rely on a second pass running on Module, which is allowed 27509467b48Spatrick // to clone. 
276*097a140dSpatrick setFeatures(F, NewProps.Features); 277*097a140dSpatrick setAttributes(F, NewProps.Attributes); 27809467b48Spatrick NewRoots.insert(&F); 27909467b48Spatrick Changed = true; 28009467b48Spatrick break; 28109467b48Spatrick } 28209467b48Spatrick 283*097a140dSpatrick NewF = cloneWithProperties(F, NewProps); 284*097a140dSpatrick Clones.push_back(Clone(CallerProps, &F, NewF)); 28509467b48Spatrick NewRoots.insert(NewF); 28609467b48Spatrick } 28709467b48Spatrick 28809467b48Spatrick ToReplace.push_back(std::make_pair(CI, NewF)); 28909467b48Spatrick Replaced.insert(&F); 29009467b48Spatrick 29109467b48Spatrick Changed = true; 29209467b48Spatrick } 29309467b48Spatrick 29409467b48Spatrick while (!ToReplace.empty()) { 29509467b48Spatrick auto R = ToReplace.pop_back_val(); 29609467b48Spatrick R.first->setCalledFunction(R.second); 29709467b48Spatrick } 29809467b48Spatrick } 29909467b48Spatrick } while (!NewRoots.empty()); 30009467b48Spatrick 30109467b48Spatrick for (Function *F : Replaced) { 30209467b48Spatrick if (F->use_empty()) 30309467b48Spatrick F->eraseFromParent(); 30409467b48Spatrick } 30509467b48Spatrick 306*097a140dSpatrick Roots.clear(); 307*097a140dSpatrick Clones.clear(); 308*097a140dSpatrick 30909467b48Spatrick return Changed; 31009467b48Spatrick } 31109467b48Spatrick 31209467b48Spatrick Function * 313*097a140dSpatrick AMDGPUPropagateAttributes::cloneWithProperties(Function &F, 314*097a140dSpatrick const FnProperties &NewProps) { 31509467b48Spatrick LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n'); 31609467b48Spatrick 31709467b48Spatrick ValueToValueMapTy dummy; 31809467b48Spatrick Function *NewF = CloneFunction(&F, dummy); 319*097a140dSpatrick setFeatures(*NewF, NewProps.Features); 320*097a140dSpatrick setAttributes(*NewF, NewProps.Attributes); 321*097a140dSpatrick NewF->setVisibility(GlobalValue::DefaultVisibility); 322*097a140dSpatrick NewF->setLinkage(GlobalValue::InternalLinkage); 32309467b48Spatrick 32409467b48Spatrick // Swap names. 
If that is the only clone it will retain the name of now 325*097a140dSpatrick // dead value. Preserve original name for externally visible functions. 326*097a140dSpatrick if (F.hasName() && F.hasLocalLinkage()) { 327*097a140dSpatrick std::string NewName = std::string(NewF->getName()); 32809467b48Spatrick NewF->takeName(&F); 32909467b48Spatrick F.setName(NewName); 33009467b48Spatrick } 33109467b48Spatrick 33209467b48Spatrick return NewF; 33309467b48Spatrick } 33409467b48Spatrick 33509467b48Spatrick void AMDGPUPropagateAttributes::setFeatures(Function &F, 33609467b48Spatrick const FeatureBitset &NewFeatures) { 33709467b48Spatrick std::string NewFeatureStr = getFeatureString(NewFeatures); 33809467b48Spatrick 33909467b48Spatrick LLVM_DEBUG(dbgs() << "Set features " 34009467b48Spatrick << getFeatureString(NewFeatures & TargetFeatures) 34109467b48Spatrick << " on " << F.getName() << '\n'); 34209467b48Spatrick 34309467b48Spatrick F.removeFnAttr("target-features"); 34409467b48Spatrick F.addFnAttr("target-features", NewFeatureStr); 34509467b48Spatrick } 34609467b48Spatrick 347*097a140dSpatrick void AMDGPUPropagateAttributes::setAttributes(Function &F, 348*097a140dSpatrick const ArrayRef<Optional<Attribute>> NewAttrs) { 349*097a140dSpatrick LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n"); 350*097a140dSpatrick for (unsigned I = 0; I < NumAttr; ++I) { 351*097a140dSpatrick F.removeFnAttr(AttributeNames[I]); 352*097a140dSpatrick if (NewAttrs[I]) { 353*097a140dSpatrick LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n'); 354*097a140dSpatrick F.addFnAttr(*NewAttrs[I]); 355*097a140dSpatrick } 356*097a140dSpatrick } 357*097a140dSpatrick } 358*097a140dSpatrick 35909467b48Spatrick std::string 36009467b48Spatrick AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const 36109467b48Spatrick { 36209467b48Spatrick std::string Ret; 36309467b48Spatrick for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) { 36409467b48Spatrick if 
(Features[KV.Value]) 36509467b48Spatrick Ret += (StringRef("+") + KV.Key + ",").str(); 36609467b48Spatrick else if (TargetFeatures[KV.Value]) 36709467b48Spatrick Ret += (StringRef("-") + KV.Key + ",").str(); 36809467b48Spatrick } 36909467b48Spatrick Ret.pop_back(); // Remove last comma. 37009467b48Spatrick return Ret; 37109467b48Spatrick } 37209467b48Spatrick 37309467b48Spatrick bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) { 37409467b48Spatrick if (!TM || !AMDGPU::isEntryFunctionCC(F.getCallingConv())) 37509467b48Spatrick return false; 37609467b48Spatrick 37709467b48Spatrick return AMDGPUPropagateAttributes(TM, false).process(F); 37809467b48Spatrick } 37909467b48Spatrick 38009467b48Spatrick bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) { 38109467b48Spatrick if (!TM) 38209467b48Spatrick return false; 38309467b48Spatrick 38409467b48Spatrick return AMDGPUPropagateAttributes(TM, true).process(M); 38509467b48Spatrick } 38609467b48Spatrick 38709467b48Spatrick FunctionPass 38809467b48Spatrick *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) { 38909467b48Spatrick return new AMDGPUPropagateAttributesEarly(TM); 39009467b48Spatrick } 39109467b48Spatrick 39209467b48Spatrick ModulePass 39309467b48Spatrick *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) { 39409467b48Spatrick return new AMDGPUPropagateAttributesLate(TM); 39509467b48Spatrick } 396