1 //===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// \brief This pass propagates attributes from kernels to the non-entry
/// functions. Most of the library functions were not compiled for a specific
/// ABI, yet will be correctly compiled if proper attributes are propagated
/// from the caller.
14 ///
15 /// The pass analyzes call graph and propagates ABI target features through the
16 /// call graph.
17 ///
/// It can run in two modes: as a function or module pass. A function pass
/// simply propagates attributes. A module pass clones functions if there are
/// callers with different ABI. If a function is cloned, all call sites will
/// be updated to use the correct clone.
22 ///
23 /// A function pass is limited in functionality but can run early in the
24 /// pipeline. A module pass is more powerful but has to run late, so misses
25 /// library folding opportunities.
26 //
27 //===----------------------------------------------------------------------===//
28
29 #include "AMDGPU.h"
30 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
31 #include "Utils/AMDGPUBaseInfo.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/CodeGen/TargetPassConfig.h"
34 #include "llvm/CodeGen/TargetSubtargetInfo.h"
35 #include "llvm/IR/InstrTypes.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Transforms/Utils/Cloning.h"
38
39 #define DEBUG_TYPE "amdgpu-propagate-attributes"
40
41 using namespace llvm;
42
43 namespace llvm {
44 extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
45 }
46
47 namespace {
48
49 // Target features to propagate.
50 static constexpr const FeatureBitset TargetFeatures = {
51 AMDGPU::FeatureWavefrontSize16,
52 AMDGPU::FeatureWavefrontSize32,
53 AMDGPU::FeatureWavefrontSize64
54 };
55
// Names of the string function attributes that are propagated from a caller
// to its callees.
// TODO: Support conservative min/max merging instead of cloning.
static constexpr const char *AttributeNames[] = {
    "amdgpu-waves-per-eu",
    "amdgpu-flat-work-group-size",
};

// Number of entries in AttributeNames.
static constexpr unsigned NumAttr =
    sizeof(AttributeNames) / sizeof(*AttributeNames);
65
66 class AMDGPUPropagateAttributes {
67
68 class FnProperties {
69 private:
FnProperties(const FeatureBitset && FB)70 explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
71
72 public:
FnProperties(const TargetMachine & TM,const Function & F)73 explicit FnProperties(const TargetMachine &TM, const Function &F) {
74 Features = TM.getSubtargetImpl(F)->getFeatureBits();
75
76 for (unsigned I = 0; I < NumAttr; ++I)
77 if (F.hasFnAttribute(AttributeNames[I]))
78 Attributes[I] = F.getFnAttribute(AttributeNames[I]);
79 }
80
operator ==(const FnProperties & Other) const81 bool operator == (const FnProperties &Other) const {
82 if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
83 return false;
84 for (unsigned I = 0; I < NumAttr; ++I)
85 if (Attributes[I] != Other.Attributes[I])
86 return false;
87 return true;
88 }
89
adjustToCaller(const FnProperties & CallerProps) const90 FnProperties adjustToCaller(const FnProperties &CallerProps) const {
91 FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
92 for (unsigned I = 0; I < NumAttr; ++I)
93 New.Attributes[I] = CallerProps.Attributes[I];
94 return New;
95 }
96
97 FeatureBitset Features;
98 Optional<Attribute> Attributes[NumAttr];
99 };
100
101 class Clone {
102 public:
Clone(const FnProperties & Props,Function * OrigF,Function * NewF)103 Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
104 Properties(Props), OrigF(OrigF), NewF(NewF) {}
105
106 FnProperties Properties;
107 Function *OrigF;
108 Function *NewF;
109 };
110
111 const TargetMachine *TM;
112
113 // Clone functions as needed or just set attributes.
114 bool AllowClone;
115
116 // Option propagation roots.
117 SmallSet<Function *, 32> Roots;
118
119 // Clones of functions with their attributes.
120 SmallVector<Clone, 32> Clones;
121
122 // Find a clone with required features.
123 Function *findFunction(const FnProperties &PropsNeeded,
124 Function *OrigF);
125
126 // Clone function \p F and set \p NewProps on the clone.
127 // Cole takes the name of original function.
128 Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
129
130 // Set new function's features in place.
131 void setFeatures(Function &F, const FeatureBitset &NewFeatures);
132
133 // Set new function's attributes in place.
134 void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
135
136 std::string getFeatureString(const FeatureBitset &Features) const;
137
138 // Propagate attributes from Roots.
139 bool process();
140
141 public:
AMDGPUPropagateAttributes(const TargetMachine * TM,bool AllowClone)142 AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
143 TM(TM), AllowClone(AllowClone) {}
144
145 // Use F as a root and propagate its attributes.
146 bool process(Function &F);
147
148 // Propagate attributes starting from kernel functions.
149 bool process(Module &M);
150 };
151
152 // Allows to propagate attributes early, but no clonning is allowed as it must
153 // be a function pass to run before any optimizations.
154 // TODO: We shall only need a one instance of module pass, but that needs to be
155 // in the linker pipeline which is currently not possible.
156 class AMDGPUPropagateAttributesEarly : public FunctionPass {
157 const TargetMachine *TM;
158
159 public:
160 static char ID; // Pass identification
161
AMDGPUPropagateAttributesEarly(const TargetMachine * TM=nullptr)162 AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
163 FunctionPass(ID), TM(TM) {
164 initializeAMDGPUPropagateAttributesEarlyPass(
165 *PassRegistry::getPassRegistry());
166 }
167
168 bool runOnFunction(Function &F) override;
169 };
170
171 // Allows to propagate attributes with clonning but does that late in the
172 // pipeline.
173 class AMDGPUPropagateAttributesLate : public ModulePass {
174 const TargetMachine *TM;
175
176 public:
177 static char ID; // Pass identification
178
AMDGPUPropagateAttributesLate(const TargetMachine * TM=nullptr)179 AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
180 ModulePass(ID), TM(TM) {
181 initializeAMDGPUPropagateAttributesLatePass(
182 *PassRegistry::getPassRegistry());
183 }
184
185 bool runOnModule(Module &M) override;
186 };
187
188 } // end anonymous namespace.
189
190 char AMDGPUPropagateAttributesEarly::ID = 0;
191 char AMDGPUPropagateAttributesLate::ID = 0;
192
193 INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
194 "amdgpu-propagate-attributes-early",
195 "Early propagate attributes from kernels to functions",
196 false, false)
197 INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
198 "amdgpu-propagate-attributes-late",
199 "Late propagate attributes from kernels to functions",
200 false, false)
201
202 Function *
findFunction(const FnProperties & PropsNeeded,Function * OrigF)203 AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
204 Function *OrigF) {
205 // TODO: search for clone's clones.
206 for (Clone &C : Clones)
207 if (C.OrigF == OrigF && PropsNeeded == C.Properties)
208 return C.NewF;
209
210 return nullptr;
211 }
212
process(Module & M)213 bool AMDGPUPropagateAttributes::process(Module &M) {
214 for (auto &F : M.functions())
215 if (AMDGPU::isEntryFunctionCC(F.getCallingConv()))
216 Roots.insert(&F);
217
218 return process();
219 }
220
process(Function & F)221 bool AMDGPUPropagateAttributes::process(Function &F) {
222 Roots.insert(&F);
223 return process();
224 }
225
process()226 bool AMDGPUPropagateAttributes::process() {
227 bool Changed = false;
228 SmallSet<Function *, 32> NewRoots;
229 SmallSet<Function *, 32> Replaced;
230
231 if (Roots.empty())
232 return false;
233 Module &M = *(*Roots.begin())->getParent();
234
235 do {
236 Roots.insert(NewRoots.begin(), NewRoots.end());
237 NewRoots.clear();
238
239 for (auto &F : M.functions()) {
240 if (F.isDeclaration())
241 continue;
242
243 const FnProperties CalleeProps(*TM, F);
244 SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
245 SmallSet<CallBase *, 32> Visited;
246
247 for (User *U : F.users()) {
248 Instruction *I = dyn_cast<Instruction>(U);
249 if (!I)
250 continue;
251 CallBase *CI = dyn_cast<CallBase>(I);
252 // Only propagate attributes if F is the called function. Specifically,
253 // do not propagate attributes if F is passed as an argument.
254 // FIXME: handle bitcasted callee, e.g.
255 // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
256 if (!CI || CI->getCalledOperand() != &F)
257 continue;
258 Function *Caller = CI->getCaller();
259 if (!Caller || !Visited.insert(CI).second)
260 continue;
261 if (!Roots.count(Caller) && !NewRoots.count(Caller))
262 continue;
263
264 const FnProperties CallerProps(*TM, *Caller);
265
266 if (CalleeProps == CallerProps) {
267 if (!Roots.count(&F))
268 NewRoots.insert(&F);
269 continue;
270 }
271
272 Function *NewF = findFunction(CallerProps, &F);
273 if (!NewF) {
274 const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
275 if (!AllowClone) {
276 // This may set different features on different iteartions if
277 // there is a contradiction in callers' attributes. In this case
278 // we rely on a second pass running on Module, which is allowed
279 // to clone.
280 setFeatures(F, NewProps.Features);
281 setAttributes(F, NewProps.Attributes);
282 NewRoots.insert(&F);
283 Changed = true;
284 break;
285 }
286
287 NewF = cloneWithProperties(F, NewProps);
288 Clones.push_back(Clone(CallerProps, &F, NewF));
289 NewRoots.insert(NewF);
290 }
291
292 ToReplace.push_back(std::make_pair(CI, NewF));
293 Replaced.insert(&F);
294
295 Changed = true;
296 }
297
298 while (!ToReplace.empty()) {
299 auto R = ToReplace.pop_back_val();
300 R.first->setCalledFunction(R.second);
301 }
302 }
303 } while (!NewRoots.empty());
304
305 for (Function *F : Replaced) {
306 if (F->use_empty())
307 F->eraseFromParent();
308 }
309
310 Roots.clear();
311 Clones.clear();
312
313 return Changed;
314 }
315
316 Function *
cloneWithProperties(Function & F,const FnProperties & NewProps)317 AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
318 const FnProperties &NewProps) {
319 LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
320
321 ValueToValueMapTy dummy;
322 Function *NewF = CloneFunction(&F, dummy);
323 setFeatures(*NewF, NewProps.Features);
324 setAttributes(*NewF, NewProps.Attributes);
325 NewF->setVisibility(GlobalValue::DefaultVisibility);
326 NewF->setLinkage(GlobalValue::InternalLinkage);
327
328 // Swap names. If that is the only clone it will retain the name of now
329 // dead value. Preserve original name for externally visible functions.
330 if (F.hasName() && F.hasLocalLinkage()) {
331 std::string NewName = std::string(NewF->getName());
332 NewF->takeName(&F);
333 F.setName(NewName);
334 }
335
336 return NewF;
337 }
338
setFeatures(Function & F,const FeatureBitset & NewFeatures)339 void AMDGPUPropagateAttributes::setFeatures(Function &F,
340 const FeatureBitset &NewFeatures) {
341 std::string NewFeatureStr = getFeatureString(NewFeatures);
342
343 LLVM_DEBUG(dbgs() << "Set features "
344 << getFeatureString(NewFeatures & TargetFeatures)
345 << " on " << F.getName() << '\n');
346
347 F.removeFnAttr("target-features");
348 F.addFnAttr("target-features", NewFeatureStr);
349 }
350
setAttributes(Function & F,const ArrayRef<Optional<Attribute>> NewAttrs)351 void AMDGPUPropagateAttributes::setAttributes(Function &F,
352 const ArrayRef<Optional<Attribute>> NewAttrs) {
353 LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
354 for (unsigned I = 0; I < NumAttr; ++I) {
355 F.removeFnAttr(AttributeNames[I]);
356 if (NewAttrs[I]) {
357 LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
358 F.addFnAttr(*NewAttrs[I]);
359 }
360 }
361 }
362
363 std::string
getFeatureString(const FeatureBitset & Features) const364 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
365 {
366 std::string Ret;
367 for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
368 if (Features[KV.Value])
369 Ret += (StringRef("+") + KV.Key + ",").str();
370 else if (TargetFeatures[KV.Value])
371 Ret += (StringRef("-") + KV.Key + ",").str();
372 }
373 Ret.pop_back(); // Remove last comma.
374 return Ret;
375 }
376
runOnFunction(Function & F)377 bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
378 if (!TM) {
379 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
380 if (!TPC)
381 return false;
382
383 TM = &TPC->getTM<TargetMachine>();
384 }
385
386 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
387 return false;
388
389 return AMDGPUPropagateAttributes(TM, false).process(F);
390 }
391
runOnModule(Module & M)392 bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
393 if (!TM) {
394 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
395 if (!TPC)
396 return false;
397
398 TM = &TPC->getTM<TargetMachine>();
399 }
400
401 return AMDGPUPropagateAttributes(TM, true).process(M);
402 }
403
404 FunctionPass
createAMDGPUPropagateAttributesEarlyPass(const TargetMachine * TM)405 *llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
406 return new AMDGPUPropagateAttributesEarly(TM);
407 }
408
409 ModulePass
createAMDGPUPropagateAttributesLatePass(const TargetMachine * TM)410 *llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
411 return new AMDGPUPropagateAttributesLate(TM);
412 }
413
414 PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)415 AMDGPUPropagateAttributesEarlyPass::run(Function &F,
416 FunctionAnalysisManager &AM) {
417 if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
418 return PreservedAnalyses::all();
419
420 return AMDGPUPropagateAttributes(&TM, false).process(F)
421 ? PreservedAnalyses::none()
422 : PreservedAnalyses::all();
423 }
424
425 PreservedAnalyses
run(Module & M,ModuleAnalysisManager & AM)426 AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
427 return AMDGPUPropagateAttributes(&TM, true).process(M)
428 ? PreservedAnalyses::none()
429 : PreservedAnalyses::all();
430 }
431