//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
/// \file
/// Declares the pass factory functions, pass-registration hooks, and address
/// space utilities used by the AMDGPU code generator.
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H

#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"

namespace llvm {

class TargetMachine;

// GlobalISel passes
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
void initializeAMDGPURegBankCombinerPass(PassRegistry &);

void initializeAMDGPURegBankSelectPass(PassRegistry &);

// SI Passes
FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowPass();
FunctionPass *createSIFoldOperandsPass();
FunctionPass *createSIPeepholeSDWAPass();
FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass();
FunctionPass *createSIWholeQuadModePass();
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIOptimizeVGPRLiveRangePass();
FunctionPass *createSIFixSGPRCopiesPass();
FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIPreAllocateWWMRegsPass();
FunctionPass *createSIFormMemoryClausesPass();

FunctionPass *createSIPostRABundlerPass();
FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
FunctionPass *createAMDGPUUseNativeCallsPass();
ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
FunctionPass *createAMDGPURewriteOutArgumentsPass();
ModulePass *createAMDGPULowerModuleLDSPass();
FunctionPass *createSIModeRegisterPass();
FunctionPass *createGCNPreRAOptimizationsPass();

struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
  AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {}
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

private:
  TargetMachine &TM;
};

struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};

void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);

void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
extern char &AMDGPUMachineCFGStructurizerID;

void initializeAMDGPUAlwaysInlinePass(PassRegistry&);

Pass *createAMDGPUAnnotateKernelFeaturesPass();
Pass *createAMDGPUAttributorPass();
void initializeAMDGPUAttributorPass(PassRegistry &);
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;

// The DPP and Iterative options enable the atomic optimizer with the
// corresponding scan strategy, whereas None disables the atomic optimizer.
enum class ScanOptions { DPP, Iterative, None };
FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
extern char &AMDGPUAtomicOptimizerID;
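// Illustrative only: a target pass pipeline would typically create this pass
// with createAMDGPUAtomicOptimizerPass(ScanOptions::DPP) or
// ScanOptions::Iterative, and skip adding it when the strategy is
// ScanOptions::None.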

ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
extern char &AMDGPUCtorDtorLoweringLegacyPassID;

FunctionPass *createAMDGPULowerKernelArgumentsPass();
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
extern char &AMDGPULowerKernelArgumentsID;

FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
extern char &AMDGPUPromoteKernelArgumentsID;

struct AMDGPUPromoteKernelArgumentsPass
    : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};

ModulePass *createAMDGPULowerKernelAttributesPass();
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
extern char &AMDGPULowerKernelAttributesID;

struct AMDGPULowerKernelAttributesPass
    : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};

void initializeAMDGPULowerModuleLDSPass(PassRegistry &);
extern char &AMDGPULowerModuleLDSID;

struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
extern char &AMDGPURewriteOutArgumentsID;

void initializeGCNDPPCombinePass(PassRegistry &);
extern char &GCNDPPCombineID;

void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;

void initializeSIPeepholeSDWAPass(PassRegistry &);
extern char &SIPeepholeSDWAID;

void initializeSIShrinkInstructionsPass(PassRegistry&);
extern char &SIShrinkInstructionsID;

void initializeSIFixSGPRCopiesPass(PassRegistry &);
extern char &SIFixSGPRCopiesID;

void initializeSIFixVGPRCopiesPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;

void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;

void initializeSILowerSGPRSpillsPass(PassRegistry &);
extern char &SILowerSGPRSpillsID;

void initializeSILoadStoreOptimizerPass(PassRegistry &);
extern char &SILoadStoreOptimizerID;

void initializeSIWholeQuadModePass(PassRegistry &);
extern char &SIWholeQuadModeID;

void initializeSILowerControlFlowPass(PassRegistry &);
extern char &SILowerControlFlowID;

void initializeSIPreEmitPeepholePass(PassRegistry &);
extern char &SIPreEmitPeepholeID;

void initializeSILateBranchLoweringPass(PassRegistry &);
extern char &SILateBranchLoweringPassID;

void initializeSIOptimizeExecMaskingPass(PassRegistry &);
extern char &SIOptimizeExecMaskingID;

void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
extern char &SIPreAllocateWWMRegsID;

void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &);
extern char &AMDGPUSimplifyLibCallsID;

void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
extern char &AMDGPUUseNativeCallsID;

void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
extern char &AMDGPUPerfHintAnalysisID;

// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca();
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
extern char &AMDGPUPromoteAllocaID;

FunctionPass *createAMDGPUPromoteAllocaToVector();
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
extern char &AMDGPUPromoteAllocaToVectorID;

struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
  AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

private:
  TargetMachine &TM;
};

struct AMDGPUPromoteAllocaToVectorPass
    : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
  AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

private:
  TargetMachine &TM;
};

struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
  AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
      : TM(TM), ScanImpl(ScanImpl) {}
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);

private:
  TargetMachine &TM;
  ScanOptions ScanImpl;
};

Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
                                  CodeGenOpt::Level OptLevel);
ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);

struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
  AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

private:
  bool GlobalOpt;
};

class AMDGPUCodeGenPreparePass
    : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
private:
  TargetMachine &TM;

public:
  AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM) {}
  PreservedAnalyses run(Function &, FunctionAnalysisManager &);
};

FunctionPass *createAMDGPUAnnotateUniformValues();

ModulePass *createAMDGPUPrintfRuntimeBinding();
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
extern char &AMDGPUPrintfRuntimeBindingID;

void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
extern char &AMDGPUResourceUsageAnalysisID;

struct AMDGPUPrintfRuntimeBindingPass
    : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

ModulePass* createAMDGPUUnifyMetadataPass();
void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
extern char &AMDGPUUnifyMetadataID;

struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};

void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
extern char &SIOptimizeExecMaskingPreRAID;

void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
extern char &SIOptimizeVGPRLiveRangeID;

void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
extern char &AMDGPUAnnotateUniformValuesPassID;

void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
extern char &AMDGPUCodeGenPrepareID;

void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
extern char &AMDGPURemoveIncompatibleFunctionsID;

void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
extern char &AMDGPULateCodeGenPrepareID;

FunctionPass *createAMDGPURewriteUndefForPHIPass();
void initializeAMDGPURewriteUndefForPHIPass(PassRegistry &);
extern char &AMDGPURewriteUndefForPHIPassID;

void initializeSIAnnotateControlFlowPass(PassRegistry&);
extern char &SIAnnotateControlFlowPassID;

void initializeSIMemoryLegalizerPass(PassRegistry&);
extern char &SIMemoryLegalizerID;

void initializeSIModeRegisterPass(PassRegistry&);
extern char &SIModeRegisterID;

void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
extern char &AMDGPUInsertDelayAluID;

void initializeSIInsertHardClausesPass(PassRegistry &);
extern char &SIInsertHardClausesID;

void initializeSIInsertWaitcntsPass(PassRegistry&);
extern char &SIInsertWaitcntsID;

void initializeSIFormMemoryClausesPass(PassRegistry&);
extern char &SIFormMemoryClausesID;

void initializeSIPostRABundlerPass(PassRegistry&);
extern char &SIPostRABundlerID;

void initializeGCNCreateVOPDPass(PassRegistry &);
extern char &GCNCreateVOPDID;

void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
extern char &AMDGPUUnifyDivergentExitNodesID;

ImmutablePass *createAMDGPUAAWrapperPass();
void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
ImmutablePass *createAMDGPUExternalAAWrapperPass();
void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);

void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);

ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;

void initializeGCNNSAReassignPass(PassRegistry &);
extern char &GCNNSAReassignID;

void initializeGCNPreRALongBranchRegPass(PassRegistry &);
extern char &GCNPreRALongBranchRegID;

void initializeGCNPreRAOptimizationsPass(PassRegistry &);
extern char &GCNPreRAOptimizationsID;

FunctionPass *createAMDGPUSetWavePriorityPass();
void initializeAMDGPUSetWavePriorityPass(PassRegistry &);

void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
extern char &GCNRewritePartialRegUsesID;

namespace AMDGPU {
enum TargetIndex {
  TI_CONSTDATA_START,
  TI_SCRATCH_RSRC_DWORD0,
  TI_SCRATCH_RSRC_DWORD1,
  TI_SCRATCH_RSRC_DWORD2,
  TI_SCRATCH_RSRC_DWORD3
};
} // namespace AMDGPU

/// OpenCL uses address spaces to differentiate between
/// distinct memory regions on the hardware. On the CPU
/// all of the address spaces refer to the same memory;
/// on the GPU, however, each address space refers to a
/// separate region of memory that is disjoint from the
/// others.
namespace AMDGPUAS {
enum : unsigned {
  // The maximum value for flat, generic, local, private, constant and region.
  MAX_AMDGPU_ADDRESS = 8,

  FLAT_ADDRESS = 0,   ///< Address space for flat memory.
  GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
  REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)

  CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
  LOCAL_ADDRESS = 3,    ///< Address space for local memory.
  PRIVATE_ADDRESS = 5,  ///< Address space for private memory.

  CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.

  BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.
                          ///< Not used in backend.

  BUFFER_RESOURCE = 8, ///< Address space for 128-bit buffer resources.

  /// Internal address spaces. Can be freely renumbered.
  STREAMOUT_REGISTER = 128, ///< Address space for GS NGG Streamout registers.
  /// end Internal address spaces.

  /// Address space for direct addressable parameter memory (CONST0).
  PARAM_D_ADDRESS = 6,
  /// Address space for indirect addressable parameter memory (VTX1).
  PARAM_I_ADDRESS = 7,

  // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
  // this order to be able to dynamically index a constant buffer, for
  // example:
  //
  // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx

  CONSTANT_BUFFER_0 = 8,
  CONSTANT_BUFFER_1 = 9,
  CONSTANT_BUFFER_2 = 10,
  CONSTANT_BUFFER_3 = 11,
  CONSTANT_BUFFER_4 = 12,
  CONSTANT_BUFFER_5 = 13,
  CONSTANT_BUFFER_6 = 14,
  CONSTANT_BUFFER_7 = 15,
  CONSTANT_BUFFER_8 = 16,
  CONSTANT_BUFFER_9 = 17,
  CONSTANT_BUFFER_10 = 18,
  CONSTANT_BUFFER_11 = 19,
  CONSTANT_BUFFER_12 = 20,
  CONSTANT_BUFFER_13 = 21,
  CONSTANT_BUFFER_14 = 22,
  CONSTANT_BUFFER_15 = 23,

  // Some places use this if the address space can't be determined.
  UNKNOWN_ADDRESS_SPACE = ~0u,
};
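
// For illustration, these values are the address space numbers that appear on
// LLVM IR pointers for this target; e.g. a variable placed in local (LDS)
// memory would be written as
//   @lds = internal addrspace(3) global [64 x i32] undef
// matching LOCAL_ADDRESS above.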
} // namespace AMDGPUAS

namespace AMDGPU {

// FIXME: Missing constant_32bit
inline bool isFlatGlobalAddrSpace(unsigned AS) {
  return AS == AMDGPUAS::GLOBAL_ADDRESS ||
         AS == AMDGPUAS::FLAT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
}

inline bool isExtendedGlobalAddrSpace(unsigned AS) {
  return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
         AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
}
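
// For example, both helpers return true for AMDGPUAS::GLOBAL_ADDRESS and for
// any address space above MAX_AMDGPU_ADDRESS, while both return false for
// AMDGPUAS::LOCAL_ADDRESS.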

static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
  static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 8, "Addr space out of range");

  if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
    return true;

  // This array is indexed by the address space enum values 0 through 8.
  // clang-format off
  static const bool ASAliasRules[9][9] = {
    /*                   Flat   Global Region  Group Constant Private Const32 BufFatPtr BufRsrc */
    /* Flat     */        {true,  true,  false, true,  true,  true,  true,  true,  true},
    /* Global   */        {true,  true,  false, false, true,  false, true,  true,  true},
    /* Region   */        {false, false, true,  false, false, false, false, false, false},
    /* Group    */        {true,  false, false, true,  false, false, false, false, false},
    /* Constant */        {true,  true,  false, false, false, false, true,  true,  true},
    /* Private  */        {true,  false, false, false, false, true,  false, false, false},
    /* Constant 32-bit */ {true,  true,  false, false, true,  false, false, true,  true},
    /* Buffer Fat Ptr  */ {true,  true,  false, false, true,  false, true,  true,  true},
    /* Buffer Resource */ {true,  true,  false, false, true,  false, true,  true,  true},
  };
  // clang-format on

  return ASAliasRules[AS1][AS2];
}
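
// Illustrative reading of the table: PRIVATE_ADDRESS vs. GLOBAL_ADDRESS does
// not alias (private scratch and global memory are modeled as disjoint),
// while any address space above MAX_AMDGPU_ADDRESS conservatively aliases
// everything.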

} // namespace AMDGPU

} // End namespace llvm

#endif