//===-- AMDGPUMachineFunction.h ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
10 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
11 
12 #include "Utils/AMDGPUBaseInfo.h"
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/CodeGen/MachineFunction.h"
15 #include "llvm/IR/DataLayout.h"
16 #include "llvm/IR/Function.h"
17 #include "llvm/IR/GlobalValue.h"
18 #include "llvm/IR/GlobalVariable.h"
19 
20 namespace llvm {
21 
22 class AMDGPUSubtarget;
23 
24 class AMDGPUMachineFunction : public MachineFunctionInfo {
25   /// A map to keep track of local memory objects and their offsets within the
26   /// local memory space.
27   SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
28 
29 protected:
30   uint64_t ExplicitKernArgSize = 0; // Cache for this.
31   Align MaxKernArgAlign;        // Cache for this.
32 
33   /// Number of bytes in the LDS that are being used.
34   uint32_t LDSSize = 0;
35   uint32_t GDSSize = 0;
36 
37   /// Number of bytes in the LDS allocated statically. This field is only used
38   /// in the instruction selector and not part of the machine function info.
39   uint32_t StaticLDSSize = 0;
40   uint32_t StaticGDSSize = 0;
41 
42   /// Align for dynamic shared memory if any. Dynamic shared memory is
43   /// allocated directly after the static one, i.e., LDSSize. Need to pad
44   /// LDSSize to ensure that dynamic one is aligned accordingly.
45   /// The maximal alignment is updated during IR translation or lowering
46   /// stages.
47   Align DynLDSAlign;
48 
49   // Flag to check dynamic LDS usage by kernel.
50   bool UsesDynamicLDS = false;
51 
52   // Kernels + shaders. i.e. functions called by the hardware and not called
53   // by other functions.
54   bool IsEntryFunction = false;
55 
56   // Entry points called by other functions instead of directly by the hardware.
57   bool IsModuleEntryFunction = false;
58 
59   // Functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve CC.
60   bool IsChainFunction = false;
61 
62   bool NoSignedZerosFPMath = false;
63 
64   // Function may be memory bound.
65   bool MemoryBound = false;
66 
67   // Kernel may need limited waves per EU for better performance.
68   bool WaveLimiter = false;
69 
70 public:
71   AMDGPUMachineFunction(const Function &F, const AMDGPUSubtarget &ST);
72 
getExplicitKernArgSize()73   uint64_t getExplicitKernArgSize() const {
74     return ExplicitKernArgSize;
75   }
76 
getMaxKernArgAlign()77   Align getMaxKernArgAlign() const { return MaxKernArgAlign; }
78 
getLDSSize()79   uint32_t getLDSSize() const {
80     return LDSSize;
81   }
82 
getGDSSize()83   uint32_t getGDSSize() const {
84     return GDSSize;
85   }
86 
isEntryFunction()87   bool isEntryFunction() const {
88     return IsEntryFunction;
89   }
90 
isModuleEntryFunction()91   bool isModuleEntryFunction() const { return IsModuleEntryFunction; }
92 
isChainFunction()93   bool isChainFunction() const { return IsChainFunction; }
94 
95   // The stack is empty upon entry to this function.
isBottomOfStack()96   bool isBottomOfStack() const {
97     return isEntryFunction() || isChainFunction();
98   }
99 
hasNoSignedZerosFPMath()100   bool hasNoSignedZerosFPMath() const {
101     return NoSignedZerosFPMath;
102   }
103 
isMemoryBound()104   bool isMemoryBound() const {
105     return MemoryBound;
106   }
107 
needsWaveLimiter()108   bool needsWaveLimiter() const {
109     return WaveLimiter;
110   }
111 
allocateLDSGlobal(const DataLayout & DL,const GlobalVariable & GV)112   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV) {
113     return allocateLDSGlobal(DL, GV, DynLDSAlign);
114   }
115 
116   unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV,
117                              Align Trailing);
118 
119   static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
120   static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV);
121 
getDynLDSAlign()122   Align getDynLDSAlign() const { return DynLDSAlign; }
123 
124   void setDynLDSAlign(const Function &F, const GlobalVariable &GV);
125 
126   void setUsesDynamicLDS(bool DynLDS);
127 
128   bool isDynamicLDSUsed() const;
129 };
130 
131 }
132 #endif
133