1 //===- NVPTX.cpp ----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
11 #include "llvm/IR/IntrinsicsNVPTX.h"
12
13 using namespace clang;
14 using namespace clang::CodeGen;
15
16 //===----------------------------------------------------------------------===//
17 // NVPTX ABI Implementation
18 //===----------------------------------------------------------------------===//
19
20 namespace {
21
22 class NVPTXTargetCodeGenInfo;
23
24 class NVPTXABIInfo : public ABIInfo {
25 NVPTXTargetCodeGenInfo &CGInfo;
26
27 public:
NVPTXABIInfo(CodeGenTypes & CGT,NVPTXTargetCodeGenInfo & Info)28 NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
29 : ABIInfo(CGT), CGInfo(Info) {}
30
31 ABIArgInfo classifyReturnType(QualType RetTy) const;
32 ABIArgInfo classifyArgumentType(QualType Ty) const;
33
34 void computeInfo(CGFunctionInfo &FI) const override;
35 Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
36 QualType Ty) const override;
37 bool isUnsupportedType(QualType T) const;
38 ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
39 };
40
41 class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
42 public:
NVPTXTargetCodeGenInfo(CodeGenTypes & CGT)43 NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
44 : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
45
46 void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
47 CodeGen::CodeGenModule &M) const override;
48 bool shouldEmitStaticExternCAliases() const override;
49
getCUDADeviceBuiltinSurfaceDeviceType() const50 llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
51 // On the device side, surface reference is represented as an object handle
52 // in 64-bit integer.
53 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
54 }
55
getCUDADeviceBuiltinTextureDeviceType() const56 llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
57 // On the device side, texture reference is represented as an object handle
58 // in 64-bit integer.
59 return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
60 }
61
emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction & CGF,LValue Dst,LValue Src) const62 bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
63 LValue Src) const override {
64 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
65 return true;
66 }
67
emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction & CGF,LValue Dst,LValue Src) const68 bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
69 LValue Src) const override {
70 emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
71 return true;
72 }
73
74 // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
75 // resulting MDNode to the nvvm.annotations MDNode.
76 static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
77 int Operand);
78
79 private:
emitBuiltinSurfTexDeviceCopy(CodeGenFunction & CGF,LValue Dst,LValue Src)80 static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
81 LValue Src) {
82 llvm::Value *Handle = nullptr;
83 llvm::Constant *C =
84 llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
85 // Lookup `addrspacecast` through the constant pointer if any.
86 if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
87 C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
88 if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
89 // Load the handle from the specific global variable using
90 // `nvvm.texsurf.handle.internal` intrinsic.
91 Handle = CGF.EmitRuntimeCall(
92 CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
93 {GV->getType()}),
94 {GV}, "texsurf_handle");
95 } else
96 Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
97 CGF.EmitStoreOfScalar(Handle, Dst);
98 }
99 };
100
101 /// Checks if the type is unsupported directly by the current target.
isUnsupportedType(QualType T) const102 bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
103 ASTContext &Context = getContext();
104 if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
105 return true;
106 if (!Context.getTargetInfo().hasFloat128Type() &&
107 (T->isFloat128Type() ||
108 (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
109 return true;
110 if (const auto *EIT = T->getAs<BitIntType>())
111 return EIT->getNumBits() >
112 (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
113 if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
114 Context.getTypeSize(T) > 64U)
115 return true;
116 if (const auto *AT = T->getAsArrayTypeUnsafe())
117 return isUnsupportedType(AT->getElementType());
118 const auto *RT = T->getAs<RecordType>();
119 if (!RT)
120 return false;
121 const RecordDecl *RD = RT->getDecl();
122
123 // If this is a C++ record, check the bases first.
124 if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
125 for (const CXXBaseSpecifier &I : CXXRD->bases())
126 if (isUnsupportedType(I.getType()))
127 return true;
128
129 for (const FieldDecl *I : RD->fields())
130 if (isUnsupportedType(I->getType()))
131 return true;
132 return false;
133 }
134
135 /// Coerce the given type into an array with maximum allowed size of elements.
coerceToIntArrayWithLimit(QualType Ty,unsigned MaxSize) const136 ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
137 unsigned MaxSize) const {
138 // Alignment and Size are measured in bits.
139 const uint64_t Size = getContext().getTypeSize(Ty);
140 const uint64_t Alignment = getContext().getTypeAlign(Ty);
141 const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
142 llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
143 const uint64_t NumElements = (Size + Div - 1) / Div;
144 return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
145 }
146
classifyReturnType(QualType RetTy) const147 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
148 if (RetTy->isVoidType())
149 return ABIArgInfo::getIgnore();
150
151 if (getContext().getLangOpts().OpenMP &&
152 getContext().getLangOpts().OpenMPIsTargetDevice &&
153 isUnsupportedType(RetTy))
154 return coerceToIntArrayWithLimit(RetTy, 64);
155
156 // note: this is different from default ABI
157 if (!RetTy->isScalarType())
158 return ABIArgInfo::getDirect();
159
160 // Treat an enum type as its underlying type.
161 if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
162 RetTy = EnumTy->getDecl()->getIntegerType();
163
164 return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
165 : ABIArgInfo::getDirect());
166 }
167
classifyArgumentType(QualType Ty) const168 ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
169 // Treat an enum type as its underlying type.
170 if (const EnumType *EnumTy = Ty->getAs<EnumType>())
171 Ty = EnumTy->getDecl()->getIntegerType();
172
173 // Return aggregates type as indirect by value
174 if (isAggregateTypeForABI(Ty)) {
175 // Under CUDA device compilation, tex/surf builtin types are replaced with
176 // object types and passed directly.
177 if (getContext().getLangOpts().CUDAIsDevice) {
178 if (Ty->isCUDADeviceBuiltinSurfaceType())
179 return ABIArgInfo::getDirect(
180 CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
181 if (Ty->isCUDADeviceBuiltinTextureType())
182 return ABIArgInfo::getDirect(
183 CGInfo.getCUDADeviceBuiltinTextureDeviceType());
184 }
185 return getNaturalAlignIndirect(Ty, /* byval */ true);
186 }
187
188 if (const auto *EIT = Ty->getAs<BitIntType>()) {
189 if ((EIT->getNumBits() > 128) ||
190 (!getContext().getTargetInfo().hasInt128Type() &&
191 EIT->getNumBits() > 64))
192 return getNaturalAlignIndirect(Ty, /* byval */ true);
193 }
194
195 return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
196 : ABIArgInfo::getDirect());
197 }
198
computeInfo(CGFunctionInfo & FI) const199 void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
200 if (!getCXXABI().classifyReturnType(FI))
201 FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
202 for (auto &I : FI.arguments())
203 I.info = classifyArgumentType(I.type);
204
205 // Always honor user-specified calling convention.
206 if (FI.getCallingConvention() != llvm::CallingConv::C)
207 return;
208
209 FI.setEffectiveCallingConvention(getRuntimeCC());
210 }
211
EmitVAArg(CodeGenFunction & CGF,Address VAListAddr,QualType Ty) const212 Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
213 QualType Ty) const {
214 llvm_unreachable("NVPTX does not support varargs");
215 }
216
setTargetAttributes(const Decl * D,llvm::GlobalValue * GV,CodeGen::CodeGenModule & M) const217 void NVPTXTargetCodeGenInfo::setTargetAttributes(
218 const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
219 if (GV->isDeclaration())
220 return;
221 const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
222 if (VD) {
223 if (M.getLangOpts().CUDA) {
224 if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
225 addNVVMMetadata(GV, "surface", 1);
226 else if (VD->getType()->isCUDADeviceBuiltinTextureType())
227 addNVVMMetadata(GV, "texture", 1);
228 return;
229 }
230 }
231
232 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
233 if (!FD) return;
234
235 llvm::Function *F = cast<llvm::Function>(GV);
236
237 // Perform special handling in OpenCL mode
238 if (M.getLangOpts().OpenCL) {
239 // Use OpenCL function attributes to check for kernel functions
240 // By default, all functions are device functions
241 if (FD->hasAttr<OpenCLKernelAttr>()) {
242 // OpenCL __kernel functions get kernel metadata
243 // Create !{<func-ref>, metadata !"kernel", i32 1} node
244 addNVVMMetadata(F, "kernel", 1);
245 // And kernel functions are not subject to inlining
246 F->addFnAttr(llvm::Attribute::NoInline);
247 }
248 }
249
250 // Perform special handling in CUDA mode.
251 if (M.getLangOpts().CUDA) {
252 // CUDA __global__ functions get a kernel metadata entry. Since
253 // __global__ functions cannot be called from the device, we do not
254 // need to set the noinline attribute.
255 if (FD->hasAttr<CUDAGlobalAttr>()) {
256 // Create !{<func-ref>, metadata !"kernel", i32 1} node
257 addNVVMMetadata(F, "kernel", 1);
258 }
259 if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>())
260 M.handleCUDALaunchBoundsAttr(F, Attr);
261 }
262
263 // Attach kernel metadata directly if compiling for NVPTX.
264 if (FD->hasAttr<NVPTXKernelAttr>()) {
265 addNVVMMetadata(F, "kernel", 1);
266 }
267 }
268
addNVVMMetadata(llvm::GlobalValue * GV,StringRef Name,int Operand)269 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
270 StringRef Name, int Operand) {
271 llvm::Module *M = GV->getParent();
272 llvm::LLVMContext &Ctx = M->getContext();
273
274 // Get "nvvm.annotations" metadata node
275 llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
276
277 llvm::Metadata *MDVals[] = {
278 llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
279 llvm::ConstantAsMetadata::get(
280 llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
281 // Append metadata to nvvm.annotations
282 MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
283 }
284
shouldEmitStaticExternCAliases() const285 bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
286 return false;
287 }
288 }
289
handleCUDALaunchBoundsAttr(llvm::Function * F,const CUDALaunchBoundsAttr * Attr,int32_t * MaxThreadsVal,int32_t * MinBlocksVal,int32_t * MaxClusterRankVal)290 void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
291 const CUDALaunchBoundsAttr *Attr,
292 int32_t *MaxThreadsVal,
293 int32_t *MinBlocksVal,
294 int32_t *MaxClusterRankVal) {
295 // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
296 llvm::APSInt MaxThreads(32);
297 MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
298 if (MaxThreads > 0) {
299 if (MaxThreadsVal)
300 *MaxThreadsVal = MaxThreads.getExtValue();
301 if (F) {
302 // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
303 NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
304 MaxThreads.getExtValue());
305 }
306 }
307
308 // min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
309 // was not specified in __launch_bounds__ or if the user specified a 0 value,
310 // we don't have to add a PTX directive.
311 if (Attr->getMinBlocks()) {
312 llvm::APSInt MinBlocks(32);
313 MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
314 if (MinBlocks > 0) {
315 if (MinBlocksVal)
316 *MinBlocksVal = MinBlocks.getExtValue();
317 if (F) {
318 // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
319 NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
320 MinBlocks.getExtValue());
321 }
322 }
323 }
324 if (Attr->getMaxBlocks()) {
325 llvm::APSInt MaxBlocks(32);
326 MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
327 if (MaxBlocks > 0) {
328 if (MaxClusterRankVal)
329 *MaxClusterRankVal = MaxBlocks.getExtValue();
330 if (F) {
331 // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
332 NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
333 MaxBlocks.getExtValue());
334 }
335 }
336 }
337 }
338
339 std::unique_ptr<TargetCodeGenInfo>
createNVPTXTargetCodeGenInfo(CodeGenModule & CGM)340 CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
341 return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
342 }
343