1 //===- NVPTX.cpp ----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ABIInfoImpl.h"
10 #include "TargetInfo.h"
11 #include "llvm/IR/IntrinsicsNVPTX.h"
12 
13 using namespace clang;
14 using namespace clang::CodeGen;
15 
16 //===----------------------------------------------------------------------===//
17 // NVPTX ABI Implementation
18 //===----------------------------------------------------------------------===//
19 
20 namespace {
21 
22 class NVPTXTargetCodeGenInfo;
23 
24 class NVPTXABIInfo : public ABIInfo {
25   NVPTXTargetCodeGenInfo &CGInfo;
26 
27 public:
28   NVPTXABIInfo(CodeGenTypes &CGT, NVPTXTargetCodeGenInfo &Info)
29       : ABIInfo(CGT), CGInfo(Info) {}
30 
31   ABIArgInfo classifyReturnType(QualType RetTy) const;
32   ABIArgInfo classifyArgumentType(QualType Ty) const;
33 
34   void computeInfo(CGFunctionInfo &FI) const override;
35   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
36                     QualType Ty) const override;
37   bool isUnsupportedType(QualType T) const;
38   ABIArgInfo coerceToIntArrayWithLimit(QualType Ty, unsigned MaxSize) const;
39 };
40 
41 class NVPTXTargetCodeGenInfo : public TargetCodeGenInfo {
42 public:
43   NVPTXTargetCodeGenInfo(CodeGenTypes &CGT)
44       : TargetCodeGenInfo(std::make_unique<NVPTXABIInfo>(CGT, *this)) {}
45 
46   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
47                            CodeGen::CodeGenModule &M) const override;
48   bool shouldEmitStaticExternCAliases() const override;
49 
50   llvm::Type *getCUDADeviceBuiltinSurfaceDeviceType() const override {
51     // On the device side, surface reference is represented as an object handle
52     // in 64-bit integer.
53     return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
54   }
55 
56   llvm::Type *getCUDADeviceBuiltinTextureDeviceType() const override {
57     // On the device side, texture reference is represented as an object handle
58     // in 64-bit integer.
59     return llvm::Type::getInt64Ty(getABIInfo().getVMContext());
60   }
61 
62   bool emitCUDADeviceBuiltinSurfaceDeviceCopy(CodeGenFunction &CGF, LValue Dst,
63                                               LValue Src) const override {
64     emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
65     return true;
66   }
67 
68   bool emitCUDADeviceBuiltinTextureDeviceCopy(CodeGenFunction &CGF, LValue Dst,
69                                               LValue Src) const override {
70     emitBuiltinSurfTexDeviceCopy(CGF, Dst, Src);
71     return true;
72   }
73 
74 private:
75   // Adds a NamedMDNode with GV, Name, and Operand as operands, and adds the
76   // resulting MDNode to the nvvm.annotations MDNode.
77   static void addNVVMMetadata(llvm::GlobalValue *GV, StringRef Name,
78                               int Operand);
79 
80   static void emitBuiltinSurfTexDeviceCopy(CodeGenFunction &CGF, LValue Dst,
81                                            LValue Src) {
82     llvm::Value *Handle = nullptr;
83     llvm::Constant *C =
84         llvm::dyn_cast<llvm::Constant>(Src.getAddress(CGF).getPointer());
85     // Lookup `addrspacecast` through the constant pointer if any.
86     if (auto *ASC = llvm::dyn_cast_or_null<llvm::AddrSpaceCastOperator>(C))
87       C = llvm::cast<llvm::Constant>(ASC->getPointerOperand());
88     if (auto *GV = llvm::dyn_cast_or_null<llvm::GlobalVariable>(C)) {
89       // Load the handle from the specific global variable using
90       // `nvvm.texsurf.handle.internal` intrinsic.
91       Handle = CGF.EmitRuntimeCall(
92           CGF.CGM.getIntrinsic(llvm::Intrinsic::nvvm_texsurf_handle_internal,
93                                {GV->getType()}),
94           {GV}, "texsurf_handle");
95     } else
96       Handle = CGF.EmitLoadOfScalar(Src, SourceLocation());
97     CGF.EmitStoreOfScalar(Handle, Dst);
98   }
99 };
100 
101 /// Checks if the type is unsupported directly by the current target.
102 bool NVPTXABIInfo::isUnsupportedType(QualType T) const {
103   ASTContext &Context = getContext();
104   if (!Context.getTargetInfo().hasFloat16Type() && T->isFloat16Type())
105     return true;
106   if (!Context.getTargetInfo().hasFloat128Type() &&
107       (T->isFloat128Type() ||
108        (T->isRealFloatingType() && Context.getTypeSize(T) == 128)))
109     return true;
110   if (const auto *EIT = T->getAs<BitIntType>())
111     return EIT->getNumBits() >
112            (Context.getTargetInfo().hasInt128Type() ? 128U : 64U);
113   if (!Context.getTargetInfo().hasInt128Type() && T->isIntegerType() &&
114       Context.getTypeSize(T) > 64U)
115     return true;
116   if (const auto *AT = T->getAsArrayTypeUnsafe())
117     return isUnsupportedType(AT->getElementType());
118   const auto *RT = T->getAs<RecordType>();
119   if (!RT)
120     return false;
121   const RecordDecl *RD = RT->getDecl();
122 
123   // If this is a C++ record, check the bases first.
124   if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD))
125     for (const CXXBaseSpecifier &I : CXXRD->bases())
126       if (isUnsupportedType(I.getType()))
127         return true;
128 
129   for (const FieldDecl *I : RD->fields())
130     if (isUnsupportedType(I->getType()))
131       return true;
132   return false;
133 }
134 
135 /// Coerce the given type into an array with maximum allowed size of elements.
136 ABIArgInfo NVPTXABIInfo::coerceToIntArrayWithLimit(QualType Ty,
137                                                    unsigned MaxSize) const {
138   // Alignment and Size are measured in bits.
139   const uint64_t Size = getContext().getTypeSize(Ty);
140   const uint64_t Alignment = getContext().getTypeAlign(Ty);
141   const unsigned Div = std::min<unsigned>(MaxSize, Alignment);
142   llvm::Type *IntType = llvm::Type::getIntNTy(getVMContext(), Div);
143   const uint64_t NumElements = (Size + Div - 1) / Div;
144   return ABIArgInfo::getDirect(llvm::ArrayType::get(IntType, NumElements));
145 }
146 
147 ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const {
148   if (RetTy->isVoidType())
149     return ABIArgInfo::getIgnore();
150 
151   if (getContext().getLangOpts().OpenMP &&
152       getContext().getLangOpts().OpenMPIsTargetDevice &&
153       isUnsupportedType(RetTy))
154     return coerceToIntArrayWithLimit(RetTy, 64);
155 
156   // note: this is different from default ABI
157   if (!RetTy->isScalarType())
158     return ABIArgInfo::getDirect();
159 
160   // Treat an enum type as its underlying type.
161   if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
162     RetTy = EnumTy->getDecl()->getIntegerType();
163 
164   return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy)
165                                                : ABIArgInfo::getDirect());
166 }
167 
168 ABIArgInfo NVPTXABIInfo::classifyArgumentType(QualType Ty) const {
169   // Treat an enum type as its underlying type.
170   if (const EnumType *EnumTy = Ty->getAs<EnumType>())
171     Ty = EnumTy->getDecl()->getIntegerType();
172 
173   // Return aggregates type as indirect by value
174   if (isAggregateTypeForABI(Ty)) {
175     // Under CUDA device compilation, tex/surf builtin types are replaced with
176     // object types and passed directly.
177     if (getContext().getLangOpts().CUDAIsDevice) {
178       if (Ty->isCUDADeviceBuiltinSurfaceType())
179         return ABIArgInfo::getDirect(
180             CGInfo.getCUDADeviceBuiltinSurfaceDeviceType());
181       if (Ty->isCUDADeviceBuiltinTextureType())
182         return ABIArgInfo::getDirect(
183             CGInfo.getCUDADeviceBuiltinTextureDeviceType());
184     }
185     return getNaturalAlignIndirect(Ty, /* byval */ true);
186   }
187 
188   if (const auto *EIT = Ty->getAs<BitIntType>()) {
189     if ((EIT->getNumBits() > 128) ||
190         (!getContext().getTargetInfo().hasInt128Type() &&
191          EIT->getNumBits() > 64))
192       return getNaturalAlignIndirect(Ty, /* byval */ true);
193   }
194 
195   return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty)
196                                             : ABIArgInfo::getDirect());
197 }
198 
199 void NVPTXABIInfo::computeInfo(CGFunctionInfo &FI) const {
200   if (!getCXXABI().classifyReturnType(FI))
201     FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
202   for (auto &I : FI.arguments())
203     I.info = classifyArgumentType(I.type);
204 
205   // Always honor user-specified calling convention.
206   if (FI.getCallingConvention() != llvm::CallingConv::C)
207     return;
208 
209   FI.setEffectiveCallingConvention(getRuntimeCC());
210 }
211 
212 Address NVPTXABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
213                                 QualType Ty) const {
214   llvm_unreachable("NVPTX does not support varargs");
215 }
216 
217 void NVPTXTargetCodeGenInfo::setTargetAttributes(
218     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
219   if (GV->isDeclaration())
220     return;
221   const VarDecl *VD = dyn_cast_or_null<VarDecl>(D);
222   if (VD) {
223     if (M.getLangOpts().CUDA) {
224       if (VD->getType()->isCUDADeviceBuiltinSurfaceType())
225         addNVVMMetadata(GV, "surface", 1);
226       else if (VD->getType()->isCUDADeviceBuiltinTextureType())
227         addNVVMMetadata(GV, "texture", 1);
228       return;
229     }
230   }
231 
232   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
233   if (!FD) return;
234 
235   llvm::Function *F = cast<llvm::Function>(GV);
236 
237   // Perform special handling in OpenCL mode
238   if (M.getLangOpts().OpenCL) {
239     // Use OpenCL function attributes to check for kernel functions
240     // By default, all functions are device functions
241     if (FD->hasAttr<OpenCLKernelAttr>()) {
242       // OpenCL __kernel functions get kernel metadata
243       // Create !{<func-ref>, metadata !"kernel", i32 1} node
244       addNVVMMetadata(F, "kernel", 1);
245       // And kernel functions are not subject to inlining
246       F->addFnAttr(llvm::Attribute::NoInline);
247     }
248   }
249 
250   // Perform special handling in CUDA mode.
251   if (M.getLangOpts().CUDA) {
252     // CUDA __global__ functions get a kernel metadata entry.  Since
253     // __global__ functions cannot be called from the device, we do not
254     // need to set the noinline attribute.
255     if (FD->hasAttr<CUDAGlobalAttr>()) {
256       // Create !{<func-ref>, metadata !"kernel", i32 1} node
257       addNVVMMetadata(F, "kernel", 1);
258     }
259     if (CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>()) {
260       // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
261       llvm::APSInt MaxThreads(32);
262       MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(M.getContext());
263       if (MaxThreads > 0)
264         addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
265 
266       // min blocks is an optional argument for CUDALaunchBoundsAttr. If it was
267       // not specified in __launch_bounds__ or if the user specified a 0 value,
268       // we don't have to add a PTX directive.
269       if (Attr->getMinBlocks()) {
270         llvm::APSInt MinBlocks(32);
271         MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(M.getContext());
272         if (MinBlocks > 0)
273           // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
274           addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
275       }
276     }
277   }
278 
279   // Attach kernel metadata directly if compiling for NVPTX.
280   if (FD->hasAttr<NVPTXKernelAttr>()) {
281     addNVVMMetadata(F, "kernel", 1);
282   }
283 }
284 
285 void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::GlobalValue *GV,
286                                              StringRef Name, int Operand) {
287   llvm::Module *M = GV->getParent();
288   llvm::LLVMContext &Ctx = M->getContext();
289 
290   // Get "nvvm.annotations" metadata node
291   llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations");
292 
293   llvm::Metadata *MDVals[] = {
294       llvm::ConstantAsMetadata::get(GV), llvm::MDString::get(Ctx, Name),
295       llvm::ConstantAsMetadata::get(
296           llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand))};
297   // Append metadata to nvvm.annotations
298   MD->addOperand(llvm::MDNode::get(Ctx, MDVals));
299 }
300 
301 bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
302   return false;
303 }
304 }
305 
306 std::unique_ptr<TargetCodeGenInfo>
307 CodeGen::createNVPTXTargetCodeGenInfo(CodeGenModule &CGM) {
308   return std::make_unique<NVPTXTargetCodeGenInfo>(CGM.getTypes());
309 }
310