1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 using namespace clang;
21 using namespace clang::targets;
22 
23 namespace clang {
24 namespace targets {
25 
26 // If you edit the description strings, make sure you update
27 // getPointerWidthV().
28 
// Data layout description handed to resetDataLayout() for triples that are not
// amdgcn. The literal is split per component group purely for readability;
// only the concatenated value matters.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1";
32 
// Data layout description handed to resetDataLayout() for amdgcn triples. The
// literal is split per component group purely for readability; only the
// concatenated value matters.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1-ni:7:8";
38 
39 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
40     Generic,  // Default
41     Global,   // opencl_global
42     Local,    // opencl_local
43     Constant, // opencl_constant
44     Private,  // opencl_private
45     Generic,  // opencl_generic
46     Global,   // opencl_global_device
47     Global,   // opencl_global_host
48     Global,   // cuda_device
49     Constant, // cuda_constant
50     Local,    // cuda_shared
51     Global,   // sycl_global
52     Global,   // sycl_global_device
53     Global,   // sycl_global_host
54     Local,    // sycl_local
55     Private,  // sycl_private
56     Generic,  // ptr32_sptr
57     Generic,  // ptr32_uptr
58     Generic,  // ptr64
59     Generic,  // hlsl_groupshared
60 };
61 
62 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
63     Private,  // Default
64     Global,   // opencl_global
65     Local,    // opencl_local
66     Constant, // opencl_constant
67     Private,  // opencl_private
68     Generic,  // opencl_generic
69     Global,   // opencl_global_device
70     Global,   // opencl_global_host
71     Global,   // cuda_device
72     Constant, // cuda_constant
73     Local,    // cuda_shared
74     // SYCL address space values for this map are dummy
75     Generic, // sycl_global
76     Generic, // sycl_global_device
77     Generic, // sycl_global_host
78     Generic, // sycl_local
79     Generic, // sycl_private
80     Generic, // ptr32_sptr
81     Generic, // ptr32_uptr
82     Generic, // ptr64
83     Generic, // hlsl_groupshared
84 
85 };
86 } // namespace targets
87 } // namespace clang
88 
89 static constexpr Builtin::Info BuiltinInfo[] = {
90 #define BUILTIN(ID, TYPE, ATTRS)                                               \
91   {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
92 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
93   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
94 #include "clang/Basic/BuiltinsAMDGPU.def"
95 };
96 
97 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
98   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
99   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
100   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
101   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
102   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
103   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
104   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
105   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
106   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
107   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
108   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
109   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
110   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
111   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
112   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
113   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
114   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
115   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
116   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
117   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
118   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
119   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
120   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
121   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
122   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
123   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
124   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
125   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
126   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
127   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
128   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
129   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
130   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
131   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
132   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
133   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
134   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
135   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
136   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
137   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
138   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
139   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
140   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
141   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
142   "flat_scratch_lo", "flat_scratch_hi",
143   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
144   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
145   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
146   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
147   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
148   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
149   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
150   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
151   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
152   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
153   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
154   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
155   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
156   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
157   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
158   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
159   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
160   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
161   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
162   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
163   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
164   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
165   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
166   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
167   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
168   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
169   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
170   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
171   "a252", "a253", "a254", "a255"
172 };
173 
174 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
175   return llvm::ArrayRef(GCCRegNames);
176 }
177 
178 bool AMDGPUTargetInfo::initFeatureMap(
179     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
180     const std::vector<std::string> &FeatureVec) const {
181 
182   using namespace llvm::AMDGPU;
183   fillAMDGPUFeatureMap(CPU, getTriple(), Features);
184   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
185     return false;
186 
187   // TODO: Should move this logic into TargetParser
188   std::string ErrorMsg;
189   if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
190     Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
191     return false;
192   }
193 
194   return true;
195 }
196 
197 void AMDGPUTargetInfo::fillValidCPUList(
198     SmallVectorImpl<StringRef> &Values) const {
199   if (isAMDGCN(getTriple()))
200     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
201   else
202     llvm::AMDGPU::fillValidArchListR600(Values);
203 }
204 
205 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
206   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
207 }
208 
209 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
210                                    const TargetOptions &Opts)
211     : TargetInfo(Triple),
212       GPUKind(isAMDGCN(Triple) ?
213               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
214               llvm::AMDGPU::parseArchR600(Opts.CPU)),
215       GPUFeatures(isAMDGCN(Triple) ?
216                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
217                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
218   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
219                                         : DataLayoutStringR600);
220 
221   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
222                      !isAMDGCN(Triple));
223   UseAddrSpaceMapMangling = true;
224 
225   if (isAMDGCN(Triple)) {
226     // __bf16 is always available as a load/store only type on AMDGCN.
227     BFloat16Width = BFloat16Align = 16;
228     BFloat16Format = &llvm::APFloat::BFloat();
229   }
230 
231   HasLegalHalfType = true;
232   HasFloat16 = true;
233   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
234   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
235 
236   // Set pointer width and alignment for the generic address space.
237   PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
238   if (getMaxPointerWidth() == 64) {
239     LongWidth = LongAlign = 64;
240     SizeType = UnsignedLong;
241     PtrDiffType = SignedLong;
242     IntPtrType = SignedLong;
243   }
244 
245   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
246   CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
247   ReadOnlyFeatures.insert("image-insts");
248 }
249 
250 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
251   TargetInfo::adjust(Diags, Opts);
252   // ToDo: There are still a few places using default address space as private
253   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
254   // can be removed from the following line.
255   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
256                      !isAMDGCN(getTriple()));
257 }
258 
259 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
260   return llvm::ArrayRef(BuiltinInfo,
261                         clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
262 }
263 
264 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
265                                         MacroBuilder &Builder) const {
266   Builder.defineMacro("__AMD__");
267   Builder.defineMacro("__AMDGPU__");
268 
269   if (isAMDGCN(getTriple()))
270     Builder.defineMacro("__AMDGCN__");
271   else
272     Builder.defineMacro("__R600__");
273 
274   if (GPUKind != llvm::AMDGPU::GK_NONE) {
275     StringRef CanonName = isAMDGCN(getTriple()) ?
276       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
277     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
278     // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
279     if (isAMDGCN(getTriple())) {
280       assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name");
281       Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
282                           Twine("__"));
283     }
284     if (isAMDGCN(getTriple())) {
285       Builder.defineMacro("__amdgcn_processor__",
286                           Twine("\"") + Twine(CanonName) + Twine("\""));
287       Builder.defineMacro("__amdgcn_target_id__",
288                           Twine("\"") + Twine(*getTargetID()) + Twine("\""));
289       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
290         auto Loc = OffloadArchFeatures.find(F);
291         if (Loc != OffloadArchFeatures.end()) {
292           std::string NewF = F.str();
293           std::replace(NewF.begin(), NewF.end(), '-', '_');
294           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
295                                   Twine("__"),
296                               Loc->second ? "1" : "0");
297         }
298       }
299     }
300   }
301 
302   if (AllowAMDGPUUnsafeFPAtomics)
303     Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
304 
305   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
306   // removed in the near future.
307   if (hasFMAF())
308     Builder.defineMacro("__HAS_FMAF__");
309   if (hasFastFMAF())
310     Builder.defineMacro("FP_FAST_FMAF");
311   if (hasLDEXPF())
312     Builder.defineMacro("__HAS_LDEXPF__");
313   if (hasFP64())
314     Builder.defineMacro("__HAS_FP64__");
315   if (hasFastFMA())
316     Builder.defineMacro("FP_FAST_FMA");
317 
318   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
319   // ToDo: deprecate this macro for naming consistency.
320   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
321   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
322 }
323 
324 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
325   assert(HalfFormat == Aux->HalfFormat);
326   assert(FloatFormat == Aux->FloatFormat);
327   assert(DoubleFormat == Aux->DoubleFormat);
328 
329   // On x86_64 long double is 80-bit extended precision format, which is
330   // not supported by AMDGPU. 128-bit floating point format is also not
331   // supported by AMDGPU. Therefore keep its own format for these two types.
332   auto SaveLongDoubleFormat = LongDoubleFormat;
333   auto SaveFloat128Format = Float128Format;
334   auto SaveLongDoubleWidth = LongDoubleWidth;
335   auto SaveLongDoubleAlign = LongDoubleAlign;
336   copyAuxTarget(Aux);
337   LongDoubleFormat = SaveLongDoubleFormat;
338   Float128Format = SaveFloat128Format;
339   LongDoubleWidth = SaveLongDoubleWidth;
340   LongDoubleAlign = SaveLongDoubleAlign;
341   // For certain builtin types support on the host target, claim they are
342   // support to pass the compilation of the host code during the device-side
343   // compilation.
344   // FIXME: As the side effect, we also accept `__float128` uses in the device
345   // code. To rejct these builtin types supported in the host target but not in
346   // the device target, one approach would support `device_builtin` attribute
347   // so that we could tell the device builtin types from the host ones. The
348   // also solves the different representations of the same builtin type, such
349   // as `size_t` in the MSVC environment.
350   if (Aux->hasFloat128Type()) {
351     HasFloat128 = true;
352     Float128Format = DoubleFormat;
353   }
354 }
355