1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 using namespace clang;
21 using namespace clang::targets;
22 
23 namespace clang {
24 namespace targets {
25 
26 // If you edit the description strings, make sure you update
27 // getPointerWidthV().
28 
29 static const char *const DataLayoutStringR600 =
30     "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
31     "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
32 
// LLVM data layout description used for amdgcn triples. The literal is split
// by component: default pointer layout, per-address-space pointer layouts
// (p1-p6, then the wider p7-p9), integer and vector alignments, native integer
// widths / stack alignment / alloca and global address spaces, and finally the
// non-integral address spaces.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7:8:9";
39 
// Language address space -> AMDGPU target address space table in which the
// default language address space maps to FLAT_ADDRESS. setAddressSpaceMap()
// installs this table when it is called with DefaultIsPrivate == false.
//
// Entries are positional; the trailing comment on each line names the language
// address space that the entry describes (presumably in LangAS enumerator
// order -- verify against the LangAS definition before adding or reordering
// entries).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    llvm::AMDGPUAS::FLAT_ADDRESS,     // Default
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_host
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // sycl_local
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // sycl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
    // NOTE(review): hlsl_groupshared maps to FLAT_ADDRESS here, unlike the
    // other "local"/"shared" entries which use LOCAL_ADDRESS -- confirm this
    // is intentional.
    llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
};
62 
// Language address space -> AMDGPU target address space table in which the
// default language address space maps to PRIVATE_ADDRESS. setAddressSpaceMap()
// installs this table when it is called with DefaultIsPrivate == true.
//
// Entries are positional; the trailing comment on each line names the language
// address space that the entry describes (presumably in LangAS enumerator
// order -- verify against the LangAS definition before adding or reordering
// entries).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // Default
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
    // SYCL address space values for this map are dummy
    llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global
    llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_device
    llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_global_host
    llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_local
    llvm::AMDGPUAS::FLAT_ADDRESS, // sycl_private
    llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_sptr
    llvm::AMDGPUAS::FLAT_ADDRESS, // ptr32_uptr
    llvm::AMDGPUAS::FLAT_ADDRESS, // ptr64
    llvm::AMDGPUAS::FLAT_ADDRESS, // hlsl_groupshared

};
87 } // namespace targets
88 } // namespace clang
89 
// Table of AMDGPU builtin descriptors, one entry per BUILTIN/TARGET_BUILTIN
// line expanded from BuiltinsAMDGPU.def. getTargetBuiltins() exposes this
// array to callers.
static constexpr Builtin::Info BuiltinInfo[] = {
// A plain BUILTIN has no required target feature (nullptr).
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
// A TARGET_BUILTIN additionally records the FEATURE string it was declared
// with.
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
97 
// Register names returned by getGCCRegNames() (presumably the names accepted
// in inline-asm register constraints and clobbers -- confirm against the
// TargetInfo users). The table lists, in order:
//   * v0 - v255
//   * s0 - s127
//   * the special registers exec, vcc, scc, m0, flat_scratch and the
//     *_lo / *_hi halves of exec, vcc and flat_scratch
//   * a0 - a255
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
174 
175 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
176   return llvm::ArrayRef(GCCRegNames);
177 }
178 
179 bool AMDGPUTargetInfo::initFeatureMap(
180     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
181     const std::vector<std::string> &FeatureVec) const {
182 
183   using namespace llvm::AMDGPU;
184   fillAMDGPUFeatureMap(CPU, getTriple(), Features);
185   if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
186     return false;
187 
188   // TODO: Should move this logic into TargetParser
189   std::string ErrorMsg;
190   if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
191     Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
192     return false;
193   }
194 
195   return true;
196 }
197 
198 void AMDGPUTargetInfo::fillValidCPUList(
199     SmallVectorImpl<StringRef> &Values) const {
200   if (isAMDGCN(getTriple()))
201     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
202   else
203     llvm::AMDGPU::fillValidArchListR600(Values);
204 }
205 
206 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
207   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
208 }
209 
210 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
211                                    const TargetOptions &Opts)
212     : TargetInfo(Triple),
213       GPUKind(isAMDGCN(Triple) ?
214               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
215               llvm::AMDGPU::parseArchR600(Opts.CPU)),
216       GPUFeatures(isAMDGCN(Triple) ?
217                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
218                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
219   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
220                                         : DataLayoutStringR600);
221 
222   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
223                      !isAMDGCN(Triple));
224   UseAddrSpaceMapMangling = true;
225 
226   if (isAMDGCN(Triple)) {
227     // __bf16 is always available as a load/store only type on AMDGCN.
228     BFloat16Width = BFloat16Align = 16;
229     BFloat16Format = &llvm::APFloat::BFloat();
230   }
231 
232   HasLegalHalfType = true;
233   HasFloat16 = true;
234   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
235   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
236 
237   // Set pointer width and alignment for the generic address space.
238   PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
239   if (getMaxPointerWidth() == 64) {
240     LongWidth = LongAlign = 64;
241     SizeType = UnsignedLong;
242     PtrDiffType = SignedLong;
243     IntPtrType = SignedLong;
244   }
245 
246   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
247   CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
248   for (auto F : {"image-insts", "gws"})
249     ReadOnlyFeatures.insert(F);
250   HalfArgsAndReturns = true;
251 }
252 
253 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
254   TargetInfo::adjust(Diags, Opts);
255   // ToDo: There are still a few places using default address space as private
256   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
257   // can be removed from the following line.
258   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
259                      !isAMDGCN(getTriple()));
260 }
261 
262 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
263   return llvm::ArrayRef(BuiltinInfo,
264                         clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
265 }
266 
267 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
268                                         MacroBuilder &Builder) const {
269   Builder.defineMacro("__AMD__");
270   Builder.defineMacro("__AMDGPU__");
271 
272   if (isAMDGCN(getTriple()))
273     Builder.defineMacro("__AMDGCN__");
274   else
275     Builder.defineMacro("__R600__");
276 
277   if (GPUKind != llvm::AMDGPU::GK_NONE) {
278     StringRef CanonName = isAMDGCN(getTriple()) ?
279       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
280     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
281     // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
282     if (isAMDGCN(getTriple())) {
283       assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name");
284       Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
285                           Twine("__"));
286     }
287     if (isAMDGCN(getTriple())) {
288       Builder.defineMacro("__amdgcn_processor__",
289                           Twine("\"") + Twine(CanonName) + Twine("\""));
290       Builder.defineMacro("__amdgcn_target_id__",
291                           Twine("\"") + Twine(*getTargetID()) + Twine("\""));
292       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
293         auto Loc = OffloadArchFeatures.find(F);
294         if (Loc != OffloadArchFeatures.end()) {
295           std::string NewF = F.str();
296           std::replace(NewF.begin(), NewF.end(), '-', '_');
297           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
298                                   Twine("__"),
299                               Loc->second ? "1" : "0");
300         }
301       }
302     }
303   }
304 
305   if (AllowAMDGPUUnsafeFPAtomics)
306     Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
307 
308   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
309   // removed in the near future.
310   if (hasFMAF())
311     Builder.defineMacro("__HAS_FMAF__");
312   if (hasFastFMAF())
313     Builder.defineMacro("FP_FAST_FMAF");
314   if (hasLDEXPF())
315     Builder.defineMacro("__HAS_LDEXPF__");
316   if (hasFP64())
317     Builder.defineMacro("__HAS_FP64__");
318   if (hasFastFMA())
319     Builder.defineMacro("FP_FAST_FMA");
320 
321   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
322   // ToDo: deprecate this macro for naming consistency.
323   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
324   Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
325 }
326 
327 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
328   assert(HalfFormat == Aux->HalfFormat);
329   assert(FloatFormat == Aux->FloatFormat);
330   assert(DoubleFormat == Aux->DoubleFormat);
331 
332   // On x86_64 long double is 80-bit extended precision format, which is
333   // not supported by AMDGPU. 128-bit floating point format is also not
334   // supported by AMDGPU. Therefore keep its own format for these two types.
335   auto SaveLongDoubleFormat = LongDoubleFormat;
336   auto SaveFloat128Format = Float128Format;
337   auto SaveLongDoubleWidth = LongDoubleWidth;
338   auto SaveLongDoubleAlign = LongDoubleAlign;
339   copyAuxTarget(Aux);
340   LongDoubleFormat = SaveLongDoubleFormat;
341   Float128Format = SaveFloat128Format;
342   LongDoubleWidth = SaveLongDoubleWidth;
343   LongDoubleAlign = SaveLongDoubleAlign;
344   // For certain builtin types support on the host target, claim they are
345   // support to pass the compilation of the host code during the device-side
346   // compilation.
347   // FIXME: As the side effect, we also accept `__float128` uses in the device
348   // code. To rejct these builtin types supported in the host target but not in
349   // the device target, one approach would support `device_builtin` attribute
350   // so that we could tell the device builtin types from the host ones. The
351   // also solves the different representations of the same builtin type, such
352   // as `size_t` in the MSVC environment.
353   if (Aux->hasFloat128Type()) {
354     HasFloat128 = true;
355     Float128Format = DoubleFormat;
356   }
357 }
358