1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 
22 using namespace clang;
23 using namespace clang::targets;
24 
25 namespace clang {
26 namespace targets {
27 
// NOTE: getPointerWidthV() must be kept in agreement with the data layout
// description strings in this file; update it whenever one of them is edited.

/// Data layout description that the AMDGPUTargetInfo constructor hands to
/// resetDataLayout() when the triple is not amdgcn. The literal is split by
/// component purely for readability; the pieces concatenate into one string.
static const char *const DataLayoutStringR600 =
    "e"
    "-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1";
34 
/// Data layout description that the AMDGPUTargetInfo constructor hands to
/// resetDataLayout() when the triple is amdgcn. If this string is edited,
/// getPointerWidthV() must be updated to match. The literal is split by
/// component purely for readability; the pieces concatenate into one string.
static const char *const DataLayoutStringAMDGCN =
    "e"
    "-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512"
    "-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7";
40 
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42     Generic,  // Default
43     Global,   // opencl_global
44     Local,    // opencl_local
45     Constant, // opencl_constant
46     Private,  // opencl_private
47     Generic,  // opencl_generic
48     Global,   // opencl_global_device
49     Global,   // opencl_global_host
50     Global,   // cuda_device
51     Constant, // cuda_constant
52     Local,    // cuda_shared
53     Global,   // sycl_global
54     Global,   // sycl_global_device
55     Global,   // sycl_global_host
56     Local,    // sycl_local
57     Private,  // sycl_private
58     Generic,  // ptr32_sptr
59     Generic,  // ptr32_uptr
60     Generic   // ptr64
61 };
62 
63 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
64     Private,  // Default
65     Global,   // opencl_global
66     Local,    // opencl_local
67     Constant, // opencl_constant
68     Private,  // opencl_private
69     Generic,  // opencl_generic
70     Global,   // opencl_global_device
71     Global,   // opencl_global_host
72     Global,   // cuda_device
73     Constant, // cuda_constant
74     Local,    // cuda_shared
75     // SYCL address space values for this map are dummy
76     Generic,  // sycl_global
77     Generic,  // sycl_global_device
78     Generic,  // sycl_global_host
79     Generic,  // sycl_local
80     Generic,  // sycl_private
81     Generic,  // ptr32_sptr
82     Generic,  // ptr32_uptr
83     Generic   // ptr64
84 
85 };
86 } // namespace targets
87 } // namespace clang
88 
89 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
90 #define BUILTIN(ID, TYPE, ATTRS)                                               \
91   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
92 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
93   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
94 #include "clang/Basic/BuiltinsAMDGPU.def"
95 };
96 
97 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
98   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
99   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
100   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
101   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
102   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
103   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
104   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
105   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
106   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
107   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
108   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
109   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
110   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
111   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
112   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
113   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
114   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
115   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
116   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
117   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
118   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
119   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
120   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
121   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
122   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
123   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
124   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
125   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
126   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
127   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
128   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
129   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
130   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
131   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
132   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
133   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
134   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
135   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
136   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
137   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
138   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
139   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
140   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
141   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
142   "flat_scratch_lo", "flat_scratch_hi",
143   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
144   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
145   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
146   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
147   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
148   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
149   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
150   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
151   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
152   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
153   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
154   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
155   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
156   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
157   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
158   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
159   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
160   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
161   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
162   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
163   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
164   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
165   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
166   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
167   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
168   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
169   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
170   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
171   "a252", "a253", "a254", "a255"
172 };
173 
getGCCRegNames() const174 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
175   return llvm::makeArrayRef(GCCRegNames);
176 }
177 
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const178 bool AMDGPUTargetInfo::initFeatureMap(
179     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
180     const std::vector<std::string> &FeatureVec) const {
181 
182   using namespace llvm::AMDGPU;
183 
184   // XXX - What does the member GPU mean if device name string passed here?
185   if (isAMDGCN(getTriple())) {
186     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
187     case GK_GFX1034:
188     case GK_GFX1033:
189     case GK_GFX1032:
190     case GK_GFX1031:
191     case GK_GFX1030:
192       Features["ci-insts"] = true;
193       Features["dot1-insts"] = true;
194       Features["dot2-insts"] = true;
195       Features["dot5-insts"] = true;
196       Features["dot6-insts"] = true;
197       Features["dot7-insts"] = true;
198       Features["dl-insts"] = true;
199       Features["flat-address-space"] = true;
200       Features["16-bit-insts"] = true;
201       Features["dpp"] = true;
202       Features["gfx8-insts"] = true;
203       Features["gfx9-insts"] = true;
204       Features["gfx10-insts"] = true;
205       Features["gfx10-3-insts"] = true;
206       Features["s-memrealtime"] = true;
207       Features["s-memtime-inst"] = true;
208       break;
209     case GK_GFX1012:
210     case GK_GFX1011:
211       Features["dot1-insts"] = true;
212       Features["dot2-insts"] = true;
213       Features["dot5-insts"] = true;
214       Features["dot6-insts"] = true;
215       Features["dot7-insts"] = true;
216       LLVM_FALLTHROUGH;
217     case GK_GFX1010:
218       Features["dl-insts"] = true;
219       Features["ci-insts"] = true;
220       Features["flat-address-space"] = true;
221       Features["16-bit-insts"] = true;
222       Features["dpp"] = true;
223       Features["gfx8-insts"] = true;
224       Features["gfx9-insts"] = true;
225       Features["gfx10-insts"] = true;
226       Features["s-memrealtime"] = true;
227       Features["s-memtime-inst"] = true;
228       break;
229     case GK_GFX90A:
230       Features["gfx90a-insts"] = true;
231       LLVM_FALLTHROUGH;
232     case GK_GFX908:
233       Features["dot3-insts"] = true;
234       Features["dot4-insts"] = true;
235       Features["dot5-insts"] = true;
236       Features["dot6-insts"] = true;
237       Features["mai-insts"] = true;
238       LLVM_FALLTHROUGH;
239     case GK_GFX906:
240       Features["dl-insts"] = true;
241       Features["dot1-insts"] = true;
242       Features["dot2-insts"] = true;
243       Features["dot7-insts"] = true;
244       LLVM_FALLTHROUGH;
245     case GK_GFX90C:
246     case GK_GFX909:
247     case GK_GFX904:
248     case GK_GFX902:
249     case GK_GFX900:
250       Features["gfx9-insts"] = true;
251       LLVM_FALLTHROUGH;
252     case GK_GFX810:
253     case GK_GFX805:
254     case GK_GFX803:
255     case GK_GFX802:
256     case GK_GFX801:
257       Features["gfx8-insts"] = true;
258       Features["16-bit-insts"] = true;
259       Features["dpp"] = true;
260       Features["s-memrealtime"] = true;
261       LLVM_FALLTHROUGH;
262     case GK_GFX705:
263     case GK_GFX704:
264     case GK_GFX703:
265     case GK_GFX702:
266     case GK_GFX701:
267     case GK_GFX700:
268       Features["ci-insts"] = true;
269       Features["flat-address-space"] = true;
270       LLVM_FALLTHROUGH;
271     case GK_GFX602:
272     case GK_GFX601:
273     case GK_GFX600:
274       Features["s-memtime-inst"] = true;
275       break;
276     case GK_NONE:
277       break;
278     default:
279       llvm_unreachable("Unhandled GPU!");
280     }
281   } else {
282     if (CPU.empty())
283       CPU = "r600";
284 
285     switch (llvm::AMDGPU::parseArchR600(CPU)) {
286     case GK_CAYMAN:
287     case GK_CYPRESS:
288     case GK_RV770:
289     case GK_RV670:
290       // TODO: Add fp64 when implemented.
291       break;
292     case GK_TURKS:
293     case GK_CAICOS:
294     case GK_BARTS:
295     case GK_SUMO:
296     case GK_REDWOOD:
297     case GK_JUNIPER:
298     case GK_CEDAR:
299     case GK_RV730:
300     case GK_RV710:
301     case GK_RS880:
302     case GK_R630:
303     case GK_R600:
304       break;
305     default:
306       llvm_unreachable("Unhandled GPU!");
307     }
308   }
309 
310   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
311 }
312 
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const313 void AMDGPUTargetInfo::fillValidCPUList(
314     SmallVectorImpl<StringRef> &Values) const {
315   if (isAMDGCN(getTriple()))
316     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
317   else
318     llvm::AMDGPU::fillValidArchListR600(Values);
319 }
320 
setAddressSpaceMap(bool DefaultIsPrivate)321 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
322   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
323 }
324 
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)325 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
326                                    const TargetOptions &Opts)
327     : TargetInfo(Triple),
328       GPUKind(isAMDGCN(Triple) ?
329               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
330               llvm::AMDGPU::parseArchR600(Opts.CPU)),
331       GPUFeatures(isAMDGCN(Triple) ?
332                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
333                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
334   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
335                                         : DataLayoutStringR600);
336   GridValues = llvm::omp::AMDGPUGpuGridValues;
337 
338   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
339                      !isAMDGCN(Triple));
340   UseAddrSpaceMapMangling = true;
341 
342   HasLegalHalfType = true;
343   HasFloat16 = true;
344   WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
345   AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
346 
347   // Set pointer width and alignment for target address space 0.
348   PointerWidth = PointerAlign = getPointerWidthV(Generic);
349   if (getMaxPointerWidth() == 64) {
350     LongWidth = LongAlign = 64;
351     SizeType = UnsignedLong;
352     PtrDiffType = SignedLong;
353     IntPtrType = SignedLong;
354   }
355 
356   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
357 }
358 
adjust(LangOptions & Opts)359 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
360   TargetInfo::adjust(Opts);
361   // ToDo: There are still a few places using default address space as private
362   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
363   // can be removed from the following line.
364   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
365                      !isAMDGCN(getTriple()));
366 }
367 
getTargetBuiltins() const368 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
369   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
370                                              Builtin::FirstTSBuiltin);
371 }
372 
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const373 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
374                                         MacroBuilder &Builder) const {
375   Builder.defineMacro("__AMD__");
376   Builder.defineMacro("__AMDGPU__");
377 
378   if (isAMDGCN(getTriple()))
379     Builder.defineMacro("__AMDGCN__");
380   else
381     Builder.defineMacro("__R600__");
382 
383   if (GPUKind != llvm::AMDGPU::GK_NONE) {
384     StringRef CanonName = isAMDGCN(getTriple()) ?
385       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
386     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
387     if (isAMDGCN(getTriple())) {
388       Builder.defineMacro("__amdgcn_processor__",
389                           Twine("\"") + Twine(CanonName) + Twine("\""));
390       Builder.defineMacro("__amdgcn_target_id__",
391                           Twine("\"") + Twine(getTargetID().getValue()) +
392                               Twine("\""));
393       for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
394         auto Loc = OffloadArchFeatures.find(F);
395         if (Loc != OffloadArchFeatures.end()) {
396           std::string NewF = F.str();
397           std::replace(NewF.begin(), NewF.end(), '-', '_');
398           Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
399                                   Twine("__"),
400                               Loc->second ? "1" : "0");
401         }
402       }
403     }
404   }
405 
406   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
407   // removed in the near future.
408   if (hasFMAF())
409     Builder.defineMacro("__HAS_FMAF__");
410   if (hasFastFMAF())
411     Builder.defineMacro("FP_FAST_FMAF");
412   if (hasLDEXPF())
413     Builder.defineMacro("__HAS_LDEXPF__");
414   if (hasFP64())
415     Builder.defineMacro("__HAS_FP64__");
416   if (hasFastFMA())
417     Builder.defineMacro("FP_FAST_FMA");
418 
419   Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
420 }
421 
setAuxTarget(const TargetInfo * Aux)422 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
423   assert(HalfFormat == Aux->HalfFormat);
424   assert(FloatFormat == Aux->FloatFormat);
425   assert(DoubleFormat == Aux->DoubleFormat);
426 
427   // On x86_64 long double is 80-bit extended precision format, which is
428   // not supported by AMDGPU. 128-bit floating point format is also not
429   // supported by AMDGPU. Therefore keep its own format for these two types.
430   auto SaveLongDoubleFormat = LongDoubleFormat;
431   auto SaveFloat128Format = Float128Format;
432   copyAuxTarget(Aux);
433   LongDoubleFormat = SaveLongDoubleFormat;
434   Float128Format = SaveFloat128Format;
435   // For certain builtin types support on the host target, claim they are
436   // support to pass the compilation of the host code during the device-side
437   // compilation.
438   // FIXME: As the side effect, we also accept `__float128` uses in the device
439   // code. To rejct these builtin types supported in the host target but not in
440   // the device target, one approach would support `device_builtin` attribute
441   // so that we could tell the device builtin types from the host ones. The
442   // also solves the different representations of the same builtin type, such
443   // as `size_t` in the MSVC environment.
444   if (Aux->hasFloat128Type()) {
445     HasFloat128 = true;
446     Float128Format = DoubleFormat;
447   }
448 }
449