1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21 #include "llvm/IR/DataLayout.h"
22 
23 using namespace clang;
24 using namespace clang::targets;
25 
26 namespace clang {
27 namespace targets {
28 
29 // If you edit the description strings, make sure you update
30 // getPointerWidthV().
31 
// LLVM data-layout description used for non-amdgcn (R600) triples. The literal
// is split by kind of spec: the pointer spec, the i64 spec, the vector specs,
// and finally the trailing n/S/A specs.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5";
35 
// LLVM data-layout description used for amdgcn triples. The literal is split
// by kind of spec: the per-address-space pointer specs (p, p1..p6), the i64
// spec, the vector specs, the trailing n/S/A specs, and the ni spec.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32"
    "-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5"
    "-ni:7";
41 
42 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
43     Generic,  // Default
44     Global,   // opencl_global
45     Local,    // opencl_local
46     Constant, // opencl_constant
47     Private,  // opencl_private
48     Generic,  // opencl_generic
49     Global,   // cuda_device
50     Constant, // cuda_constant
51     Local,    // cuda_shared
52     Generic,  // ptr32_sptr
53     Generic,  // ptr32_uptr
54     Generic   // ptr64
55 };
56 
57 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
58     Private,  // Default
59     Global,   // opencl_global
60     Local,    // opencl_local
61     Constant, // opencl_constant
62     Private,  // opencl_private
63     Generic,  // opencl_generic
64     Global,   // cuda_device
65     Constant, // cuda_constant
66     Local,    // cuda_shared
67     Generic,  // ptr32_sptr
68     Generic,  // ptr32_uptr
69     Generic   // ptr64
70 
71 };
72 } // namespace targets
73 } // namespace clang
74 
75 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
76 #define BUILTIN(ID, TYPE, ATTRS)                                               \
77   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
78 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
79   {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
80 #include "clang/Basic/BuiltinsAMDGPU.def"
81 };
82 
83 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
84   "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
85   "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
86   "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
87   "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
88   "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
89   "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
90   "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
91   "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
92   "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
93   "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
94   "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
95   "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
96   "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
97   "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
98   "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
99   "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
100   "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
101   "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
102   "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
103   "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
104   "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
105   "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
106   "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
107   "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
108   "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
109   "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
110   "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
111   "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
112   "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
113   "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
114   "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
115   "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
116   "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
117   "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
118   "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
119   "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
120   "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
121   "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
122   "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
123   "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
124   "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
125   "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
126   "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
127   "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
128   "flat_scratch_lo", "flat_scratch_hi",
129   "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
130   "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
131   "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
132   "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
133   "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
134   "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
135   "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
136   "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
137   "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
138   "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
139   "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
140   "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
141   "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
142   "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
143   "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
144   "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
145   "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
146   "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
147   "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
148   "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
149   "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
150   "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
151   "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
152   "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
153   "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
154   "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
155   "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
156   "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
157   "a252", "a253", "a254", "a255"
158 };
159 
160 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
161   return llvm::makeArrayRef(GCCRegNames);
162 }
163 
164 bool AMDGPUTargetInfo::initFeatureMap(
165     llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
166     const std::vector<std::string> &FeatureVec) const {
167 
168   using namespace llvm::AMDGPU;
169 
170   // XXX - What does the member GPU mean if device name string passed here?
171   if (isAMDGCN(getTriple())) {
172     switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
173     case GK_GFX1030:
174       Features["ci-insts"] = true;
175       Features["dot1-insts"] = true;
176       Features["dot2-insts"] = true;
177       Features["dot5-insts"] = true;
178       Features["dot6-insts"] = true;
179       Features["dl-insts"] = true;
180       Features["flat-address-space"] = true;
181       Features["16-bit-insts"] = true;
182       Features["dpp"] = true;
183       Features["gfx8-insts"] = true;
184       Features["gfx9-insts"] = true;
185       Features["gfx10-insts"] = true;
186       Features["gfx10-3-insts"] = true;
187       Features["s-memrealtime"] = true;
188       break;
189     case GK_GFX1012:
190     case GK_GFX1011:
191       Features["dot1-insts"] = true;
192       Features["dot2-insts"] = true;
193       Features["dot5-insts"] = true;
194       Features["dot6-insts"] = true;
195       LLVM_FALLTHROUGH;
196     case GK_GFX1010:
197       Features["dl-insts"] = true;
198       Features["ci-insts"] = true;
199       Features["flat-address-space"] = true;
200       Features["16-bit-insts"] = true;
201       Features["dpp"] = true;
202       Features["gfx8-insts"] = true;
203       Features["gfx9-insts"] = true;
204       Features["gfx10-insts"] = true;
205       Features["s-memrealtime"] = true;
206       break;
207     case GK_GFX908:
208       Features["dot3-insts"] = true;
209       Features["dot4-insts"] = true;
210       Features["dot5-insts"] = true;
211       Features["dot6-insts"] = true;
212       Features["mai-insts"] = true;
213       LLVM_FALLTHROUGH;
214     case GK_GFX906:
215       Features["dl-insts"] = true;
216       Features["dot1-insts"] = true;
217       Features["dot2-insts"] = true;
218       LLVM_FALLTHROUGH;
219     case GK_GFX909:
220     case GK_GFX904:
221     case GK_GFX902:
222     case GK_GFX900:
223       Features["gfx9-insts"] = true;
224       LLVM_FALLTHROUGH;
225     case GK_GFX810:
226     case GK_GFX803:
227     case GK_GFX802:
228     case GK_GFX801:
229       Features["gfx8-insts"] = true;
230       Features["16-bit-insts"] = true;
231       Features["dpp"] = true;
232       Features["s-memrealtime"] = true;
233       LLVM_FALLTHROUGH;
234     case GK_GFX704:
235     case GK_GFX703:
236     case GK_GFX702:
237     case GK_GFX701:
238     case GK_GFX700:
239       Features["ci-insts"] = true;
240       Features["flat-address-space"] = true;
241       LLVM_FALLTHROUGH;
242     case GK_GFX601:
243     case GK_GFX600:
244       break;
245     case GK_NONE:
246       break;
247     default:
248       llvm_unreachable("Unhandled GPU!");
249     }
250   } else {
251     if (CPU.empty())
252       CPU = "r600";
253 
254     switch (llvm::AMDGPU::parseArchR600(CPU)) {
255     case GK_CAYMAN:
256     case GK_CYPRESS:
257     case GK_RV770:
258     case GK_RV670:
259       // TODO: Add fp64 when implemented.
260       break;
261     case GK_TURKS:
262     case GK_CAICOS:
263     case GK_BARTS:
264     case GK_SUMO:
265     case GK_REDWOOD:
266     case GK_JUNIPER:
267     case GK_CEDAR:
268     case GK_RV730:
269     case GK_RV710:
270     case GK_RS880:
271     case GK_R630:
272     case GK_R600:
273       break;
274     default:
275       llvm_unreachable("Unhandled GPU!");
276     }
277   }
278 
279   return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
280 }
281 
282 void AMDGPUTargetInfo::fillValidCPUList(
283     SmallVectorImpl<StringRef> &Values) const {
284   if (isAMDGCN(getTriple()))
285     llvm::AMDGPU::fillValidArchListAMDGCN(Values);
286   else
287     llvm::AMDGPU::fillValidArchListR600(Values);
288 }
289 
290 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
291   AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
292 }
293 
294 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
295                                    const TargetOptions &Opts)
296     : TargetInfo(Triple),
297       GPUKind(isAMDGCN(Triple) ?
298               llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
299               llvm::AMDGPU::parseArchR600(Opts.CPU)),
300       GPUFeatures(isAMDGCN(Triple) ?
301                   llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
302                   llvm::AMDGPU::getArchAttrR600(GPUKind)) {
303   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
304                                         : DataLayoutStringR600);
305   assert(DataLayout->getAllocaAddrSpace() == Private);
306   GridValues = llvm::omp::AMDGPUGpuGridValues;
307 
308   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
309                      !isAMDGCN(Triple));
310   UseAddrSpaceMapMangling = true;
311 
312   HasLegalHalfType = true;
313   HasFloat16 = true;
314 
315   // Set pointer width and alignment for target address space 0.
316   PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
317   if (getMaxPointerWidth() == 64) {
318     LongWidth = LongAlign = 64;
319     SizeType = UnsignedLong;
320     PtrDiffType = SignedLong;
321     IntPtrType = SignedLong;
322   }
323 
324   MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
325 }
326 
327 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
328   TargetInfo::adjust(Opts);
329   // ToDo: There are still a few places using default address space as private
330   // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
331   // can be removed from the following line.
332   setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
333                      !isAMDGCN(getTriple()));
334 }
335 
336 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
337   return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
338                                              Builtin::FirstTSBuiltin);
339 }
340 
341 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
342                                         MacroBuilder &Builder) const {
343   Builder.defineMacro("__AMD__");
344   Builder.defineMacro("__AMDGPU__");
345 
346   if (isAMDGCN(getTriple()))
347     Builder.defineMacro("__AMDGCN__");
348   else
349     Builder.defineMacro("__R600__");
350 
351   if (GPUKind != llvm::AMDGPU::GK_NONE) {
352     StringRef CanonName = isAMDGCN(getTriple()) ?
353       getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
354     Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
355   }
356 
357   // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
358   // removed in the near future.
359   if (hasFMAF())
360     Builder.defineMacro("__HAS_FMAF__");
361   if (hasFastFMAF())
362     Builder.defineMacro("FP_FAST_FMAF");
363   if (hasLDEXPF())
364     Builder.defineMacro("__HAS_LDEXPF__");
365   if (hasFP64())
366     Builder.defineMacro("__HAS_FP64__");
367   if (hasFastFMA())
368     Builder.defineMacro("FP_FAST_FMA");
369 }
370 
371 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
372   assert(HalfFormat == Aux->HalfFormat);
373   assert(FloatFormat == Aux->FloatFormat);
374   assert(DoubleFormat == Aux->DoubleFormat);
375 
376   // On x86_64 long double is 80-bit extended precision format, which is
377   // not supported by AMDGPU. 128-bit floating point format is also not
378   // supported by AMDGPU. Therefore keep its own format for these two types.
379   auto SaveLongDoubleFormat = LongDoubleFormat;
380   auto SaveFloat128Format = Float128Format;
381   copyAuxTarget(Aux);
382   LongDoubleFormat = SaveLongDoubleFormat;
383   Float128Format = SaveFloat128Format;
384   // For certain builtin types support on the host target, claim they are
385   // support to pass the compilation of the host code during the device-side
386   // compilation.
387   // FIXME: As the side effect, we also accept `__float128` uses in the device
388   // code. To rejct these builtin types supported in the host target but not in
389   // the device target, one approach would support `device_builtin` attribute
390   // so that we could tell the device builtin types from the host ones. The
391   // also solves the different representations of the same builtin type, such
392   // as `size_t` in the MSVC environment.
393   if (Aux->hasFloat128Type()) {
394     HasFloat128 = true;
395     Float128Format = DoubleFormat;
396   }
397 }
398