1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/LangOptions.h"
17 #include "clang/Basic/MacroBuilder.h"
18 #include "clang/Basic/TargetBuiltins.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
21
22 using namespace clang;
23 using namespace clang::targets;
24
25 namespace clang {
26 namespace targets {
27
28 // If you edit the description strings, make sure you update
29 // getPointerWidthV().
30
// LLVM data layout description used for non-amdgcn (R600) triples. The
// literal is split per group of entries purely for readability; the pieces
// concatenate to a single string. See the "Data Layout" section of the LLVM
// Language Reference for the grammar of the individual entries.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1";
34
// LLVM data layout description used for amdgcn triples. The literal is split
// per group of entries purely for readability; the pieces concatenate to a
// single string. See the "Data Layout" section of the LLVM Language Reference
// for the grammar of the individual entries.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64"
    "-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256"
    "-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7";
40
41 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
42 Generic, // Default
43 Global, // opencl_global
44 Local, // opencl_local
45 Constant, // opencl_constant
46 Private, // opencl_private
47 Generic, // opencl_generic
48 Global, // opencl_global_device
49 Global, // opencl_global_host
50 Global, // cuda_device
51 Constant, // cuda_constant
52 Local, // cuda_shared
53 Global, // sycl_global
54 Global, // sycl_global_device
55 Global, // sycl_global_host
56 Local, // sycl_local
57 Private, // sycl_private
58 Generic, // ptr32_sptr
59 Generic, // ptr32_uptr
60 Generic // ptr64
61 };
62
63 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
64 Private, // Default
65 Global, // opencl_global
66 Local, // opencl_local
67 Constant, // opencl_constant
68 Private, // opencl_private
69 Generic, // opencl_generic
70 Global, // opencl_global_device
71 Global, // opencl_global_host
72 Global, // cuda_device
73 Constant, // cuda_constant
74 Local, // cuda_shared
75 // SYCL address space values for this map are dummy
76 Generic, // sycl_global
77 Generic, // sycl_global_device
78 Generic, // sycl_global_host
79 Generic, // sycl_local
80 Generic, // sycl_private
81 Generic, // ptr32_sptr
82 Generic, // ptr32_uptr
83 Generic // ptr64
84
85 };
86 } // namespace targets
87 } // namespace clang
88
89 const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
90 #define BUILTIN(ID, TYPE, ATTRS) \
91 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
92 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
93 {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
94 #include "clang/Basic/BuiltinsAMDGPU.def"
95 };
96
97 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
98 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
99 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
100 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
101 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
102 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
103 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
104 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
105 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
106 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
107 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
108 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
109 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
110 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
111 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
112 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
113 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
114 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
115 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
116 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
117 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
118 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
119 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
120 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
121 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
122 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
123 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
124 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
125 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
126 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
127 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
128 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
129 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
130 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
131 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
132 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
133 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
134 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
135 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
136 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
137 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
138 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
139 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
140 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
141 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
142 "flat_scratch_lo", "flat_scratch_hi",
143 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
144 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
145 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
146 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
147 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
148 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
149 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
150 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
151 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
152 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
153 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
154 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
155 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
156 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
157 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
158 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
159 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
160 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
161 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
162 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
163 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
164 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
165 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
166 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
167 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
168 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
169 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
170 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
171 "a252", "a253", "a254", "a255"
172 };
173
getGCCRegNames() const174 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
175 return llvm::makeArrayRef(GCCRegNames);
176 }
177
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const178 bool AMDGPUTargetInfo::initFeatureMap(
179 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
180 const std::vector<std::string> &FeatureVec) const {
181
182 using namespace llvm::AMDGPU;
183
184 // XXX - What does the member GPU mean if device name string passed here?
185 if (isAMDGCN(getTriple())) {
186 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
187 case GK_GFX1034:
188 case GK_GFX1033:
189 case GK_GFX1032:
190 case GK_GFX1031:
191 case GK_GFX1030:
192 Features["ci-insts"] = true;
193 Features["dot1-insts"] = true;
194 Features["dot2-insts"] = true;
195 Features["dot5-insts"] = true;
196 Features["dot6-insts"] = true;
197 Features["dot7-insts"] = true;
198 Features["dl-insts"] = true;
199 Features["flat-address-space"] = true;
200 Features["16-bit-insts"] = true;
201 Features["dpp"] = true;
202 Features["gfx8-insts"] = true;
203 Features["gfx9-insts"] = true;
204 Features["gfx10-insts"] = true;
205 Features["gfx10-3-insts"] = true;
206 Features["s-memrealtime"] = true;
207 Features["s-memtime-inst"] = true;
208 break;
209 case GK_GFX1012:
210 case GK_GFX1011:
211 Features["dot1-insts"] = true;
212 Features["dot2-insts"] = true;
213 Features["dot5-insts"] = true;
214 Features["dot6-insts"] = true;
215 Features["dot7-insts"] = true;
216 LLVM_FALLTHROUGH;
217 case GK_GFX1010:
218 Features["dl-insts"] = true;
219 Features["ci-insts"] = true;
220 Features["flat-address-space"] = true;
221 Features["16-bit-insts"] = true;
222 Features["dpp"] = true;
223 Features["gfx8-insts"] = true;
224 Features["gfx9-insts"] = true;
225 Features["gfx10-insts"] = true;
226 Features["s-memrealtime"] = true;
227 Features["s-memtime-inst"] = true;
228 break;
229 case GK_GFX90A:
230 Features["gfx90a-insts"] = true;
231 LLVM_FALLTHROUGH;
232 case GK_GFX908:
233 Features["dot3-insts"] = true;
234 Features["dot4-insts"] = true;
235 Features["dot5-insts"] = true;
236 Features["dot6-insts"] = true;
237 Features["mai-insts"] = true;
238 LLVM_FALLTHROUGH;
239 case GK_GFX906:
240 Features["dl-insts"] = true;
241 Features["dot1-insts"] = true;
242 Features["dot2-insts"] = true;
243 Features["dot7-insts"] = true;
244 LLVM_FALLTHROUGH;
245 case GK_GFX90C:
246 case GK_GFX909:
247 case GK_GFX904:
248 case GK_GFX902:
249 case GK_GFX900:
250 Features["gfx9-insts"] = true;
251 LLVM_FALLTHROUGH;
252 case GK_GFX810:
253 case GK_GFX805:
254 case GK_GFX803:
255 case GK_GFX802:
256 case GK_GFX801:
257 Features["gfx8-insts"] = true;
258 Features["16-bit-insts"] = true;
259 Features["dpp"] = true;
260 Features["s-memrealtime"] = true;
261 LLVM_FALLTHROUGH;
262 case GK_GFX705:
263 case GK_GFX704:
264 case GK_GFX703:
265 case GK_GFX702:
266 case GK_GFX701:
267 case GK_GFX700:
268 Features["ci-insts"] = true;
269 Features["flat-address-space"] = true;
270 LLVM_FALLTHROUGH;
271 case GK_GFX602:
272 case GK_GFX601:
273 case GK_GFX600:
274 Features["s-memtime-inst"] = true;
275 break;
276 case GK_NONE:
277 break;
278 default:
279 llvm_unreachable("Unhandled GPU!");
280 }
281 } else {
282 if (CPU.empty())
283 CPU = "r600";
284
285 switch (llvm::AMDGPU::parseArchR600(CPU)) {
286 case GK_CAYMAN:
287 case GK_CYPRESS:
288 case GK_RV770:
289 case GK_RV670:
290 // TODO: Add fp64 when implemented.
291 break;
292 case GK_TURKS:
293 case GK_CAICOS:
294 case GK_BARTS:
295 case GK_SUMO:
296 case GK_REDWOOD:
297 case GK_JUNIPER:
298 case GK_CEDAR:
299 case GK_RV730:
300 case GK_RV710:
301 case GK_RS880:
302 case GK_R630:
303 case GK_R600:
304 break;
305 default:
306 llvm_unreachable("Unhandled GPU!");
307 }
308 }
309
310 return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
311 }
312
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const313 void AMDGPUTargetInfo::fillValidCPUList(
314 SmallVectorImpl<StringRef> &Values) const {
315 if (isAMDGCN(getTriple()))
316 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
317 else
318 llvm::AMDGPU::fillValidArchListR600(Values);
319 }
320
setAddressSpaceMap(bool DefaultIsPrivate)321 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
322 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
323 }
324
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)325 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
326 const TargetOptions &Opts)
327 : TargetInfo(Triple),
328 GPUKind(isAMDGCN(Triple) ?
329 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
330 llvm::AMDGPU::parseArchR600(Opts.CPU)),
331 GPUFeatures(isAMDGCN(Triple) ?
332 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
333 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
334 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
335 : DataLayoutStringR600);
336 GridValues = llvm::omp::AMDGPUGpuGridValues;
337
338 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
339 !isAMDGCN(Triple));
340 UseAddrSpaceMapMangling = true;
341
342 HasLegalHalfType = true;
343 HasFloat16 = true;
344 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
345 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
346
347 // Set pointer width and alignment for target address space 0.
348 PointerWidth = PointerAlign = getPointerWidthV(Generic);
349 if (getMaxPointerWidth() == 64) {
350 LongWidth = LongAlign = 64;
351 SizeType = UnsignedLong;
352 PtrDiffType = SignedLong;
353 IntPtrType = SignedLong;
354 }
355
356 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
357 }
358
adjust(LangOptions & Opts)359 void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
360 TargetInfo::adjust(Opts);
361 // ToDo: There are still a few places using default address space as private
362 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
363 // can be removed from the following line.
364 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
365 !isAMDGCN(getTriple()));
366 }
367
getTargetBuiltins() const368 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
369 return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
370 Builtin::FirstTSBuiltin);
371 }
372
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const373 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
374 MacroBuilder &Builder) const {
375 Builder.defineMacro("__AMD__");
376 Builder.defineMacro("__AMDGPU__");
377
378 if (isAMDGCN(getTriple()))
379 Builder.defineMacro("__AMDGCN__");
380 else
381 Builder.defineMacro("__R600__");
382
383 if (GPUKind != llvm::AMDGPU::GK_NONE) {
384 StringRef CanonName = isAMDGCN(getTriple()) ?
385 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
386 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
387 if (isAMDGCN(getTriple())) {
388 Builder.defineMacro("__amdgcn_processor__",
389 Twine("\"") + Twine(CanonName) + Twine("\""));
390 Builder.defineMacro("__amdgcn_target_id__",
391 Twine("\"") + Twine(getTargetID().getValue()) +
392 Twine("\""));
393 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
394 auto Loc = OffloadArchFeatures.find(F);
395 if (Loc != OffloadArchFeatures.end()) {
396 std::string NewF = F.str();
397 std::replace(NewF.begin(), NewF.end(), '-', '_');
398 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
399 Twine("__"),
400 Loc->second ? "1" : "0");
401 }
402 }
403 }
404 }
405
406 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
407 // removed in the near future.
408 if (hasFMAF())
409 Builder.defineMacro("__HAS_FMAF__");
410 if (hasFastFMAF())
411 Builder.defineMacro("FP_FAST_FMAF");
412 if (hasLDEXPF())
413 Builder.defineMacro("__HAS_LDEXPF__");
414 if (hasFP64())
415 Builder.defineMacro("__HAS_FP64__");
416 if (hasFastFMA())
417 Builder.defineMacro("FP_FAST_FMA");
418
419 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
420 }
421
setAuxTarget(const TargetInfo * Aux)422 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
423 assert(HalfFormat == Aux->HalfFormat);
424 assert(FloatFormat == Aux->FloatFormat);
425 assert(DoubleFormat == Aux->DoubleFormat);
426
427 // On x86_64 long double is 80-bit extended precision format, which is
428 // not supported by AMDGPU. 128-bit floating point format is also not
429 // supported by AMDGPU. Therefore keep its own format for these two types.
430 auto SaveLongDoubleFormat = LongDoubleFormat;
431 auto SaveFloat128Format = Float128Format;
432 copyAuxTarget(Aux);
433 LongDoubleFormat = SaveLongDoubleFormat;
434 Float128Format = SaveFloat128Format;
435 // For certain builtin types support on the host target, claim they are
436 // support to pass the compilation of the host code during the device-side
437 // compilation.
438 // FIXME: As the side effect, we also accept `__float128` uses in the device
439 // code. To rejct these builtin types supported in the host target but not in
440 // the device target, one approach would support `device_builtin` attribute
441 // so that we could tell the device builtin types from the host ones. The
442 // also solves the different representations of the same builtin type, such
443 // as `size_t` in the MSVC environment.
444 if (Aux->hasFloat128Type()) {
445 HasFloat128 = true;
446 Float128Format = DoubleFormat;
447 }
448 }
449