1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20 using namespace clang;
21 using namespace clang::targets;
22
23 namespace clang {
24 namespace targets {
25
26 // If you edit the description strings, make sure you update
27 // getPointerWidthV().
28
// Data layout description used for R600 triples. The literal is split at
// component boundaries purely for readability; the pieces concatenate to a
// single string. Keep getPointerWidthV() in sync when editing it.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1";
32
// Data layout description used for AMDGCN triples. The literal is split at
// component boundaries purely for readability; the pieces concatenate to a
// single string. Keep getPointerWidthV() in sync when editing it.
static const char *const DataLayoutStringAMDGCN =
    "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-p7:160:256:256:32-p8:128:128-p9:192:256:256:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7:8:9";
39
// Language-address-space to AMDGPU-address-space table in which the Default
// language address space maps to FLAT_ADDRESS. setAddressSpaceMap() installs
// this table when its DefaultIsPrivate argument is false.
//
// The entries are positional: the trailing comment on each line names the
// language address space that slot stands for (presumably the LangAS
// enumerator order -- verify against the LangAS definition before adding or
// reordering entries).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
    llvm::AMDGPUAS::FLAT_ADDRESS,     // Default
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // sycl_global_host
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // sycl_local
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // sycl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
    llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared
};
62
// Language-address-space to AMDGPU-address-space table in which the Default
// language address space maps to PRIVATE_ADDRESS. setAddressSpaceMap() installs
// this table when its DefaultIsPrivate argument is true.
//
// The entries are positional: the trailing comment on each line names the
// language address space that slot stands for (presumably the LangAS
// enumerator order -- verify against the LangAS definition before adding or
// reordering entries).
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // Default
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // opencl_local
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // opencl_constant
    llvm::AMDGPUAS::PRIVATE_ADDRESS,  // opencl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // opencl_generic
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_device
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // opencl_global_host
    llvm::AMDGPUAS::GLOBAL_ADDRESS,   // cuda_device
    llvm::AMDGPUAS::CONSTANT_ADDRESS, // cuda_constant
    llvm::AMDGPUAS::LOCAL_ADDRESS,    // cuda_shared
    // The SYCL address space values in this map are placeholders (dummy
    // values); every SYCL slot is simply FLAT_ADDRESS.
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global_device
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_global_host
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_local
    llvm::AMDGPUAS::FLAT_ADDRESS,     // sycl_private
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_sptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr32_uptr
    llvm::AMDGPUAS::FLAT_ADDRESS,     // ptr64
    llvm::AMDGPUAS::FLAT_ADDRESS,     // hlsl_groupshared

};
87 } // namespace targets
88 } // namespace clang
89
// Table of AMDGPU builtin descriptors, produced by expanding every BUILTIN and
// TARGET_BUILTIN entry of clang/Basic/BuiltinsAMDGPU.def into a Builtin::Info
// initializer. BUILTIN entries pass nullptr in the slot where TARGET_BUILTIN
// entries pass their FEATURE argument; both kinds use HeaderDesc::NO_HEADER
// and ALL_LANGUAGES. getTargetBuiltins() exposes this array.
static constexpr Builtin::Info BuiltinInfo[] = {
#define BUILTIN(ID, TYPE, ATTRS)                                               \
  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
  {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
#include "clang/Basic/BuiltinsAMDGPU.def"
};
97
// Register-name table returned by getGCCRegNames(). It lists, in order:
// "v0".."v255", "s0".."s127", the named registers "exec", "vcc", "scc", "m0"
// and "flat_scratch" together with the "_lo"/"_hi" variants of exec, vcc and
// flat_scratch, and finally "a0".."a255".
const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  "flat_scratch_lo", "flat_scratch_hi",
  "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
  "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
  "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
  "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
  "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
  "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
  "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
  "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
  "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
  "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
  "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
  "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
  "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
  "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
  "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
  "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
  "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
  "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
  "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
  "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
  "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
  "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
  "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
  "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
  "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
  "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
  "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
  "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
  "a252", "a253", "a254", "a255"
};
174
getGCCRegNames() const175 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
176 return llvm::ArrayRef(GCCRegNames);
177 }
178
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const179 bool AMDGPUTargetInfo::initFeatureMap(
180 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
181 const std::vector<std::string> &FeatureVec) const {
182
183 using namespace llvm::AMDGPU;
184 fillAMDGPUFeatureMap(CPU, getTriple(), Features);
185 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
186 return false;
187
188 // TODO: Should move this logic into TargetParser
189 std::string ErrorMsg;
190 if (!insertWaveSizeFeature(CPU, getTriple(), Features, ErrorMsg)) {
191 Diags.Report(diag::err_invalid_feature_combination) << ErrorMsg;
192 return false;
193 }
194
195 return true;
196 }
197
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const198 void AMDGPUTargetInfo::fillValidCPUList(
199 SmallVectorImpl<StringRef> &Values) const {
200 if (isAMDGCN(getTriple()))
201 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
202 else
203 llvm::AMDGPU::fillValidArchListR600(Values);
204 }
205
setAddressSpaceMap(bool DefaultIsPrivate)206 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
207 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
208 }
209
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)210 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
211 const TargetOptions &Opts)
212 : TargetInfo(Triple),
213 GPUKind(isAMDGCN(Triple) ?
214 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
215 llvm::AMDGPU::parseArchR600(Opts.CPU)),
216 GPUFeatures(isAMDGCN(Triple) ?
217 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
218 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
219 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
220 : DataLayoutStringR600);
221
222 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
223 !isAMDGCN(Triple));
224 UseAddrSpaceMapMangling = true;
225
226 if (isAMDGCN(Triple)) {
227 // __bf16 is always available as a load/store only type on AMDGCN.
228 BFloat16Width = BFloat16Align = 16;
229 BFloat16Format = &llvm::APFloat::BFloat();
230 }
231
232 HasLegalHalfType = true;
233 HasFloat16 = true;
234 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
235 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
236
237 // Set pointer width and alignment for the generic address space.
238 PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
239 if (getMaxPointerWidth() == 64) {
240 LongWidth = LongAlign = 64;
241 SizeType = UnsignedLong;
242 PtrDiffType = SignedLong;
243 IntPtrType = SignedLong;
244 }
245
246 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
247 CUMode = !(GPUFeatures & llvm::AMDGPU::FEATURE_WGP);
248 for (auto F : {"image-insts", "gws"})
249 ReadOnlyFeatures.insert(F);
250 HalfArgsAndReturns = true;
251 }
252
adjust(DiagnosticsEngine & Diags,LangOptions & Opts)253 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
254 TargetInfo::adjust(Diags, Opts);
255 // ToDo: There are still a few places using default address space as private
256 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
257 // can be removed from the following line.
258 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
259 !isAMDGCN(getTriple()));
260 }
261
getTargetBuiltins() const262 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
263 return llvm::ArrayRef(BuiltinInfo,
264 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
265 }
266
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const267 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
268 MacroBuilder &Builder) const {
269 Builder.defineMacro("__AMD__");
270 Builder.defineMacro("__AMDGPU__");
271
272 if (isAMDGCN(getTriple()))
273 Builder.defineMacro("__AMDGCN__");
274 else
275 Builder.defineMacro("__R600__");
276
277 if (GPUKind != llvm::AMDGPU::GK_NONE) {
278 StringRef CanonName = isAMDGCN(getTriple()) ?
279 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
280 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
281 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
282 if (isAMDGCN(getTriple())) {
283 assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name");
284 Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
285 Twine("__"));
286 }
287 if (isAMDGCN(getTriple())) {
288 Builder.defineMacro("__amdgcn_processor__",
289 Twine("\"") + Twine(CanonName) + Twine("\""));
290 Builder.defineMacro("__amdgcn_target_id__",
291 Twine("\"") + Twine(*getTargetID()) + Twine("\""));
292 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
293 auto Loc = OffloadArchFeatures.find(F);
294 if (Loc != OffloadArchFeatures.end()) {
295 std::string NewF = F.str();
296 std::replace(NewF.begin(), NewF.end(), '-', '_');
297 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
298 Twine("__"),
299 Loc->second ? "1" : "0");
300 }
301 }
302 }
303 }
304
305 if (AllowAMDGPUUnsafeFPAtomics)
306 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
307
308 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
309 // removed in the near future.
310 if (hasFMAF())
311 Builder.defineMacro("__HAS_FMAF__");
312 if (hasFastFMAF())
313 Builder.defineMacro("FP_FAST_FMAF");
314 if (hasLDEXPF())
315 Builder.defineMacro("__HAS_LDEXPF__");
316 if (hasFP64())
317 Builder.defineMacro("__HAS_FP64__");
318 if (hasFastFMA())
319 Builder.defineMacro("FP_FAST_FMA");
320
321 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE__", Twine(WavefrontSize));
322 // ToDo: deprecate this macro for naming consistency.
323 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
324 Builder.defineMacro("__AMDGCN_CUMODE__", Twine(CUMode));
325 }
326
setAuxTarget(const TargetInfo * Aux)327 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
328 assert(HalfFormat == Aux->HalfFormat);
329 assert(FloatFormat == Aux->FloatFormat);
330 assert(DoubleFormat == Aux->DoubleFormat);
331
332 // On x86_64 long double is 80-bit extended precision format, which is
333 // not supported by AMDGPU. 128-bit floating point format is also not
334 // supported by AMDGPU. Therefore keep its own format for these two types.
335 auto SaveLongDoubleFormat = LongDoubleFormat;
336 auto SaveFloat128Format = Float128Format;
337 auto SaveLongDoubleWidth = LongDoubleWidth;
338 auto SaveLongDoubleAlign = LongDoubleAlign;
339 copyAuxTarget(Aux);
340 LongDoubleFormat = SaveLongDoubleFormat;
341 Float128Format = SaveFloat128Format;
342 LongDoubleWidth = SaveLongDoubleWidth;
343 LongDoubleAlign = SaveLongDoubleAlign;
344 // For certain builtin types support on the host target, claim they are
345 // support to pass the compilation of the host code during the device-side
346 // compilation.
347 // FIXME: As the side effect, we also accept `__float128` uses in the device
348 // code. To rejct these builtin types supported in the host target but not in
349 // the device target, one approach would support `device_builtin` attribute
350 // so that we could tell the device builtin types from the host ones. The
351 // also solves the different representations of the same builtin type, such
352 // as `size_t` in the MSVC environment.
353 if (Aux->hasFloat128Type()) {
354 HasFloat128 = true;
355 Float128Format = DoubleFormat;
356 }
357 }
358