1 //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements AMDGPU TargetInfo objects.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "AMDGPU.h"
14 #include "clang/Basic/Builtins.h"
15 #include "clang/Basic/CodeGenOptions.h"
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/LangOptions.h"
18 #include "clang/Basic/MacroBuilder.h"
19 #include "clang/Basic/TargetBuiltins.h"
20
21 using namespace clang;
22 using namespace clang::targets;
23
24 namespace clang {
25 namespace targets {
26
27 // If you edit the description strings, make sure you update
28 // getPointerWidthV().
29
// Data layout description used for non-AMDGCN (R600) triples; the
// AMDGPUTargetInfo constructor hands it to resetDataLayout(). The literal is
// split into one group of components per line purely for readability.
static const char *const DataLayoutStringR600 =
    "e-p:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1";
33
// Data layout description used for AMDGCN triples; the AMDGPUTargetInfo
// constructor hands it to resetDataLayout(). The literal is split into one
// group of components per line purely for readability.
static const char *const DataLayoutStringAMDGCN =
    "e"
    "-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
    "-i64:64"
    "-v16:16-v24:32-v32:32-v48:64-v96:128"
    "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048"
    "-n32:64-S32-A5-G1"
    "-ni:7";
39
40 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
41 Generic, // Default
42 Global, // opencl_global
43 Local, // opencl_local
44 Constant, // opencl_constant
45 Private, // opencl_private
46 Generic, // opencl_generic
47 Global, // opencl_global_device
48 Global, // opencl_global_host
49 Global, // cuda_device
50 Constant, // cuda_constant
51 Local, // cuda_shared
52 Global, // sycl_global
53 Global, // sycl_global_device
54 Global, // sycl_global_host
55 Local, // sycl_local
56 Private, // sycl_private
57 Generic, // ptr32_sptr
58 Generic, // ptr32_uptr
59 Generic, // ptr64
60 Generic, // hlsl_groupshared
61 };
62
63 const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
64 Private, // Default
65 Global, // opencl_global
66 Local, // opencl_local
67 Constant, // opencl_constant
68 Private, // opencl_private
69 Generic, // opencl_generic
70 Global, // opencl_global_device
71 Global, // opencl_global_host
72 Global, // cuda_device
73 Constant, // cuda_constant
74 Local, // cuda_shared
75 // SYCL address space values for this map are dummy
76 Generic, // sycl_global
77 Generic, // sycl_global_device
78 Generic, // sycl_global_host
79 Generic, // sycl_local
80 Generic, // sycl_private
81 Generic, // ptr32_sptr
82 Generic, // ptr32_uptr
83 Generic, // ptr64
84 Generic, // hlsl_groupshared
85
86 };
87 } // namespace targets
88 } // namespace clang
89
90 static constexpr Builtin::Info BuiltinInfo[] = {
91 #define BUILTIN(ID, TYPE, ATTRS) \
92 {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
93 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
94 {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
95 #include "clang/Basic/BuiltinsAMDGPU.def"
96 };
97
98 const char *const AMDGPUTargetInfo::GCCRegNames[] = {
99 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
100 "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
101 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
102 "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
103 "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
104 "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
105 "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
106 "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
107 "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
108 "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
109 "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
110 "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
111 "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
112 "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
113 "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
114 "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
115 "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
116 "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
117 "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
118 "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
119 "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
120 "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
121 "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
122 "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
123 "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
124 "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
125 "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
126 "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
127 "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
128 "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
129 "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
130 "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
131 "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
132 "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
133 "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
134 "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
135 "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
136 "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
137 "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
138 "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
139 "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
140 "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
141 "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
142 "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
143 "flat_scratch_lo", "flat_scratch_hi",
144 "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "a8",
145 "a9", "a10", "a11", "a12", "a13", "a14", "a15", "a16", "a17",
146 "a18", "a19", "a20", "a21", "a22", "a23", "a24", "a25", "a26",
147 "a27", "a28", "a29", "a30", "a31", "a32", "a33", "a34", "a35",
148 "a36", "a37", "a38", "a39", "a40", "a41", "a42", "a43", "a44",
149 "a45", "a46", "a47", "a48", "a49", "a50", "a51", "a52", "a53",
150 "a54", "a55", "a56", "a57", "a58", "a59", "a60", "a61", "a62",
151 "a63", "a64", "a65", "a66", "a67", "a68", "a69", "a70", "a71",
152 "a72", "a73", "a74", "a75", "a76", "a77", "a78", "a79", "a80",
153 "a81", "a82", "a83", "a84", "a85", "a86", "a87", "a88", "a89",
154 "a90", "a91", "a92", "a93", "a94", "a95", "a96", "a97", "a98",
155 "a99", "a100", "a101", "a102", "a103", "a104", "a105", "a106", "a107",
156 "a108", "a109", "a110", "a111", "a112", "a113", "a114", "a115", "a116",
157 "a117", "a118", "a119", "a120", "a121", "a122", "a123", "a124", "a125",
158 "a126", "a127", "a128", "a129", "a130", "a131", "a132", "a133", "a134",
159 "a135", "a136", "a137", "a138", "a139", "a140", "a141", "a142", "a143",
160 "a144", "a145", "a146", "a147", "a148", "a149", "a150", "a151", "a152",
161 "a153", "a154", "a155", "a156", "a157", "a158", "a159", "a160", "a161",
162 "a162", "a163", "a164", "a165", "a166", "a167", "a168", "a169", "a170",
163 "a171", "a172", "a173", "a174", "a175", "a176", "a177", "a178", "a179",
164 "a180", "a181", "a182", "a183", "a184", "a185", "a186", "a187", "a188",
165 "a189", "a190", "a191", "a192", "a193", "a194", "a195", "a196", "a197",
166 "a198", "a199", "a200", "a201", "a202", "a203", "a204", "a205", "a206",
167 "a207", "a208", "a209", "a210", "a211", "a212", "a213", "a214", "a215",
168 "a216", "a217", "a218", "a219", "a220", "a221", "a222", "a223", "a224",
169 "a225", "a226", "a227", "a228", "a229", "a230", "a231", "a232", "a233",
170 "a234", "a235", "a236", "a237", "a238", "a239", "a240", "a241", "a242",
171 "a243", "a244", "a245", "a246", "a247", "a248", "a249", "a250", "a251",
172 "a252", "a253", "a254", "a255"
173 };
174
getGCCRegNames() const175 ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
176 return llvm::ArrayRef(GCCRegNames);
177 }
178
initFeatureMap(llvm::StringMap<bool> & Features,DiagnosticsEngine & Diags,StringRef CPU,const std::vector<std::string> & FeatureVec) const179 bool AMDGPUTargetInfo::initFeatureMap(
180 llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
181 const std::vector<std::string> &FeatureVec) const {
182 const bool IsNullCPU = CPU.empty();
183 bool IsWave32Capable = false;
184
185 using namespace llvm::AMDGPU;
186
187 // XXX - What does the member GPU mean if device name string passed here?
188 if (isAMDGCN(getTriple())) {
189 switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
190 case GK_GFX1103:
191 case GK_GFX1102:
192 case GK_GFX1101:
193 case GK_GFX1100:
194 IsWave32Capable = true;
195 Features["ci-insts"] = true;
196 Features["dot5-insts"] = true;
197 Features["dot7-insts"] = true;
198 Features["dot8-insts"] = true;
199 Features["dot9-insts"] = true;
200 Features["dl-insts"] = true;
201 Features["16-bit-insts"] = true;
202 Features["dpp"] = true;
203 Features["gfx8-insts"] = true;
204 Features["gfx9-insts"] = true;
205 Features["gfx10-insts"] = true;
206 Features["gfx10-3-insts"] = true;
207 Features["gfx11-insts"] = true;
208 break;
209 case GK_GFX1036:
210 case GK_GFX1035:
211 case GK_GFX1034:
212 case GK_GFX1033:
213 case GK_GFX1032:
214 case GK_GFX1031:
215 case GK_GFX1030:
216 IsWave32Capable = true;
217 Features["ci-insts"] = true;
218 Features["dot1-insts"] = true;
219 Features["dot2-insts"] = true;
220 Features["dot5-insts"] = true;
221 Features["dot6-insts"] = true;
222 Features["dot7-insts"] = true;
223 Features["dl-insts"] = true;
224 Features["16-bit-insts"] = true;
225 Features["dpp"] = true;
226 Features["gfx8-insts"] = true;
227 Features["gfx9-insts"] = true;
228 Features["gfx10-insts"] = true;
229 Features["gfx10-3-insts"] = true;
230 Features["s-memrealtime"] = true;
231 Features["s-memtime-inst"] = true;
232 break;
233 case GK_GFX1012:
234 case GK_GFX1011:
235 Features["dot1-insts"] = true;
236 Features["dot2-insts"] = true;
237 Features["dot5-insts"] = true;
238 Features["dot6-insts"] = true;
239 Features["dot7-insts"] = true;
240 [[fallthrough]];
241 case GK_GFX1013:
242 case GK_GFX1010:
243 IsWave32Capable = true;
244 Features["dl-insts"] = true;
245 Features["ci-insts"] = true;
246 Features["16-bit-insts"] = true;
247 Features["dpp"] = true;
248 Features["gfx8-insts"] = true;
249 Features["gfx9-insts"] = true;
250 Features["gfx10-insts"] = true;
251 Features["s-memrealtime"] = true;
252 Features["s-memtime-inst"] = true;
253 break;
254 case GK_GFX940:
255 Features["gfx940-insts"] = true;
256 Features["fp8-insts"] = true;
257 [[fallthrough]];
258 case GK_GFX90A:
259 Features["gfx90a-insts"] = true;
260 [[fallthrough]];
261 case GK_GFX908:
262 Features["dot3-insts"] = true;
263 Features["dot4-insts"] = true;
264 Features["dot5-insts"] = true;
265 Features["dot6-insts"] = true;
266 Features["mai-insts"] = true;
267 [[fallthrough]];
268 case GK_GFX906:
269 Features["dl-insts"] = true;
270 Features["dot1-insts"] = true;
271 Features["dot2-insts"] = true;
272 Features["dot7-insts"] = true;
273 [[fallthrough]];
274 case GK_GFX90C:
275 case GK_GFX909:
276 case GK_GFX904:
277 case GK_GFX902:
278 case GK_GFX900:
279 Features["gfx9-insts"] = true;
280 [[fallthrough]];
281 case GK_GFX810:
282 case GK_GFX805:
283 case GK_GFX803:
284 case GK_GFX802:
285 case GK_GFX801:
286 Features["gfx8-insts"] = true;
287 Features["16-bit-insts"] = true;
288 Features["dpp"] = true;
289 Features["s-memrealtime"] = true;
290 [[fallthrough]];
291 case GK_GFX705:
292 case GK_GFX704:
293 case GK_GFX703:
294 case GK_GFX702:
295 case GK_GFX701:
296 case GK_GFX700:
297 Features["ci-insts"] = true;
298 [[fallthrough]];
299 case GK_GFX602:
300 case GK_GFX601:
301 case GK_GFX600:
302 Features["s-memtime-inst"] = true;
303 break;
304 case GK_NONE:
305 break;
306 default:
307 llvm_unreachable("Unhandled GPU!");
308 }
309 } else {
310 if (CPU.empty())
311 CPU = "r600";
312
313 switch (llvm::AMDGPU::parseArchR600(CPU)) {
314 case GK_CAYMAN:
315 case GK_CYPRESS:
316 case GK_RV770:
317 case GK_RV670:
318 // TODO: Add fp64 when implemented.
319 break;
320 case GK_TURKS:
321 case GK_CAICOS:
322 case GK_BARTS:
323 case GK_SUMO:
324 case GK_REDWOOD:
325 case GK_JUNIPER:
326 case GK_CEDAR:
327 case GK_RV730:
328 case GK_RV710:
329 case GK_RS880:
330 case GK_R630:
331 case GK_R600:
332 break;
333 default:
334 llvm_unreachable("Unhandled GPU!");
335 }
336 }
337
338 if (!TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec))
339 return false;
340
341 // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
342 const bool HaveWave32 =
343 (IsWave32Capable || IsNullCPU) && Features.count("wavefrontsize32");
344 const bool HaveWave64 = Features.count("wavefrontsize64");
345
346 // TODO: Should move this logic into TargetParser
347 if (HaveWave32 && HaveWave64) {
348 Diags.Report(diag::err_invalid_feature_combination)
349 << "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
350 return false;
351 }
352
353 // Don't assume any wavesize with an unknown subtarget.
354 if (!IsNullCPU) {
355 // Default to wave32 if available, or wave64 if not
356 if (!HaveWave32 && !HaveWave64) {
357 StringRef DefaultWaveSizeFeature =
358 IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
359 Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
360 }
361 }
362
363 return true;
364 }
365
fillValidCPUList(SmallVectorImpl<StringRef> & Values) const366 void AMDGPUTargetInfo::fillValidCPUList(
367 SmallVectorImpl<StringRef> &Values) const {
368 if (isAMDGCN(getTriple()))
369 llvm::AMDGPU::fillValidArchListAMDGCN(Values);
370 else
371 llvm::AMDGPU::fillValidArchListR600(Values);
372 }
373
setAddressSpaceMap(bool DefaultIsPrivate)374 void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
375 AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
376 }
377
AMDGPUTargetInfo(const llvm::Triple & Triple,const TargetOptions & Opts)378 AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
379 const TargetOptions &Opts)
380 : TargetInfo(Triple),
381 GPUKind(isAMDGCN(Triple) ?
382 llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
383 llvm::AMDGPU::parseArchR600(Opts.CPU)),
384 GPUFeatures(isAMDGCN(Triple) ?
385 llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
386 llvm::AMDGPU::getArchAttrR600(GPUKind)) {
387 resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
388 : DataLayoutStringR600);
389
390 setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
391 !isAMDGCN(Triple));
392 UseAddrSpaceMapMangling = true;
393
394 if (isAMDGCN(Triple)) {
395 // __bf16 is always available as a load/store only type on AMDGCN.
396 BFloat16Width = BFloat16Align = 16;
397 BFloat16Format = &llvm::APFloat::BFloat();
398 }
399
400 HasLegalHalfType = true;
401 HasFloat16 = true;
402 WavefrontSize = GPUFeatures & llvm::AMDGPU::FEATURE_WAVE32 ? 32 : 64;
403 AllowAMDGPUUnsafeFPAtomics = Opts.AllowAMDGPUUnsafeFPAtomics;
404
405 // Set pointer width and alignment for the generic address space.
406 PointerWidth = PointerAlign = getPointerWidthV(LangAS::Default);
407 if (getMaxPointerWidth() == 64) {
408 LongWidth = LongAlign = 64;
409 SizeType = UnsignedLong;
410 PtrDiffType = SignedLong;
411 IntPtrType = SignedLong;
412 }
413
414 MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
415 }
416
adjust(DiagnosticsEngine & Diags,LangOptions & Opts)417 void AMDGPUTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
418 TargetInfo::adjust(Diags, Opts);
419 // ToDo: There are still a few places using default address space as private
420 // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
421 // can be removed from the following line.
422 setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
423 !isAMDGCN(getTriple()));
424 }
425
getTargetBuiltins() const426 ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
427 return llvm::ArrayRef(BuiltinInfo,
428 clang::AMDGPU::LastTSBuiltin - Builtin::FirstTSBuiltin);
429 }
430
getTargetDefines(const LangOptions & Opts,MacroBuilder & Builder) const431 void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
432 MacroBuilder &Builder) const {
433 Builder.defineMacro("__AMD__");
434 Builder.defineMacro("__AMDGPU__");
435
436 if (isAMDGCN(getTriple()))
437 Builder.defineMacro("__AMDGCN__");
438 else
439 Builder.defineMacro("__R600__");
440
441 if (GPUKind != llvm::AMDGPU::GK_NONE) {
442 StringRef CanonName = isAMDGCN(getTriple()) ?
443 getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
444 Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
445 // Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
446 if (isAMDGCN(getTriple())) {
447 assert(CanonName.startswith("gfx") && "Invalid amdgcn canonical name");
448 Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
449 Twine("__"));
450 }
451 if (isAMDGCN(getTriple())) {
452 Builder.defineMacro("__amdgcn_processor__",
453 Twine("\"") + Twine(CanonName) + Twine("\""));
454 Builder.defineMacro("__amdgcn_target_id__",
455 Twine("\"") + Twine(*getTargetID()) + Twine("\""));
456 for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) {
457 auto Loc = OffloadArchFeatures.find(F);
458 if (Loc != OffloadArchFeatures.end()) {
459 std::string NewF = F.str();
460 std::replace(NewF.begin(), NewF.end(), '-', '_');
461 Builder.defineMacro(Twine("__amdgcn_feature_") + Twine(NewF) +
462 Twine("__"),
463 Loc->second ? "1" : "0");
464 }
465 }
466 }
467 }
468
469 if (AllowAMDGPUUnsafeFPAtomics)
470 Builder.defineMacro("__AMDGCN_UNSAFE_FP_ATOMICS__");
471
472 // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
473 // removed in the near future.
474 if (hasFMAF())
475 Builder.defineMacro("__HAS_FMAF__");
476 if (hasFastFMAF())
477 Builder.defineMacro("FP_FAST_FMAF");
478 if (hasLDEXPF())
479 Builder.defineMacro("__HAS_LDEXPF__");
480 if (hasFP64())
481 Builder.defineMacro("__HAS_FP64__");
482 if (hasFastFMA())
483 Builder.defineMacro("FP_FAST_FMA");
484
485 Builder.defineMacro("__AMDGCN_WAVEFRONT_SIZE", Twine(WavefrontSize));
486 }
487
setAuxTarget(const TargetInfo * Aux)488 void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
489 assert(HalfFormat == Aux->HalfFormat);
490 assert(FloatFormat == Aux->FloatFormat);
491 assert(DoubleFormat == Aux->DoubleFormat);
492
493 // On x86_64 long double is 80-bit extended precision format, which is
494 // not supported by AMDGPU. 128-bit floating point format is also not
495 // supported by AMDGPU. Therefore keep its own format for these two types.
496 auto SaveLongDoubleFormat = LongDoubleFormat;
497 auto SaveFloat128Format = Float128Format;
498 auto SaveLongDoubleWidth = LongDoubleWidth;
499 auto SaveLongDoubleAlign = LongDoubleAlign;
500 copyAuxTarget(Aux);
501 LongDoubleFormat = SaveLongDoubleFormat;
502 Float128Format = SaveFloat128Format;
503 LongDoubleWidth = SaveLongDoubleWidth;
504 LongDoubleAlign = SaveLongDoubleAlign;
505 // For certain builtin types support on the host target, claim they are
506 // support to pass the compilation of the host code during the device-side
507 // compilation.
508 // FIXME: As the side effect, we also accept `__float128` uses in the device
509 // code. To rejct these builtin types supported in the host target but not in
510 // the device target, one approach would support `device_builtin` attribute
511 // so that we could tell the device builtin types from the host ones. The
512 // also solves the different representations of the same builtin type, such
513 // as `size_t` in the MSVC environment.
514 if (Aux->hasFloat128Type()) {
515 HasFloat128 = true;
516 Float128Format = DoubleFormat;
517 }
518 }
519