1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUAsmUtils.h"
12 #include "AMDKernelCodeT.h"
13 #include "GCNSubtarget.h"
14 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15 #include "llvm/BinaryFormat/ELF.h"
16 #include "llvm/IR/Attributes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/IntrinsicsAMDGPU.h"
20 #include "llvm/IR/IntrinsicsR600.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/Support/AMDHSAKernelDescriptor.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/TargetParser.h"
26
27 #define GET_INSTRINFO_NAMED_OPS
28 #define GET_INSTRMAP_INFO
29 #include "AMDGPUGenInstrInfo.inc"
30
31 static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
32 "amdhsa-code-object-version", llvm::cl::Hidden,
33 llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4),
34 llvm::cl::ZeroOrMore);
35
36 namespace {
37
38 /// \returns Bit mask for given bit \p Shift and bit \p Width.
getBitMask(unsigned Shift,unsigned Width)39 unsigned getBitMask(unsigned Shift, unsigned Width) {
40 return ((1 << Width) - 1) << Shift;
41 }
42
43 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
44 ///
45 /// \returns Packed \p Dst.
packBits(unsigned Src,unsigned Dst,unsigned Shift,unsigned Width)46 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
47 Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
48 Dst |= (Src << Shift) & getBitMask(Shift, Width);
49 return Dst;
50 }
51
52 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
53 ///
54 /// \returns Unpacked bits.
unpackBits(unsigned Src,unsigned Shift,unsigned Width)55 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
56 return (Src & getBitMask(Shift, Width)) >> Shift;
57 }
58
59 /// \returns Vmcnt bit shift (lower bits).
getVmcntBitShiftLo()60 unsigned getVmcntBitShiftLo() { return 0; }
61
62 /// \returns Vmcnt bit width (lower bits).
getVmcntBitWidthLo()63 unsigned getVmcntBitWidthLo() { return 4; }
64
65 /// \returns Expcnt bit shift.
getExpcntBitShift()66 unsigned getExpcntBitShift() { return 4; }
67
68 /// \returns Expcnt bit width.
getExpcntBitWidth()69 unsigned getExpcntBitWidth() { return 3; }
70
71 /// \returns Lgkmcnt bit shift.
getLgkmcntBitShift()72 unsigned getLgkmcntBitShift() { return 8; }
73
74 /// \returns Lgkmcnt bit width.
getLgkmcntBitWidth(unsigned VersionMajor)75 unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
76 return (VersionMajor >= 10) ? 6 : 4;
77 }
78
79 /// \returns Vmcnt bit shift (higher bits).
getVmcntBitShiftHi()80 unsigned getVmcntBitShiftHi() { return 14; }
81
82 /// \returns Vmcnt bit width (higher bits).
getVmcntBitWidthHi()83 unsigned getVmcntBitWidthHi() { return 2; }
84
85 } // end namespace anonymous
86
87 namespace llvm {
88
89 namespace AMDGPU {
90
getHsaAbiVersion(const MCSubtargetInfo * STI)91 Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
92 if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
93 return None;
94
95 switch (AmdhsaCodeObjectVersion) {
96 case 2:
97 return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
98 case 3:
99 return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
100 case 4:
101 return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
102 default:
103 report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
104 Twine(AmdhsaCodeObjectVersion));
105 }
106 }
107
isHsaAbiVersion2(const MCSubtargetInfo * STI)108 bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
109 if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
110 return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
111 return false;
112 }
113
isHsaAbiVersion3(const MCSubtargetInfo * STI)114 bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
115 if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
116 return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
117 return false;
118 }
119
isHsaAbiVersion4(const MCSubtargetInfo * STI)120 bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
121 if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
122 return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
123 return false;
124 }
125
isHsaAbiVersion3Or4(const MCSubtargetInfo * STI)126 bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
127 return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI);
128 }
129
130 #define GET_MIMGBaseOpcodesTable_IMPL
131 #define GET_MIMGDimInfoTable_IMPL
132 #define GET_MIMGInfoTable_IMPL
133 #define GET_MIMGLZMappingTable_IMPL
134 #define GET_MIMGMIPMappingTable_IMPL
135 #define GET_MIMGG16MappingTable_IMPL
136 #include "AMDGPUGenSearchableTables.inc"
137
getMIMGOpcode(unsigned BaseOpcode,unsigned MIMGEncoding,unsigned VDataDwords,unsigned VAddrDwords)138 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
139 unsigned VDataDwords, unsigned VAddrDwords) {
140 const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
141 VDataDwords, VAddrDwords);
142 return Info ? Info->Opcode : -1;
143 }
144
getMIMGBaseOpcode(unsigned Opc)145 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
146 const MIMGInfo *Info = getMIMGInfo(Opc);
147 return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
148 }
149
getMaskedMIMGOp(unsigned Opc,unsigned NewChannels)150 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
151 const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
152 const MIMGInfo *NewInfo =
153 getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
154 NewChannels, OrigInfo->VAddrDwords);
155 return NewInfo ? NewInfo->Opcode : -1;
156 }
157
getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo * BaseOpcode,const MIMGDimInfo * Dim,bool IsA16,bool IsG16Supported)158 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
159 const MIMGDimInfo *Dim, bool IsA16,
160 bool IsG16Supported) {
161 unsigned AddrWords = BaseOpcode->NumExtraArgs;
162 unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
163 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
164 if (IsA16)
165 AddrWords += divideCeil(AddrComponents, 2);
166 else
167 AddrWords += AddrComponents;
168
169 // Note: For subtargets that support A16 but not G16, enabling A16 also
170 // enables 16 bit gradients.
171 // For subtargets that support A16 (operand) and G16 (done with a different
172 // instruction encoding), they are independent.
173
174 if (BaseOpcode->Gradients) {
175 if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
176 // There are two gradients per coordinate, we pack them separately.
177 // For the 3d case,
178 // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
179 AddrWords += alignTo<2>(Dim->NumGradients / 2);
180 else
181 AddrWords += Dim->NumGradients;
182 }
183 return AddrWords;
184 }
185
186 struct MUBUFInfo {
187 uint16_t Opcode;
188 uint16_t BaseOpcode;
189 uint8_t elements;
190 bool has_vaddr;
191 bool has_srsrc;
192 bool has_soffset;
193 bool IsBufferInv;
194 };
195
196 struct MTBUFInfo {
197 uint16_t Opcode;
198 uint16_t BaseOpcode;
199 uint8_t elements;
200 bool has_vaddr;
201 bool has_srsrc;
202 bool has_soffset;
203 };
204
205 struct SMInfo {
206 uint16_t Opcode;
207 bool IsBuffer;
208 };
209
210 struct VOPInfo {
211 uint16_t Opcode;
212 bool IsSingle;
213 };
214
215 #define GET_MTBUFInfoTable_DECL
216 #define GET_MTBUFInfoTable_IMPL
217 #define GET_MUBUFInfoTable_DECL
218 #define GET_MUBUFInfoTable_IMPL
219 #define GET_SMInfoTable_DECL
220 #define GET_SMInfoTable_IMPL
221 #define GET_VOP1InfoTable_DECL
222 #define GET_VOP1InfoTable_IMPL
223 #define GET_VOP2InfoTable_DECL
224 #define GET_VOP2InfoTable_IMPL
225 #define GET_VOP3InfoTable_DECL
226 #define GET_VOP3InfoTable_IMPL
227 #include "AMDGPUGenSearchableTables.inc"
228
getMTBUFBaseOpcode(unsigned Opc)229 int getMTBUFBaseOpcode(unsigned Opc) {
230 const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
231 return Info ? Info->BaseOpcode : -1;
232 }
233
getMTBUFOpcode(unsigned BaseOpc,unsigned Elements)234 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
235 const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
236 return Info ? Info->Opcode : -1;
237 }
238
getMTBUFElements(unsigned Opc)239 int getMTBUFElements(unsigned Opc) {
240 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
241 return Info ? Info->elements : 0;
242 }
243
getMTBUFHasVAddr(unsigned Opc)244 bool getMTBUFHasVAddr(unsigned Opc) {
245 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
246 return Info ? Info->has_vaddr : false;
247 }
248
getMTBUFHasSrsrc(unsigned Opc)249 bool getMTBUFHasSrsrc(unsigned Opc) {
250 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
251 return Info ? Info->has_srsrc : false;
252 }
253
getMTBUFHasSoffset(unsigned Opc)254 bool getMTBUFHasSoffset(unsigned Opc) {
255 const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
256 return Info ? Info->has_soffset : false;
257 }
258
getMUBUFBaseOpcode(unsigned Opc)259 int getMUBUFBaseOpcode(unsigned Opc) {
260 const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
261 return Info ? Info->BaseOpcode : -1;
262 }
263
getMUBUFOpcode(unsigned BaseOpc,unsigned Elements)264 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
265 const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
266 return Info ? Info->Opcode : -1;
267 }
268
getMUBUFElements(unsigned Opc)269 int getMUBUFElements(unsigned Opc) {
270 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
271 return Info ? Info->elements : 0;
272 }
273
getMUBUFHasVAddr(unsigned Opc)274 bool getMUBUFHasVAddr(unsigned Opc) {
275 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
276 return Info ? Info->has_vaddr : false;
277 }
278
getMUBUFHasSrsrc(unsigned Opc)279 bool getMUBUFHasSrsrc(unsigned Opc) {
280 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
281 return Info ? Info->has_srsrc : false;
282 }
283
getMUBUFHasSoffset(unsigned Opc)284 bool getMUBUFHasSoffset(unsigned Opc) {
285 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
286 return Info ? Info->has_soffset : false;
287 }
288
getMUBUFIsBufferInv(unsigned Opc)289 bool getMUBUFIsBufferInv(unsigned Opc) {
290 const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
291 return Info ? Info->IsBufferInv : false;
292 }
293
getSMEMIsBuffer(unsigned Opc)294 bool getSMEMIsBuffer(unsigned Opc) {
295 const SMInfo *Info = getSMEMOpcodeHelper(Opc);
296 return Info ? Info->IsBuffer : false;
297 }
298
getVOP1IsSingle(unsigned Opc)299 bool getVOP1IsSingle(unsigned Opc) {
300 const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
301 return Info ? Info->IsSingle : false;
302 }
303
getVOP2IsSingle(unsigned Opc)304 bool getVOP2IsSingle(unsigned Opc) {
305 const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
306 return Info ? Info->IsSingle : false;
307 }
308
getVOP3IsSingle(unsigned Opc)309 bool getVOP3IsSingle(unsigned Opc) {
310 const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
311 return Info ? Info->IsSingle : false;
312 }
313
314 // Wrapper for Tablegen'd function. enum Subtarget is not defined in any
315 // header files, so we need to wrap it in a function that takes unsigned
316 // instead.
getMCOpcode(uint16_t Opcode,unsigned Gen)317 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
318 return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
319 }
320
321 namespace IsaInfo {
322
AMDGPUTargetID(const MCSubtargetInfo & STI)323 AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
324 : STI(STI), XnackSetting(TargetIDSetting::Any),
325 SramEccSetting(TargetIDSetting::Any) {
326 if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
327 XnackSetting = TargetIDSetting::Unsupported;
328 if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
329 SramEccSetting = TargetIDSetting::Unsupported;
330 }
331
setTargetIDFromFeaturesString(StringRef FS)332 void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
333 // Check if xnack or sramecc is explicitly enabled or disabled. In the
334 // absence of the target features we assume we must generate code that can run
335 // in any environment.
336 SubtargetFeatures Features(FS);
337 Optional<bool> XnackRequested;
338 Optional<bool> SramEccRequested;
339
340 for (const std::string &Feature : Features.getFeatures()) {
341 if (Feature == "+xnack")
342 XnackRequested = true;
343 else if (Feature == "-xnack")
344 XnackRequested = false;
345 else if (Feature == "+sramecc")
346 SramEccRequested = true;
347 else if (Feature == "-sramecc")
348 SramEccRequested = false;
349 }
350
351 bool XnackSupported = isXnackSupported();
352 bool SramEccSupported = isSramEccSupported();
353
354 if (XnackRequested) {
355 if (XnackSupported) {
356 XnackSetting =
357 *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
358 } else {
359 // If a specific xnack setting was requested and this GPU does not support
360 // xnack emit a warning. Setting will remain set to "Unsupported".
361 if (*XnackRequested) {
362 errs() << "warning: xnack 'On' was requested for a processor that does "
363 "not support it!\n";
364 } else {
365 errs() << "warning: xnack 'Off' was requested for a processor that "
366 "does not support it!\n";
367 }
368 }
369 }
370
371 if (SramEccRequested) {
372 if (SramEccSupported) {
373 SramEccSetting =
374 *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
375 } else {
376 // If a specific sramecc setting was requested and this GPU does not
377 // support sramecc emit a warning. Setting will remain set to
378 // "Unsupported".
379 if (*SramEccRequested) {
380 errs() << "warning: sramecc 'On' was requested for a processor that "
381 "does not support it!\n";
382 } else {
383 errs() << "warning: sramecc 'Off' was requested for a processor that "
384 "does not support it!\n";
385 }
386 }
387 }
388 }
389
390 static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString)391 getTargetIDSettingFromFeatureString(StringRef FeatureString) {
392 if (FeatureString.endswith("-"))
393 return TargetIDSetting::Off;
394 if (FeatureString.endswith("+"))
395 return TargetIDSetting::On;
396
397 llvm_unreachable("Malformed feature string");
398 }
399
setTargetIDFromTargetIDStream(StringRef TargetID)400 void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
401 SmallVector<StringRef, 3> TargetIDSplit;
402 TargetID.split(TargetIDSplit, ':');
403
404 for (const auto &FeatureString : TargetIDSplit) {
405 if (FeatureString.startswith("xnack"))
406 XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
407 if (FeatureString.startswith("sramecc"))
408 SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
409 }
410 }
411
toString() const412 std::string AMDGPUTargetID::toString() const {
413 std::string StringRep = "";
414 raw_string_ostream StreamRep(StringRep);
415
416 auto TargetTriple = STI.getTargetTriple();
417 auto Version = getIsaVersion(STI.getCPU());
418
419 StreamRep << TargetTriple.getArchName() << '-'
420 << TargetTriple.getVendorName() << '-'
421 << TargetTriple.getOSName() << '-'
422 << TargetTriple.getEnvironmentName() << '-';
423
424 std::string Processor = "";
425 // TODO: Following else statement is present here because we used various
426 // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
427 // Remove once all aliases are removed from GCNProcessors.td.
428 if (Version.Major >= 9)
429 Processor = STI.getCPU().str();
430 else
431 Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
432 Twine(Version.Stepping))
433 .str();
434
435 std::string Features = "";
436 if (Optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
437 switch (*HsaAbiVersion) {
438 case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
439 // Code object V2 only supported specific processors and had fixed
440 // settings for the XNACK.
441 if (Processor == "gfx600") {
442 } else if (Processor == "gfx601") {
443 } else if (Processor == "gfx602") {
444 } else if (Processor == "gfx700") {
445 } else if (Processor == "gfx701") {
446 } else if (Processor == "gfx702") {
447 } else if (Processor == "gfx703") {
448 } else if (Processor == "gfx704") {
449 } else if (Processor == "gfx705") {
450 } else if (Processor == "gfx801") {
451 if (!isXnackOnOrAny())
452 report_fatal_error(
453 "AMD GPU code object V2 does not support processor " + Processor +
454 " without XNACK");
455 } else if (Processor == "gfx802") {
456 } else if (Processor == "gfx803") {
457 } else if (Processor == "gfx805") {
458 } else if (Processor == "gfx810") {
459 if (!isXnackOnOrAny())
460 report_fatal_error(
461 "AMD GPU code object V2 does not support processor " + Processor +
462 " without XNACK");
463 } else if (Processor == "gfx900") {
464 if (isXnackOnOrAny())
465 Processor = "gfx901";
466 } else if (Processor == "gfx902") {
467 if (isXnackOnOrAny())
468 Processor = "gfx903";
469 } else if (Processor == "gfx904") {
470 if (isXnackOnOrAny())
471 Processor = "gfx905";
472 } else if (Processor == "gfx906") {
473 if (isXnackOnOrAny())
474 Processor = "gfx907";
475 } else if (Processor == "gfx90c") {
476 if (isXnackOnOrAny())
477 report_fatal_error(
478 "AMD GPU code object V2 does not support processor " + Processor +
479 " with XNACK being ON or ANY");
480 } else {
481 report_fatal_error(
482 "AMD GPU code object V2 does not support processor " + Processor);
483 }
484 break;
485 case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
486 // xnack.
487 if (isXnackOnOrAny())
488 Features += "+xnack";
489 // In code object v2 and v3, "sramecc" feature was spelled with a
490 // hyphen ("sram-ecc").
491 if (isSramEccOnOrAny())
492 Features += "+sram-ecc";
493 break;
494 case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
495 // sramecc.
496 if (getSramEccSetting() == TargetIDSetting::Off)
497 Features += ":sramecc-";
498 else if (getSramEccSetting() == TargetIDSetting::On)
499 Features += ":sramecc+";
500 // xnack.
501 if (getXnackSetting() == TargetIDSetting::Off)
502 Features += ":xnack-";
503 else if (getXnackSetting() == TargetIDSetting::On)
504 Features += ":xnack+";
505 break;
506 default:
507 break;
508 }
509 }
510
511 StreamRep << Processor << Features;
512
513 StreamRep.flush();
514 return StringRep;
515 }
516
getWavefrontSize(const MCSubtargetInfo * STI)517 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
518 if (STI->getFeatureBits().test(FeatureWavefrontSize16))
519 return 16;
520 if (STI->getFeatureBits().test(FeatureWavefrontSize32))
521 return 32;
522
523 return 64;
524 }
525
getLocalMemorySize(const MCSubtargetInfo * STI)526 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
527 if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
528 return 32768;
529 if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
530 return 65536;
531
532 return 0;
533 }
534
getEUsPerCU(const MCSubtargetInfo * STI)535 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
536 // "Per CU" really means "per whatever functional block the waves of a
537 // workgroup must share". For gfx10 in CU mode this is the CU, which contains
538 // two SIMDs.
539 if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
540 return 2;
541 // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
542 // two CUs, so a total of four SIMDs.
543 return 4;
544 }
545
getMaxWorkGroupsPerCU(const MCSubtargetInfo * STI,unsigned FlatWorkGroupSize)546 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
547 unsigned FlatWorkGroupSize) {
548 assert(FlatWorkGroupSize != 0);
549 if (STI->getTargetTriple().getArch() != Triple::amdgcn)
550 return 8;
551 unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
552 if (N == 1)
553 return 40;
554 N = 40 / N;
555 return std::min(N, 16u);
556 }
557
getMinWavesPerEU(const MCSubtargetInfo * STI)558 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
559 return 1;
560 }
561
getMaxWavesPerEU(const MCSubtargetInfo * STI)562 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
563 // FIXME: Need to take scratch memory into account.
564 if (isGFX90A(*STI))
565 return 8;
566 if (!isGFX10Plus(*STI))
567 return 10;
568 return hasGFX10_3Insts(*STI) ? 16 : 20;
569 }
570
getWavesPerEUForWorkGroup(const MCSubtargetInfo * STI,unsigned FlatWorkGroupSize)571 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
572 unsigned FlatWorkGroupSize) {
573 return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
574 getEUsPerCU(STI));
575 }
576
getMinFlatWorkGroupSize(const MCSubtargetInfo * STI)577 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
578 return 1;
579 }
580
getMaxFlatWorkGroupSize(const MCSubtargetInfo * STI)581 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
582 // Some subtargets allow encoding 2048, but this isn't tested or supported.
583 return 1024;
584 }
585
getWavesPerWorkGroup(const MCSubtargetInfo * STI,unsigned FlatWorkGroupSize)586 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
587 unsigned FlatWorkGroupSize) {
588 return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
589 }
590
getSGPRAllocGranule(const MCSubtargetInfo * STI)591 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
592 IsaVersion Version = getIsaVersion(STI->getCPU());
593 if (Version.Major >= 10)
594 return getAddressableNumSGPRs(STI);
595 if (Version.Major >= 8)
596 return 16;
597 return 8;
598 }
599
getSGPREncodingGranule(const MCSubtargetInfo * STI)600 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
601 return 8;
602 }
603
getTotalNumSGPRs(const MCSubtargetInfo * STI)604 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
605 IsaVersion Version = getIsaVersion(STI->getCPU());
606 if (Version.Major >= 8)
607 return 800;
608 return 512;
609 }
610
getAddressableNumSGPRs(const MCSubtargetInfo * STI)611 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
612 if (STI->getFeatureBits().test(FeatureSGPRInitBug))
613 return FIXED_NUM_SGPRS_FOR_INIT_BUG;
614
615 IsaVersion Version = getIsaVersion(STI->getCPU());
616 if (Version.Major >= 10)
617 return 106;
618 if (Version.Major >= 8)
619 return 102;
620 return 104;
621 }
622
getMinNumSGPRs(const MCSubtargetInfo * STI,unsigned WavesPerEU)623 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
624 assert(WavesPerEU != 0);
625
626 IsaVersion Version = getIsaVersion(STI->getCPU());
627 if (Version.Major >= 10)
628 return 0;
629
630 if (WavesPerEU >= getMaxWavesPerEU(STI))
631 return 0;
632
633 unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
634 if (STI->getFeatureBits().test(FeatureTrapHandler))
635 MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
636 MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
637 return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
638 }
639
getMaxNumSGPRs(const MCSubtargetInfo * STI,unsigned WavesPerEU,bool Addressable)640 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
641 bool Addressable) {
642 assert(WavesPerEU != 0);
643
644 unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
645 IsaVersion Version = getIsaVersion(STI->getCPU());
646 if (Version.Major >= 10)
647 return Addressable ? AddressableNumSGPRs : 108;
648 if (Version.Major >= 8 && !Addressable)
649 AddressableNumSGPRs = 112;
650 unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
651 if (STI->getFeatureBits().test(FeatureTrapHandler))
652 MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
653 MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
654 return std::min(MaxNumSGPRs, AddressableNumSGPRs);
655 }
656
getNumExtraSGPRs(const MCSubtargetInfo * STI,bool VCCUsed,bool FlatScrUsed,bool XNACKUsed)657 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
658 bool FlatScrUsed, bool XNACKUsed) {
659 unsigned ExtraSGPRs = 0;
660 if (VCCUsed)
661 ExtraSGPRs = 2;
662
663 IsaVersion Version = getIsaVersion(STI->getCPU());
664 if (Version.Major >= 10)
665 return ExtraSGPRs;
666
667 if (Version.Major < 8) {
668 if (FlatScrUsed)
669 ExtraSGPRs = 4;
670 } else {
671 if (XNACKUsed)
672 ExtraSGPRs = 4;
673
674 if (FlatScrUsed)
675 ExtraSGPRs = 6;
676 }
677
678 return ExtraSGPRs;
679 }
680
getNumExtraSGPRs(const MCSubtargetInfo * STI,bool VCCUsed,bool FlatScrUsed)681 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
682 bool FlatScrUsed) {
683 return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
684 STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
685 }
686
getNumSGPRBlocks(const MCSubtargetInfo * STI,unsigned NumSGPRs)687 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
688 NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
689 // SGPRBlocks is actual number of SGPR blocks minus 1.
690 return NumSGPRs / getSGPREncodingGranule(STI) - 1;
691 }
692
getVGPRAllocGranule(const MCSubtargetInfo * STI,Optional<bool> EnableWavefrontSize32)693 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
694 Optional<bool> EnableWavefrontSize32) {
695 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
696 return 8;
697
698 bool IsWave32 = EnableWavefrontSize32 ?
699 *EnableWavefrontSize32 :
700 STI->getFeatureBits().test(FeatureWavefrontSize32);
701
702 if (hasGFX10_3Insts(*STI))
703 return IsWave32 ? 16 : 8;
704
705 return IsWave32 ? 8 : 4;
706 }
707
getVGPREncodingGranule(const MCSubtargetInfo * STI,Optional<bool> EnableWavefrontSize32)708 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
709 Optional<bool> EnableWavefrontSize32) {
710 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
711 return 8;
712
713 bool IsWave32 = EnableWavefrontSize32 ?
714 *EnableWavefrontSize32 :
715 STI->getFeatureBits().test(FeatureWavefrontSize32);
716
717 return IsWave32 ? 8 : 4;
718 }
719
getTotalNumVGPRs(const MCSubtargetInfo * STI)720 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
721 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
722 return 512;
723 if (!isGFX10Plus(*STI))
724 return 256;
725 return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
726 }
727
getAddressableNumVGPRs(const MCSubtargetInfo * STI)728 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
729 if (STI->getFeatureBits().test(FeatureGFX90AInsts))
730 return 512;
731 return 256;
732 }
733
getMinNumVGPRs(const MCSubtargetInfo * STI,unsigned WavesPerEU)734 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
735 assert(WavesPerEU != 0);
736
737 if (WavesPerEU >= getMaxWavesPerEU(STI))
738 return 0;
739 unsigned MinNumVGPRs =
740 alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
741 getVGPRAllocGranule(STI)) + 1;
742 return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
743 }
744
getMaxNumVGPRs(const MCSubtargetInfo * STI,unsigned WavesPerEU)745 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
746 assert(WavesPerEU != 0);
747
748 unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
749 getVGPRAllocGranule(STI));
750 unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
751 return std::min(MaxNumVGPRs, AddressableNumVGPRs);
752 }
753
getNumVGPRBlocks(const MCSubtargetInfo * STI,unsigned NumVGPRs,Optional<bool> EnableWavefrontSize32)754 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
755 Optional<bool> EnableWavefrontSize32) {
756 NumVGPRs = alignTo(std::max(1u, NumVGPRs),
757 getVGPREncodingGranule(STI, EnableWavefrontSize32));
758 // VGPRBlocks is actual number of VGPR blocks minus 1.
759 return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
760 }
761
762 } // end namespace IsaInfo
763
initDefaultAMDKernelCodeT(amd_kernel_code_t & Header,const MCSubtargetInfo * STI)764 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
765 const MCSubtargetInfo *STI) {
766 IsaVersion Version = getIsaVersion(STI->getCPU());
767
768 memset(&Header, 0, sizeof(Header));
769
770 Header.amd_kernel_code_version_major = 1;
771 Header.amd_kernel_code_version_minor = 2;
772 Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
773 Header.amd_machine_version_major = Version.Major;
774 Header.amd_machine_version_minor = Version.Minor;
775 Header.amd_machine_version_stepping = Version.Stepping;
776 Header.kernel_code_entry_byte_offset = sizeof(Header);
777 Header.wavefront_size = 6;
778
779 // If the code object does not support indirect functions, then the value must
780 // be 0xffffffff.
781 Header.call_convention = -1;
782
783 // These alignment values are specified in powers of two, so alignment =
784 // 2^n. The minimum alignment is 2^4 = 16.
785 Header.kernarg_segment_alignment = 4;
786 Header.group_segment_alignment = 4;
787 Header.private_segment_alignment = 4;
788
789 if (Version.Major >= 10) {
790 if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
791 Header.wavefront_size = 5;
792 Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
793 }
794 Header.compute_pgm_resource_registers |=
795 S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
796 S_00B848_MEM_ORDERED(1);
797 }
798 }
799
getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo * STI)800 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
801 const MCSubtargetInfo *STI) {
802 IsaVersion Version = getIsaVersion(STI->getCPU());
803
804 amdhsa::kernel_descriptor_t KD;
805 memset(&KD, 0, sizeof(KD));
806
807 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
808 amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
809 amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
810 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
811 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
812 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
813 amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
814 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
815 amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
816 if (Version.Major >= 10) {
817 AMDHSA_BITS_SET(KD.kernel_code_properties,
818 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
819 STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
820 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
821 amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
822 STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
823 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
824 amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
825 }
826 if (AMDGPU::isGFX90A(*STI)) {
827 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
828 amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
829 STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
830 }
831 return KD;
832 }
833
isGroupSegment(const GlobalValue * GV)834 bool isGroupSegment(const GlobalValue *GV) {
835 return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
836 }
837
isGlobalSegment(const GlobalValue * GV)838 bool isGlobalSegment(const GlobalValue *GV) {
839 return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
840 }
841
isReadOnlySegment(const GlobalValue * GV)842 bool isReadOnlySegment(const GlobalValue *GV) {
843 unsigned AS = GV->getAddressSpace();
844 return AS == AMDGPUAS::CONSTANT_ADDRESS ||
845 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
846 }
847
shouldEmitConstantsToTextSection(const Triple & TT)848 bool shouldEmitConstantsToTextSection(const Triple &TT) {
849 return TT.getArch() == Triple::r600;
850 }
851
getIntegerAttribute(const Function & F,StringRef Name,int Default)852 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
853 Attribute A = F.getFnAttribute(Name);
854 int Result = Default;
855
856 if (A.isStringAttribute()) {
857 StringRef Str = A.getValueAsString();
858 if (Str.getAsInteger(0, Result)) {
859 LLVMContext &Ctx = F.getContext();
860 Ctx.emitError("can't parse integer attribute " + Name);
861 }
862 }
863
864 return Result;
865 }
866
getIntegerPairAttribute(const Function & F,StringRef Name,std::pair<int,int> Default,bool OnlyFirstRequired)867 std::pair<int, int> getIntegerPairAttribute(const Function &F,
868 StringRef Name,
869 std::pair<int, int> Default,
870 bool OnlyFirstRequired) {
871 Attribute A = F.getFnAttribute(Name);
872 if (!A.isStringAttribute())
873 return Default;
874
875 LLVMContext &Ctx = F.getContext();
876 std::pair<int, int> Ints = Default;
877 std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
878 if (Strs.first.trim().getAsInteger(0, Ints.first)) {
879 Ctx.emitError("can't parse first integer attribute " + Name);
880 return Default;
881 }
882 if (Strs.second.trim().getAsInteger(0, Ints.second)) {
883 if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
884 Ctx.emitError("can't parse second integer attribute " + Name);
885 return Default;
886 }
887 }
888
889 return Ints;
890 }
891
getVmcntBitMask(const IsaVersion & Version)892 unsigned getVmcntBitMask(const IsaVersion &Version) {
893 unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
894 if (Version.Major < 9)
895 return VmcntLo;
896
897 unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
898 return VmcntLo | VmcntHi;
899 }
900
getExpcntBitMask(const IsaVersion & Version)901 unsigned getExpcntBitMask(const IsaVersion &Version) {
902 return (1 << getExpcntBitWidth()) - 1;
903 }
904
getLgkmcntBitMask(const IsaVersion & Version)905 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
906 return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
907 }
908
getWaitcntBitMask(const IsaVersion & Version)909 unsigned getWaitcntBitMask(const IsaVersion &Version) {
910 unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
911 unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
912 unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
913 getLgkmcntBitWidth(Version.Major));
914 unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
915 if (Version.Major < 9)
916 return Waitcnt;
917
918 unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
919 return Waitcnt | VmcntHi;
920 }
921
decodeVmcnt(const IsaVersion & Version,unsigned Waitcnt)922 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
923 unsigned VmcntLo =
924 unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
925 if (Version.Major < 9)
926 return VmcntLo;
927
928 unsigned VmcntHi =
929 unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
930 VmcntHi <<= getVmcntBitWidthLo();
931 return VmcntLo | VmcntHi;
932 }
933
decodeExpcnt(const IsaVersion & Version,unsigned Waitcnt)934 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
935 return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
936 }
937
decodeLgkmcnt(const IsaVersion & Version,unsigned Waitcnt)938 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
939 return unpackBits(Waitcnt, getLgkmcntBitShift(),
940 getLgkmcntBitWidth(Version.Major));
941 }
942
decodeWaitcnt(const IsaVersion & Version,unsigned Waitcnt,unsigned & Vmcnt,unsigned & Expcnt,unsigned & Lgkmcnt)943 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
944 unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
945 Vmcnt = decodeVmcnt(Version, Waitcnt);
946 Expcnt = decodeExpcnt(Version, Waitcnt);
947 Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
948 }
949
decodeWaitcnt(const IsaVersion & Version,unsigned Encoded)950 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
951 Waitcnt Decoded;
952 Decoded.VmCnt = decodeVmcnt(Version, Encoded);
953 Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
954 Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
955 return Decoded;
956 }
957
encodeVmcnt(const IsaVersion & Version,unsigned Waitcnt,unsigned Vmcnt)958 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
959 unsigned Vmcnt) {
960 Waitcnt =
961 packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
962 if (Version.Major < 9)
963 return Waitcnt;
964
965 Vmcnt >>= getVmcntBitWidthLo();
966 return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
967 }
968
encodeExpcnt(const IsaVersion & Version,unsigned Waitcnt,unsigned Expcnt)969 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
970 unsigned Expcnt) {
971 return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
972 }
973
encodeLgkmcnt(const IsaVersion & Version,unsigned Waitcnt,unsigned Lgkmcnt)974 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
975 unsigned Lgkmcnt) {
976 return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
977 getLgkmcntBitWidth(Version.Major));
978 }
979
encodeWaitcnt(const IsaVersion & Version,unsigned Vmcnt,unsigned Expcnt,unsigned Lgkmcnt)980 unsigned encodeWaitcnt(const IsaVersion &Version,
981 unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
982 unsigned Waitcnt = getWaitcntBitMask(Version);
983 Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
984 Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
985 Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
986 return Waitcnt;
987 }
988
encodeWaitcnt(const IsaVersion & Version,const Waitcnt & Decoded)989 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
990 return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
991 }
992
993 //===----------------------------------------------------------------------===//
994 // hwreg
995 //===----------------------------------------------------------------------===//
996
997 namespace Hwreg {
998
getHwregId(const StringRef Name)999 int64_t getHwregId(const StringRef Name) {
1000 for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
1001 if (IdSymbolic[Id] && Name == IdSymbolic[Id])
1002 return Id;
1003 }
1004 return ID_UNKNOWN_;
1005 }
1006
getLastSymbolicHwreg(const MCSubtargetInfo & STI)1007 static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
1008 if (isSI(STI) || isCI(STI) || isVI(STI))
1009 return ID_SYMBOLIC_FIRST_GFX9_;
1010 else if (isGFX9(STI))
1011 return ID_SYMBOLIC_FIRST_GFX10_;
1012 else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
1013 return ID_SYMBOLIC_FIRST_GFX1030_;
1014 else
1015 return ID_SYMBOLIC_LAST_;
1016 }
1017
isValidHwreg(int64_t Id,const MCSubtargetInfo & STI)1018 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
1019 return
1020 ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
1021 IdSymbolic[Id] && (Id != ID_XNACK_MASK || !AMDGPU::isGFX10_BEncoding(STI));
1022 }
1023
isValidHwreg(int64_t Id)1024 bool isValidHwreg(int64_t Id) {
1025 return 0 <= Id && isUInt<ID_WIDTH_>(Id);
1026 }
1027
isValidHwregOffset(int64_t Offset)1028 bool isValidHwregOffset(int64_t Offset) {
1029 return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
1030 }
1031
isValidHwregWidth(int64_t Width)1032 bool isValidHwregWidth(int64_t Width) {
1033 return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
1034 }
1035
encodeHwreg(uint64_t Id,uint64_t Offset,uint64_t Width)1036 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
1037 return (Id << ID_SHIFT_) |
1038 (Offset << OFFSET_SHIFT_) |
1039 ((Width - 1) << WIDTH_M1_SHIFT_);
1040 }
1041
getHwreg(unsigned Id,const MCSubtargetInfo & STI)1042 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
1043 return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
1044 }
1045
decodeHwreg(unsigned Val,unsigned & Id,unsigned & Offset,unsigned & Width)1046 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
1047 Id = (Val & ID_MASK_) >> ID_SHIFT_;
1048 Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
1049 Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
1050 }
1051
1052 } // namespace Hwreg
1053
1054 //===----------------------------------------------------------------------===//
1055 // exp tgt
1056 //===----------------------------------------------------------------------===//
1057
1058 namespace Exp {
1059
1060 struct ExpTgt {
1061 StringLiteral Name;
1062 unsigned Tgt;
1063 unsigned MaxIndex;
1064 };
1065
1066 static constexpr ExpTgt ExpTgtInfo[] = {
1067 {{"null"}, ET_NULL, ET_NULL_MAX_IDX},
1068 {{"mrtz"}, ET_MRTZ, ET_MRTZ_MAX_IDX},
1069 {{"prim"}, ET_PRIM, ET_PRIM_MAX_IDX},
1070 {{"mrt"}, ET_MRT0, ET_MRT_MAX_IDX},
1071 {{"pos"}, ET_POS0, ET_POS_MAX_IDX},
1072 {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
1073 };
1074
getTgtName(unsigned Id,StringRef & Name,int & Index)1075 bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1076 for (const ExpTgt &Val : ExpTgtInfo) {
1077 if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1078 Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1079 Name = Val.Name;
1080 return true;
1081 }
1082 }
1083 return false;
1084 }
1085
getTgtId(const StringRef Name)1086 unsigned getTgtId(const StringRef Name) {
1087
1088 for (const ExpTgt &Val : ExpTgtInfo) {
1089 if (Val.MaxIndex == 0 && Name == Val.Name)
1090 return Val.Tgt;
1091
1092 if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
1093 StringRef Suffix = Name.drop_front(Val.Name.size());
1094
1095 unsigned Id;
1096 if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1097 return ET_INVALID;
1098
1099 // Disable leading zeroes
1100 if (Suffix.size() > 1 && Suffix[0] == '0')
1101 return ET_INVALID;
1102
1103 return Val.Tgt + Id;
1104 }
1105 }
1106 return ET_INVALID;
1107 }
1108
isSupportedTgtId(unsigned Id,const MCSubtargetInfo & STI)1109 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1110 return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI);
1111 }
1112
1113 } // namespace Exp
1114
1115 //===----------------------------------------------------------------------===//
1116 // MTBUF Format
1117 //===----------------------------------------------------------------------===//
1118
1119 namespace MTBUFFormat {
1120
getDfmt(const StringRef Name)1121 int64_t getDfmt(const StringRef Name) {
1122 for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1123 if (Name == DfmtSymbolic[Id])
1124 return Id;
1125 }
1126 return DFMT_UNDEF;
1127 }
1128
getDfmtName(unsigned Id)1129 StringRef getDfmtName(unsigned Id) {
1130 assert(Id <= DFMT_MAX);
1131 return DfmtSymbolic[Id];
1132 }
1133
getNfmtLookupTable(const MCSubtargetInfo & STI)1134 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1135 if (isSI(STI) || isCI(STI))
1136 return NfmtSymbolicSICI;
1137 if (isVI(STI) || isGFX9(STI))
1138 return NfmtSymbolicVI;
1139 return NfmtSymbolicGFX10;
1140 }
1141
getNfmt(const StringRef Name,const MCSubtargetInfo & STI)1142 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1143 auto lookupTable = getNfmtLookupTable(STI);
1144 for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1145 if (Name == lookupTable[Id])
1146 return Id;
1147 }
1148 return NFMT_UNDEF;
1149 }
1150
getNfmtName(unsigned Id,const MCSubtargetInfo & STI)1151 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1152 assert(Id <= NFMT_MAX);
1153 return getNfmtLookupTable(STI)[Id];
1154 }
1155
isValidDfmtNfmt(unsigned Id,const MCSubtargetInfo & STI)1156 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1157 unsigned Dfmt;
1158 unsigned Nfmt;
1159 decodeDfmtNfmt(Id, Dfmt, Nfmt);
1160 return isValidNfmt(Nfmt, STI);
1161 }
1162
isValidNfmt(unsigned Id,const MCSubtargetInfo & STI)1163 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1164 return !getNfmtName(Id, STI).empty();
1165 }
1166
encodeDfmtNfmt(unsigned Dfmt,unsigned Nfmt)1167 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1168 return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1169 }
1170
decodeDfmtNfmt(unsigned Format,unsigned & Dfmt,unsigned & Nfmt)1171 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1172 Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1173 Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1174 }
1175
getUnifiedFormat(const StringRef Name)1176 int64_t getUnifiedFormat(const StringRef Name) {
1177 for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
1178 if (Name == UfmtSymbolic[Id])
1179 return Id;
1180 }
1181 return UFMT_UNDEF;
1182 }
1183
getUnifiedFormatName(unsigned Id)1184 StringRef getUnifiedFormatName(unsigned Id) {
1185 return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : "";
1186 }
1187
isValidUnifiedFormat(unsigned Id)1188 bool isValidUnifiedFormat(unsigned Id) {
1189 return Id <= UFMT_LAST;
1190 }
1191
convertDfmtNfmt2Ufmt(unsigned Dfmt,unsigned Nfmt)1192 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) {
1193 int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1194 for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
1195 if (Fmt == DfmtNfmt2UFmt[Id])
1196 return Id;
1197 }
1198 return UFMT_UNDEF;
1199 }
1200
isValidFormatEncoding(unsigned Val,const MCSubtargetInfo & STI)1201 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1202 return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1203 }
1204
getDefaultFormatEncoding(const MCSubtargetInfo & STI)1205 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1206 if (isGFX10Plus(STI))
1207 return UFMT_DEFAULT;
1208 return DFMT_NFMT_DEFAULT;
1209 }
1210
1211 } // namespace MTBUFFormat
1212
1213 //===----------------------------------------------------------------------===//
1214 // SendMsg
1215 //===----------------------------------------------------------------------===//
1216
1217 namespace SendMsg {
1218
getMsgId(const StringRef Name)1219 int64_t getMsgId(const StringRef Name) {
1220 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
1221 if (IdSymbolic[i] && Name == IdSymbolic[i])
1222 return i;
1223 }
1224 return ID_UNKNOWN_;
1225 }
1226
isValidMsgId(int64_t MsgId,const MCSubtargetInfo & STI,bool Strict)1227 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
1228 if (Strict) {
1229 switch (MsgId) {
1230 case ID_SAVEWAVE:
1231 return isVI(STI) || isGFX9Plus(STI);
1232 case ID_STALL_WAVE_GEN:
1233 case ID_HALT_WAVES:
1234 case ID_ORDERED_PS_DONE:
1235 case ID_GS_ALLOC_REQ:
1236 case ID_GET_DOORBELL:
1237 return isGFX9Plus(STI);
1238 case ID_EARLY_PRIM_DEALLOC:
1239 return isGFX9(STI);
1240 case ID_GET_DDID:
1241 return isGFX10Plus(STI);
1242 default:
1243 return 0 <= MsgId && MsgId < ID_GAPS_LAST_ && IdSymbolic[MsgId];
1244 }
1245 } else {
1246 return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
1247 }
1248 }
1249
getMsgName(int64_t MsgId)1250 StringRef getMsgName(int64_t MsgId) {
1251 assert(0 <= MsgId && MsgId < ID_GAPS_LAST_);
1252 return IdSymbolic[MsgId];
1253 }
1254
getMsgOpId(int64_t MsgId,const StringRef Name)1255 int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1256 const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1257 const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1258 const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1259 for (int i = F; i < L; ++i) {
1260 if (Name == S[i]) {
1261 return i;
1262 }
1263 }
1264 return OP_UNKNOWN_;
1265 }
1266
isValidMsgOp(int64_t MsgId,int64_t OpId,const MCSubtargetInfo & STI,bool Strict)1267 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1268 bool Strict) {
1269 assert(isValidMsgId(MsgId, STI, Strict));
1270
1271 if (!Strict)
1272 return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1273
1274 switch(MsgId)
1275 {
1276 case ID_GS:
1277 return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1278 case ID_GS_DONE:
1279 return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1280 case ID_SYSMSG:
1281 return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1282 default:
1283 return OpId == OP_NONE_;
1284 }
1285 }
1286
getMsgOpName(int64_t MsgId,int64_t OpId)1287 StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
1288 assert(msgRequiresOp(MsgId));
1289 return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1290 }
1291
isValidMsgStream(int64_t MsgId,int64_t OpId,int64_t StreamId,const MCSubtargetInfo & STI,bool Strict)1292 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1293 const MCSubtargetInfo &STI, bool Strict) {
1294 assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1295
1296 if (!Strict)
1297 return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1298
1299 switch(MsgId)
1300 {
1301 case ID_GS:
1302 return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1303 case ID_GS_DONE:
1304 return (OpId == OP_GS_NOP)?
1305 (StreamId == STREAM_ID_NONE_) :
1306 (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1307 default:
1308 return StreamId == STREAM_ID_NONE_;
1309 }
1310 }
1311
msgRequiresOp(int64_t MsgId)1312 bool msgRequiresOp(int64_t MsgId) {
1313 return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
1314 }
1315
msgSupportsStream(int64_t MsgId,int64_t OpId)1316 bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
1317 return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
1318 }
1319
decodeMsg(unsigned Val,uint16_t & MsgId,uint16_t & OpId,uint16_t & StreamId)1320 void decodeMsg(unsigned Val,
1321 uint16_t &MsgId,
1322 uint16_t &OpId,
1323 uint16_t &StreamId) {
1324 MsgId = Val & ID_MASK_;
1325 OpId = (Val & OP_MASK_) >> OP_SHIFT_;
1326 StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
1327 }
1328
encodeMsg(uint64_t MsgId,uint64_t OpId,uint64_t StreamId)1329 uint64_t encodeMsg(uint64_t MsgId,
1330 uint64_t OpId,
1331 uint64_t StreamId) {
1332 return (MsgId << ID_SHIFT_) |
1333 (OpId << OP_SHIFT_) |
1334 (StreamId << STREAM_ID_SHIFT_);
1335 }
1336
1337 } // namespace SendMsg
1338
1339 //===----------------------------------------------------------------------===//
1340 //
1341 //===----------------------------------------------------------------------===//
1342
getInitialPSInputAddr(const Function & F)1343 unsigned getInitialPSInputAddr(const Function &F) {
1344 return getIntegerAttribute(F, "InitialPSInputAddr", 0);
1345 }
1346
isShader(CallingConv::ID cc)1347 bool isShader(CallingConv::ID cc) {
1348 switch(cc) {
1349 case CallingConv::AMDGPU_VS:
1350 case CallingConv::AMDGPU_LS:
1351 case CallingConv::AMDGPU_HS:
1352 case CallingConv::AMDGPU_ES:
1353 case CallingConv::AMDGPU_GS:
1354 case CallingConv::AMDGPU_PS:
1355 case CallingConv::AMDGPU_CS:
1356 return true;
1357 default:
1358 return false;
1359 }
1360 }
1361
isGraphics(CallingConv::ID cc)1362 bool isGraphics(CallingConv::ID cc) {
1363 return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
1364 }
1365
isCompute(CallingConv::ID cc)1366 bool isCompute(CallingConv::ID cc) {
1367 return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
1368 }
1369
isEntryFunctionCC(CallingConv::ID CC)1370 bool isEntryFunctionCC(CallingConv::ID CC) {
1371 switch (CC) {
1372 case CallingConv::AMDGPU_KERNEL:
1373 case CallingConv::SPIR_KERNEL:
1374 case CallingConv::AMDGPU_VS:
1375 case CallingConv::AMDGPU_GS:
1376 case CallingConv::AMDGPU_PS:
1377 case CallingConv::AMDGPU_CS:
1378 case CallingConv::AMDGPU_ES:
1379 case CallingConv::AMDGPU_HS:
1380 case CallingConv::AMDGPU_LS:
1381 return true;
1382 default:
1383 return false;
1384 }
1385 }
1386
isModuleEntryFunctionCC(CallingConv::ID CC)1387 bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1388 switch (CC) {
1389 case CallingConv::AMDGPU_Gfx:
1390 return true;
1391 default:
1392 return isEntryFunctionCC(CC);
1393 }
1394 }
1395
hasXNACK(const MCSubtargetInfo & STI)1396 bool hasXNACK(const MCSubtargetInfo &STI) {
1397 return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
1398 }
1399
hasSRAMECC(const MCSubtargetInfo & STI)1400 bool hasSRAMECC(const MCSubtargetInfo &STI) {
1401 return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
1402 }
1403
hasMIMG_R128(const MCSubtargetInfo & STI)1404 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
1405 return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
1406 }
1407
hasGFX10A16(const MCSubtargetInfo & STI)1408 bool hasGFX10A16(const MCSubtargetInfo &STI) {
1409 return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
1410 }
1411
hasG16(const MCSubtargetInfo & STI)1412 bool hasG16(const MCSubtargetInfo &STI) {
1413 return STI.getFeatureBits()[AMDGPU::FeatureG16];
1414 }
1415
hasPackedD16(const MCSubtargetInfo & STI)1416 bool hasPackedD16(const MCSubtargetInfo &STI) {
1417 return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
1418 }
1419
isSI(const MCSubtargetInfo & STI)1420 bool isSI(const MCSubtargetInfo &STI) {
1421 return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
1422 }
1423
isCI(const MCSubtargetInfo & STI)1424 bool isCI(const MCSubtargetInfo &STI) {
1425 return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
1426 }
1427
isVI(const MCSubtargetInfo & STI)1428 bool isVI(const MCSubtargetInfo &STI) {
1429 return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
1430 }
1431
isGFX9(const MCSubtargetInfo & STI)1432 bool isGFX9(const MCSubtargetInfo &STI) {
1433 return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
1434 }
1435
isGFX9Plus(const MCSubtargetInfo & STI)1436 bool isGFX9Plus(const MCSubtargetInfo &STI) {
1437 return isGFX9(STI) || isGFX10Plus(STI);
1438 }
1439
isGFX10(const MCSubtargetInfo & STI)1440 bool isGFX10(const MCSubtargetInfo &STI) {
1441 return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
1442 }
1443
isGFX10Plus(const MCSubtargetInfo & STI)1444 bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); }
1445
isGCN3Encoding(const MCSubtargetInfo & STI)1446 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
1447 return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
1448 }
1449
isGFX10_BEncoding(const MCSubtargetInfo & STI)1450 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
1451 return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
1452 }
1453
hasGFX10_3Insts(const MCSubtargetInfo & STI)1454 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
1455 return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
1456 }
1457
isGFX90A(const MCSubtargetInfo & STI)1458 bool isGFX90A(const MCSubtargetInfo &STI) {
1459 return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
1460 }
1461
hasArchitectedFlatScratch(const MCSubtargetInfo & STI)1462 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
1463 return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1464 }
1465
isSGPR(unsigned Reg,const MCRegisterInfo * TRI)1466 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
1467 const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
1468 const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
1469 return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
1470 Reg == AMDGPU::SCC;
1471 }
1472
isRegIntersect(unsigned Reg0,unsigned Reg1,const MCRegisterInfo * TRI)1473 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
1474 for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
1475 if (*R == Reg1) return true;
1476 }
1477 return false;
1478 }
1479
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9PLUS(TTMP0) \
  CASE_VI_GFX9PLUS(TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2) \
  CASE_VI_GFX9PLUS(TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4) \
  CASE_VI_GFX9PLUS(TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6) \
  CASE_VI_GFX9PLUS(TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8) \
  CASE_VI_GFX9PLUS(TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10) \
  CASE_VI_GFX9PLUS(TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12) \
  CASE_VI_GFX9PLUS(TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14) \
  CASE_VI_GFX9PLUS(TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef MAP_REG2REG

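/// \returns true if operand \p OpNo of \p Desc is a source operand (register
/// or immediate), as defined by the OPERAND_SRC_FIRST/LAST range.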
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

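/// \returns the size in bytes of the register operand \p OpNo of \p Desc,
/// derived from the operand's register class.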
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // 1/(2*pi)
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi); // 1/(2*pi)
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (isInlinableIntLiteral(Literal))
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

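// For packed 16-bit operands, a 32-bit literal can be used as an inline
// constant when (a) it sign- or zero-extends from 16 bits and the low half is
// itself a legal 16-bit inline constant, (b) only the high half is set and
// that half is a legal 16-bit inline constant, or (c) both halves are equal
// and legal. Illustrative examples: 0x3C003C00 packs <1.0, 1.0> and is
// inlinable; 0x3C003800 mixes 1.0 and 0.5 and is not.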
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
    int16_t Trunc = static_cast<int16_t>(Literal);
    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
  }
  if (!(Literal & 0xffff))
    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isInlinableIntLiteralV216(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return isInlinableIntLiteral(Lo16);

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return isInlinableIntLiteral(Hi16);
  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}

bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return true;

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return true;
  return Lo16 == Hi16;
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

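/// \returns true if the already-encoded unsigned SMRD/SMEM immediate offset
/// \p EncodedOffset fits the field for \p ST: 20 bits on targets with SMEM
/// byte offsets, otherwise 8 bits.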
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer) {
  return !IsBuffer &&
         hasSMRDSignedImmOffset(ST) &&
         isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}

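/// \returns the encoding of \p ByteOffset for an SMRD/SMEM immediate offset if
/// it is legal for \p ST (converted to dword units on targets that use them),
/// or None otherwise.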
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer) {
  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? Optional<int64_t>(EncodedOffset)
             : None;
}

Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) {
  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
  if (AMDGPU::isGFX10(ST))
    return Signed ? 12 : 11;

  return Signed ? 13 : 12;
}

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
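//
// Illustrative examples on a target without the SI/CI workaround, assuming
// Alignment = 4, so MaxImm = 4092:
//   Imm = 4100  -> ImmOffset = 4092, SOffset = 8 (SOffset fits an inline
//                  constant in the 4..64 range).
//   Imm = 10000 -> ImmOffset = 1812, SOffset = 8188; both components stay
//                  dword-aligned and 8188 + 1812 == 10000.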
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, Align Alignment) {
  const uint32_t MaxImm = alignDown(4095, Alignment.value());
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Alignment.value()) & ~4095;
      uint32_t Low = (Imm + Alignment.value()) & 4095;
      Imm = Low;
      Overflow = High - Alignment.value();
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}

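// Start from the calling-convention defaults, then let the "amdgpu-ieee",
// "amdgpu-dx10-clamp", "denormal-fp-math-f32" and "denormal-fp-math" function
// attributes override individual fields; the FP32-specific denormal attribute
// takes precedence over the generic one for the FP32 flags.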
SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  StringRef DX10ClampAttr
    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

  StringRef DenormF32Attr =
      F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
  if (!DenormF32Attr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }

  StringRef DenormAttr =
      F.getFnAttribute("denormal-fp-math").getValueAsString();
  if (!DenormAttr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);

    if (DenormF32Attr.empty()) {
      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
    }

    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

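// Buffer format lookups dispatch to the generated GFX9 or GFX10+ tables
// depending on the subtarget, since the format encodings differ between the
// two generations.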
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI)
             ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm