1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUAsmUtils.h"
12 #include "AMDKernelCodeT.h"
13 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
14 #include "llvm/BinaryFormat/ELF.h"
15 #include "llvm/IR/Attributes.h"
16 #include "llvm/IR/Constants.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/IntrinsicsAMDGPU.h"
20 #include "llvm/IR/IntrinsicsR600.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSubtargetInfo.h"
25 #include "llvm/Support/AMDHSAKernelDescriptor.h"
26 #include "llvm/Support/CommandLine.h"
27 #include "llvm/TargetParser/TargetParser.h"
28 #include <optional>
29 
30 #define GET_INSTRINFO_NAMED_OPS
31 #define GET_INSTRMAP_INFO
32 #include "AMDGPUGenInstrInfo.inc"
33 
34 static llvm::cl::opt<unsigned>
35     AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
36                             llvm::cl::desc("AMDHSA Code Object Version"),
37                             llvm::cl::init(4));
38 
39 namespace {
40 
41 /// \returns Bit mask for given bit \p Shift and bit \p Width.
42 unsigned getBitMask(unsigned Shift, unsigned Width) {
43   return ((1 << Width) - 1) << Shift;
44 }
45 
46 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
47 ///
48 /// \returns Packed \p Dst.
49 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
50   unsigned Mask = getBitMask(Shift, Width);
51   return ((Src << Shift) & Mask) | (Dst & ~Mask);
52 }
53 
54 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
55 ///
56 /// \returns Unpacked bits.
57 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
58   return (Src & getBitMask(Shift, Width)) >> Shift;
59 }
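// Worked example of the two helpers above: with Shift = 4 and Width = 3 the
// mask is 0x70, so packBits(0x5, 0x0, 4, 3) == 0x50 and
// unpackBits(0x50, 4, 3) == 0x5.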
60 
61 /// \returns Vmcnt bit shift (lower bits).
62 unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
63   return VersionMajor >= 11 ? 10 : 0;
64 }
65 
66 /// \returns Vmcnt bit width (lower bits).
67 unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
68   return VersionMajor >= 11 ? 6 : 4;
69 }
70 
71 /// \returns Expcnt bit shift.
72 unsigned getExpcntBitShift(unsigned VersionMajor) {
73   return VersionMajor >= 11 ? 0 : 4;
74 }
75 
76 /// \returns Expcnt bit width.
77 unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
78 
79 /// \returns Lgkmcnt bit shift.
80 unsigned getLgkmcntBitShift(unsigned VersionMajor) {
81   return VersionMajor >= 11 ? 4 : 8;
82 }
83 
84 /// \returns Lgkmcnt bit width.
85 unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
86   return VersionMajor >= 10 ? 6 : 4;
87 }
88 
89 /// \returns Vmcnt bit shift (higher bits).
90 unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
91 
92 /// \returns Vmcnt bit width (higher bits).
93 unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
94   return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
95 }
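// Taken together, the helpers above describe the s_waitcnt field layout:
//   pre-gfx11: vmcnt[3:0], expcnt[6:4], lgkmcnt[11:8] (gfx10: [13:8]), plus an
//              extra vmcnt[15:14] on gfx9/gfx10.
//   gfx11+:    expcnt[2:0], lgkmcnt[9:4], vmcnt[15:10].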
96 
97 /// \returns VmVsrc bit width
98 inline unsigned getVmVsrcBitWidth() { return 3; }
99 
100 /// \returns VmVsrc bit shift
101 inline unsigned getVmVsrcBitShift() { return 2; }
102 
103 /// \returns VaVdst bit width
104 inline unsigned getVaVdstBitWidth() { return 4; }
105 
106 /// \returns VaVdst bit shift
107 inline unsigned getVaVdstBitShift() { return 12; }
108 
109 /// \returns SaSdst bit width
110 inline unsigned getSaSdstBitWidth() { return 1; }
111 
112 /// \returns SaSdst bit shift
113 inline unsigned getSaSdstBitShift() { return 0; }
114 
} // end anonymous namespace
116 
117 namespace llvm {
118 
119 namespace AMDGPU {
120 
121 /// \returns True if \p STI is AMDHSA.
122 bool isHsaAbi(const MCSubtargetInfo &STI) {
123   return STI.getTargetTriple().getOS() == Triple::AMDHSA;
124 }
125 
126 std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
127   if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
128     return std::nullopt;
129 
130   switch (AmdhsaCodeObjectVersion) {
131   case 4:
132     return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
133   case 5:
134     return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
135   default:
136     report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
137                        Twine(AmdhsaCodeObjectVersion));
138   }
139 }
140 
141 bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
142   if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
143     return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
144   return false;
145 }
146 
147 bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
148   if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
149     return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
150   return false;
151 }
152 
153 unsigned getAmdhsaCodeObjectVersion() {
154   return AmdhsaCodeObjectVersion;
155 }
156 
157 unsigned getCodeObjectVersion(const Module &M) {
158   if (auto Ver = mdconst::extract_or_null<ConstantInt>(
159       M.getModuleFlag("amdgpu_code_object_version"))) {
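    // The module flag holds the version scaled by 100 (e.g. 500 for code
    // object v5), so divide to recover the major version.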
160     return (unsigned)Ver->getZExtValue() / 100;
161   }
162 
163   // Default code object version.
164   return AMDHSA_COV4;
165 }
166 
167 unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
168   switch (CodeObjectVersion) {
169   case AMDHSA_COV4:
170     return 48;
171   case AMDHSA_COV5:
172   default:
173     return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
174   }
175 }
176 
178 // FIXME: All such magic numbers about the ABI should be in a
179 // central TD file.
180 unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
181   switch (CodeObjectVersion) {
182   case AMDHSA_COV4:
183     return 24;
184   case AMDHSA_COV5:
185   default:
186     return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
187   }
188 }
189 
190 unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
191   switch (CodeObjectVersion) {
192   case AMDHSA_COV4:
193     return 32;
194   case AMDHSA_COV5:
195   default:
196     return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
197   }
198 }
199 
200 unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
201   switch (CodeObjectVersion) {
202   case AMDHSA_COV4:
203     return 40;
204   case AMDHSA_COV5:
205   default:
206     return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
207   }
208 }
209 
210 #define GET_MIMGBaseOpcodesTable_IMPL
211 #define GET_MIMGDimInfoTable_IMPL
212 #define GET_MIMGInfoTable_IMPL
213 #define GET_MIMGLZMappingTable_IMPL
214 #define GET_MIMGMIPMappingTable_IMPL
215 #define GET_MIMGBiasMappingTable_IMPL
216 #define GET_MIMGOffsetMappingTable_IMPL
217 #define GET_MIMGG16MappingTable_IMPL
218 #define GET_MAIInstInfoTable_IMPL
219 #include "AMDGPUGenSearchableTables.inc"
220 
221 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
222                   unsigned VDataDwords, unsigned VAddrDwords) {
223   const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
224                                              VDataDwords, VAddrDwords);
225   return Info ? Info->Opcode : -1;
226 }
227 
228 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
229   const MIMGInfo *Info = getMIMGInfo(Opc);
230   return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
231 }
232 
233 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
234   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
235   const MIMGInfo *NewInfo =
236       getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
237                           NewChannels, OrigInfo->VAddrDwords);
238   return NewInfo ? NewInfo->Opcode : -1;
239 }
240 
241 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
242                            const MIMGDimInfo *Dim, bool IsA16,
243                            bool IsG16Supported) {
244   unsigned AddrWords = BaseOpcode->NumExtraArgs;
245   unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
246                             (BaseOpcode->LodOrClampOrMip ? 1 : 0);
247   if (IsA16)
248     AddrWords += divideCeil(AddrComponents, 2);
249   else
250     AddrWords += AddrComponents;
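  // E.g. a 3D sample with packed 16-bit addresses (A16) needs
  // divideCeil(3, 2) == 2 dwords for its x/y/z coordinates instead of 3.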
251 
  // Note: For subtargets that support A16 but not G16, enabling A16 also
  // enables 16-bit gradients.
  // For subtargets that support both A16 (an operand modifier) and G16 (a
  // separate instruction encoding), the two settings are independent.
256 
257   if (BaseOpcode->Gradients) {
258     if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
259       // There are two gradients per coordinate, we pack them separately.
260       // For the 3d case,
261       // we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv) (-, dz/dv)
262       AddrWords += alignTo<2>(Dim->NumGradients / 2);
263     else
264       AddrWords += Dim->NumGradients;
265   }
266   return AddrWords;
267 }
268 
269 struct MUBUFInfo {
270   uint16_t Opcode;
271   uint16_t BaseOpcode;
272   uint8_t elements;
273   bool has_vaddr;
274   bool has_srsrc;
275   bool has_soffset;
276   bool IsBufferInv;
277 };
278 
279 struct MTBUFInfo {
280   uint16_t Opcode;
281   uint16_t BaseOpcode;
282   uint8_t elements;
283   bool has_vaddr;
284   bool has_srsrc;
285   bool has_soffset;
286 };
287 
288 struct SMInfo {
289   uint16_t Opcode;
290   bool IsBuffer;
291 };
292 
293 struct VOPInfo {
294   uint16_t Opcode;
295   bool IsSingle;
296 };
297 
298 struct VOPC64DPPInfo {
299   uint16_t Opcode;
300 };
301 
302 struct VOPDComponentInfo {
303   uint16_t BaseVOP;
304   uint16_t VOPDOp;
305   bool CanBeVOPDX;
306 };
307 
308 struct VOPDInfo {
309   uint16_t Opcode;
310   uint16_t OpX;
311   uint16_t OpY;
312   uint16_t Subtarget;
313 };
314 
315 struct VOPTrue16Info {
316   uint16_t Opcode;
317   bool IsTrue16;
318 };
319 
320 #define GET_MTBUFInfoTable_DECL
321 #define GET_MTBUFInfoTable_IMPL
322 #define GET_MUBUFInfoTable_DECL
323 #define GET_MUBUFInfoTable_IMPL
324 #define GET_SMInfoTable_DECL
325 #define GET_SMInfoTable_IMPL
326 #define GET_VOP1InfoTable_DECL
327 #define GET_VOP1InfoTable_IMPL
328 #define GET_VOP2InfoTable_DECL
329 #define GET_VOP2InfoTable_IMPL
330 #define GET_VOP3InfoTable_DECL
331 #define GET_VOP3InfoTable_IMPL
332 #define GET_VOPC64DPPTable_DECL
333 #define GET_VOPC64DPPTable_IMPL
334 #define GET_VOPC64DPP8Table_DECL
335 #define GET_VOPC64DPP8Table_IMPL
336 #define GET_VOPDComponentTable_DECL
337 #define GET_VOPDComponentTable_IMPL
338 #define GET_VOPDPairs_DECL
339 #define GET_VOPDPairs_IMPL
340 #define GET_VOPTrue16Table_DECL
341 #define GET_VOPTrue16Table_IMPL
342 #define GET_WMMAOpcode2AddrMappingTable_DECL
343 #define GET_WMMAOpcode2AddrMappingTable_IMPL
344 #define GET_WMMAOpcode3AddrMappingTable_DECL
345 #define GET_WMMAOpcode3AddrMappingTable_IMPL
346 #include "AMDGPUGenSearchableTables.inc"
347 
348 int getMTBUFBaseOpcode(unsigned Opc) {
349   const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
350   return Info ? Info->BaseOpcode : -1;
351 }
352 
353 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
354   const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
355   return Info ? Info->Opcode : -1;
356 }
357 
358 int getMTBUFElements(unsigned Opc) {
359   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
360   return Info ? Info->elements : 0;
361 }
362 
363 bool getMTBUFHasVAddr(unsigned Opc) {
364   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
365   return Info ? Info->has_vaddr : false;
366 }
367 
368 bool getMTBUFHasSrsrc(unsigned Opc) {
369   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
370   return Info ? Info->has_srsrc : false;
371 }
372 
373 bool getMTBUFHasSoffset(unsigned Opc) {
374   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
375   return Info ? Info->has_soffset : false;
376 }
377 
378 int getMUBUFBaseOpcode(unsigned Opc) {
379   const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
380   return Info ? Info->BaseOpcode : -1;
381 }
382 
383 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
384   const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
385   return Info ? Info->Opcode : -1;
386 }
387 
388 int getMUBUFElements(unsigned Opc) {
389   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
390   return Info ? Info->elements : 0;
391 }
392 
393 bool getMUBUFHasVAddr(unsigned Opc) {
394   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
395   return Info ? Info->has_vaddr : false;
396 }
397 
398 bool getMUBUFHasSrsrc(unsigned Opc) {
399   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
400   return Info ? Info->has_srsrc : false;
401 }
402 
403 bool getMUBUFHasSoffset(unsigned Opc) {
404   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
405   return Info ? Info->has_soffset : false;
406 }
407 
408 bool getMUBUFIsBufferInv(unsigned Opc) {
409   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
410   return Info ? Info->IsBufferInv : false;
411 }
412 
413 bool getSMEMIsBuffer(unsigned Opc) {
414   const SMInfo *Info = getSMEMOpcodeHelper(Opc);
415   return Info ? Info->IsBuffer : false;
416 }
417 
418 bool getVOP1IsSingle(unsigned Opc) {
419   const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
420   return Info ? Info->IsSingle : false;
421 }
422 
423 bool getVOP2IsSingle(unsigned Opc) {
424   const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
425   return Info ? Info->IsSingle : false;
426 }
427 
428 bool getVOP3IsSingle(unsigned Opc) {
429   const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
430   return Info ? Info->IsSingle : false;
431 }
432 
433 bool isVOPC64DPP(unsigned Opc) {
434   return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc);
435 }
436 
437 bool getMAIIsDGEMM(unsigned Opc) {
438   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
439   return Info ? Info->is_dgemm : false;
440 }
441 
442 bool getMAIIsGFX940XDL(unsigned Opc) {
443   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
444   return Info ? Info->is_gfx940_xdl : false;
445 }
446 
447 unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST) {
448   if (ST.hasFeature(AMDGPU::FeatureGFX12Insts))
449     return SIEncodingFamily::GFX12;
450   if (ST.hasFeature(AMDGPU::FeatureGFX11Insts))
451     return SIEncodingFamily::GFX11;
452   llvm_unreachable("Subtarget generation does not support VOPD!");
453 }
454 
455 CanBeVOPD getCanBeVOPD(unsigned Opc) {
456   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
457   if (Info)
458     return {Info->CanBeVOPDX, true};
459   else
460     return {false, false};
461 }
462 
463 unsigned getVOPDOpcode(unsigned Opc) {
464   const VOPDComponentInfo *Info = getVOPDComponentHelper(Opc);
465   return Info ? Info->VOPDOp : ~0u;
466 }
467 
468 bool isVOPD(unsigned Opc) {
469   return AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0X);
470 }
471 
472 bool isMAC(unsigned Opc) {
473   return Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
474          Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
475          Opc == AMDGPU::V_MAC_F32_e64_vi ||
476          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
477          Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
478          Opc == AMDGPU::V_MAC_F16_e64_vi ||
479          Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
480          Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
481          Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
482          Opc == AMDGPU::V_FMAC_F32_e64_gfx12 ||
483          Opc == AMDGPU::V_FMAC_F32_e64_vi ||
484          Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
485          Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
486          Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
487          Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx11 ||
488          Opc == AMDGPU::V_FMAC_F16_t16_e64_gfx12 ||
489          Opc == AMDGPU::V_DOT2C_F32_F16_e64_vi ||
490          Opc == AMDGPU::V_DOT2C_I32_I16_e64_vi ||
491          Opc == AMDGPU::V_DOT4C_I32_I8_e64_vi ||
492          Opc == AMDGPU::V_DOT8C_I32_I4_e64_vi;
493 }
494 
495 bool isPermlane16(unsigned Opc) {
496   return Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
497          Opc == AMDGPU::V_PERMLANEX16_B32_gfx10 ||
498          Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx11 ||
499          Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx11 ||
500          Opc == AMDGPU::V_PERMLANE16_B32_e64_gfx12 ||
501          Opc == AMDGPU::V_PERMLANEX16_B32_e64_gfx12 ||
502          Opc == AMDGPU::V_PERMLANE16_VAR_B32_e64_gfx12 ||
503          Opc == AMDGPU::V_PERMLANEX16_VAR_B32_e64_gfx12;
504 }
505 
506 bool isGenericAtomic(unsigned Opc) {
507   return Opc == AMDGPU::G_AMDGPU_ATOMIC_FMIN ||
508          Opc == AMDGPU::G_AMDGPU_ATOMIC_FMAX ||
509          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP ||
510          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD ||
511          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB ||
512          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN ||
513          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN ||
514          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX ||
515          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX ||
516          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND ||
517          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR ||
518          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR ||
519          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC ||
520          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC ||
521          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD ||
522          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN ||
523          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX ||
524          Opc == AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP ||
525          Opc == AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG;
526 }
527 
528 bool isTrue16Inst(unsigned Opc) {
529   const VOPTrue16Info *Info = getTrue16OpcodeHelper(Opc);
530   return Info ? Info->IsTrue16 : false;
531 }
532 
533 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc) {
534   const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom2AddrOpcode(Opc);
535   return Info ? Info->Opcode3Addr : ~0u;
536 }
537 
538 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc) {
539   const WMMAOpcodeMappingInfo *Info = getWMMAMappingInfoFrom3AddrOpcode(Opc);
540   return Info ? Info->Opcode2Addr : ~0u;
541 }
542 
543 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
544 // header files, so we need to wrap it in a function that takes unsigned
545 // instead.
546 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
547   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
548 }
549 
550 int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily) {
551   const VOPDInfo *Info =
552       getVOPDInfoFromComponentOpcodes(OpX, OpY, EncodingFamily);
553   return Info ? Info->Opcode : -1;
554 }
555 
556 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode) {
557   const VOPDInfo *Info = getVOPDOpcodeHelper(VOPDOpcode);
558   assert(Info);
559   auto OpX = getVOPDBaseFromComponent(Info->OpX);
560   auto OpY = getVOPDBaseFromComponent(Info->OpY);
561   assert(OpX && OpY);
562   return {OpX->BaseVOP, OpY->BaseVOP};
563 }
564 
565 namespace VOPD {
566 
567 ComponentProps::ComponentProps(const MCInstrDesc &OpDesc) {
568   assert(OpDesc.getNumDefs() == Component::DST_NUM);
569 
570   assert(OpDesc.getOperandConstraint(Component::SRC0, MCOI::TIED_TO) == -1);
571   assert(OpDesc.getOperandConstraint(Component::SRC1, MCOI::TIED_TO) == -1);
572   auto TiedIdx = OpDesc.getOperandConstraint(Component::SRC2, MCOI::TIED_TO);
573   assert(TiedIdx == -1 || TiedIdx == Component::DST);
574   HasSrc2Acc = TiedIdx != -1;
575 
576   SrcOperandsNum = OpDesc.getNumOperands() - OpDesc.getNumDefs();
577   assert(SrcOperandsNum <= Component::MAX_SRC_NUM);
578 
579   auto OperandsNum = OpDesc.getNumOperands();
580   unsigned CompOprIdx;
581   for (CompOprIdx = Component::SRC1; CompOprIdx < OperandsNum; ++CompOprIdx) {
582     if (OpDesc.operands()[CompOprIdx].OperandType == AMDGPU::OPERAND_KIMM32) {
583       MandatoryLiteralIdx = CompOprIdx;
584       break;
585     }
586   }
587 }
588 
589 unsigned ComponentInfo::getIndexInParsedOperands(unsigned CompOprIdx) const {
590   assert(CompOprIdx < Component::MAX_OPR_NUM);
591 
592   if (CompOprIdx == Component::DST)
593     return getIndexOfDstInParsedOperands();
594 
595   auto CompSrcIdx = CompOprIdx - Component::DST_NUM;
596   if (CompSrcIdx < getCompParsedSrcOperandsNum())
597     return getIndexOfSrcInParsedOperands(CompSrcIdx);
598 
599   // The specified operand does not exist.
600   return 0;
601 }
602 
603 std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
604     std::function<unsigned(unsigned, unsigned)> GetRegIdx, bool SkipSrc) const {
605 
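  // The X and Y components of a VOPD pair must not use VGPRs from the same
  // register bank for corresponding operands; return the index of the first
  // operand position where this constraint is violated, if any.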
606   auto OpXRegs = getRegIndices(ComponentIndex::X, GetRegIdx);
607   auto OpYRegs = getRegIndices(ComponentIndex::Y, GetRegIdx);
608 
609   const unsigned CompOprNum =
610       SkipSrc ? Component::DST_NUM : Component::MAX_OPR_NUM;
611   unsigned CompOprIdx;
612   for (CompOprIdx = 0; CompOprIdx < CompOprNum; ++CompOprIdx) {
613     unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
614     if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
615         ((OpXRegs[CompOprIdx] & BanksMasks) ==
616          (OpYRegs[CompOprIdx] & BanksMasks)))
617       return CompOprIdx;
618   }
619 
620   return {};
621 }
622 
623 // Return an array of VGPR registers [DST,SRC0,SRC1,SRC2] used
624 // by the specified component. If an operand is unused
625 // or is not a VGPR, the corresponding value is 0.
626 //
627 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
628 // for the specified component and MC operand. The callback must return 0
629 // if the operand is not a register or not a VGPR.
630 InstInfo::RegIndices InstInfo::getRegIndices(
631     unsigned CompIdx,
632     std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
633   assert(CompIdx < COMPONENTS_NUM);
634 
635   const auto &Comp = CompInfo[CompIdx];
636   InstInfo::RegIndices RegIndices;
637 
638   RegIndices[DST] = GetRegIdx(CompIdx, Comp.getIndexOfDstInMCOperands());
639 
640   for (unsigned CompOprIdx : {SRC0, SRC1, SRC2}) {
641     unsigned CompSrcIdx = CompOprIdx - DST_NUM;
642     RegIndices[CompOprIdx] =
643         Comp.hasRegSrcOperand(CompSrcIdx)
644             ? GetRegIdx(CompIdx, Comp.getIndexOfSrcInMCOperands(CompSrcIdx))
645             : 0;
646   }
647   return RegIndices;
648 }
649 
650 } // namespace VOPD
651 
652 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY) {
653   return VOPD::InstInfo(OpX, OpY);
654 }
655 
656 VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
657                                const MCInstrInfo *InstrInfo) {
658   auto [OpX, OpY] = getVOPDComponents(VOPDOpcode);
659   const auto &OpXDesc = InstrInfo->get(OpX);
660   const auto &OpYDesc = InstrInfo->get(OpY);
661   VOPD::ComponentInfo OpXInfo(OpXDesc, VOPD::ComponentKind::COMPONENT_X);
662   VOPD::ComponentInfo OpYInfo(OpYDesc, OpXInfo);
663   return VOPD::InstInfo(OpXInfo, OpYInfo);
664 }
665 
666 namespace IsaInfo {
667 
668 AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
669     : STI(STI), XnackSetting(TargetIDSetting::Any),
670       SramEccSetting(TargetIDSetting::Any), CodeObjectVersion(0) {
671   if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
672     XnackSetting = TargetIDSetting::Unsupported;
673   if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
674     SramEccSetting = TargetIDSetting::Unsupported;
675 }
676 
677 void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
678   // Check if xnack or sramecc is explicitly enabled or disabled.  In the
679   // absence of the target features we assume we must generate code that can run
680   // in any environment.
681   SubtargetFeatures Features(FS);
682   std::optional<bool> XnackRequested;
683   std::optional<bool> SramEccRequested;
684 
685   for (const std::string &Feature : Features.getFeatures()) {
686     if (Feature == "+xnack")
687       XnackRequested = true;
688     else if (Feature == "-xnack")
689       XnackRequested = false;
690     else if (Feature == "+sramecc")
691       SramEccRequested = true;
692     else if (Feature == "-sramecc")
693       SramEccRequested = false;
694   }
695 
696   bool XnackSupported = isXnackSupported();
697   bool SramEccSupported = isSramEccSupported();
698 
699   if (XnackRequested) {
700     if (XnackSupported) {
701       XnackSetting =
702           *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
703     } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting remains "Unsupported".
706       if (*XnackRequested) {
707         errs() << "warning: xnack 'On' was requested for a processor that does "
708                   "not support it!\n";
709       } else {
710         errs() << "warning: xnack 'Off' was requested for a processor that "
711                   "does not support it!\n";
712       }
713     }
714   }
715 
716   if (SramEccRequested) {
717     if (SramEccSupported) {
718       SramEccSetting =
719           *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
720     } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting remains "Unsupported".
724       if (*SramEccRequested) {
725         errs() << "warning: sramecc 'On' was requested for a processor that "
726                   "does not support it!\n";
727       } else {
728         errs() << "warning: sramecc 'Off' was requested for a processor that "
729                   "does not support it!\n";
730       }
731     }
732   }
733 }
734 
735 static TargetIDSetting
736 getTargetIDSettingFromFeatureString(StringRef FeatureString) {
737   if (FeatureString.ends_with("-"))
738     return TargetIDSetting::Off;
739   if (FeatureString.ends_with("+"))
740     return TargetIDSetting::On;
741 
742   llvm_unreachable("Malformed feature string");
743 }
744 
745 void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
746   SmallVector<StringRef, 3> TargetIDSplit;
747   TargetID.split(TargetIDSplit, ':');
748 
749   for (const auto &FeatureString : TargetIDSplit) {
750     if (FeatureString.starts_with("xnack"))
751       XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
752     if (FeatureString.starts_with("sramecc"))
753       SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
754   }
755 }
756 
757 std::string AMDGPUTargetID::toString() const {
758   std::string StringRep;
759   raw_string_ostream StreamRep(StringRep);
760 
761   auto TargetTriple = STI.getTargetTriple();
762   auto Version = getIsaVersion(STI.getCPU());
763 
764   StreamRep << TargetTriple.getArchName() << '-'
765             << TargetTriple.getVendorName() << '-'
766             << TargetTriple.getOSName() << '-'
767             << TargetTriple.getEnvironmentName() << '-';
768 
769   std::string Processor;
  // TODO: The else branch below exists because various alias names were used
  // for GPUs up until GFX9 (e.g. 'fiji' is the same as 'gfx803'). Remove it
  // once all aliases are removed from GCNProcessors.td.
773   if (Version.Major >= 9)
774     Processor = STI.getCPU().str();
775   else
776     Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
777                  Twine(Version.Stepping))
778                     .str();
779 
780   std::string Features;
781   if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
782     switch (CodeObjectVersion) {
783     case AMDGPU::AMDHSA_COV4:
784     case AMDGPU::AMDHSA_COV5:
785       // sramecc.
786       if (getSramEccSetting() == TargetIDSetting::Off)
787         Features += ":sramecc-";
788       else if (getSramEccSetting() == TargetIDSetting::On)
789         Features += ":sramecc+";
790       // xnack.
791       if (getXnackSetting() == TargetIDSetting::Off)
792         Features += ":xnack-";
793       else if (getXnackSetting() == TargetIDSetting::On)
794         Features += ":xnack+";
795       break;
796     default:
797       break;
798     }
799   }
800 
801   StreamRep << Processor << Features;
802 
803   StreamRep.flush();
804   return StringRep;
805 }
806 
807 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
808   if (STI->getFeatureBits().test(FeatureWavefrontSize16))
809     return 16;
810   if (STI->getFeatureBits().test(FeatureWavefrontSize32))
811     return 32;
812 
813   return 64;
814 }
815 
816 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
817   unsigned BytesPerCU = 0;
818   if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
819     BytesPerCU = 32768;
820   if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
821     BytesPerCU = 65536;
822 
823   // "Per CU" really means "per whatever functional block the waves of a
824   // workgroup must share". So the effective local memory size is doubled in
825   // WGP mode on gfx10.
826   if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
827     BytesPerCU *= 2;
828 
829   return BytesPerCU;
830 }
831 
832 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI) {
833   if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
834     return 32768;
835   if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
836     return 65536;
837   return 0;
838 }
839 
840 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
841   // "Per CU" really means "per whatever functional block the waves of a
842   // workgroup must share". For gfx10 in CU mode this is the CU, which contains
843   // two SIMDs.
844   if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
845     return 2;
846   // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
847   // two CUs, so a total of four SIMDs.
848   return 4;
849 }
850 
851 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
852                                unsigned FlatWorkGroupSize) {
853   assert(FlatWorkGroupSize != 0);
854   if (STI->getTargetTriple().getArch() != Triple::amdgcn)
855     return 8;
856   unsigned MaxWaves = getMaxWavesPerEU(STI) * getEUsPerCU(STI);
857   unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
858   if (N == 1) {
859     // Single-wave workgroups don't consume barrier resources.
860     return MaxWaves;
861   }
862 
863   unsigned MaxBarriers = 16;
864   if (isGFX10Plus(*STI) && !STI->getFeatureBits().test(FeatureCuMode))
865     MaxBarriers = 32;
866 
867   return std::min(MaxWaves / N, MaxBarriers);
868 }
869 
870 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
871   return 1;
872 }
873 
874 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
875   // FIXME: Need to take scratch memory into account.
876   if (isGFX90A(*STI))
877     return 8;
878   if (!isGFX10Plus(*STI))
879     return 10;
880   return hasGFX10_3Insts(*STI) ? 16 : 20;
881 }
882 
883 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
884                                    unsigned FlatWorkGroupSize) {
885   return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
886                     getEUsPerCU(STI));
887 }
888 
889 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
890   return 1;
891 }
892 
893 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
894   // Some subtargets allow encoding 2048, but this isn't tested or supported.
895   return 1024;
896 }
897 
898 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
899                               unsigned FlatWorkGroupSize) {
900   return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
901 }
902 
903 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
904   IsaVersion Version = getIsaVersion(STI->getCPU());
905   if (Version.Major >= 10)
906     return getAddressableNumSGPRs(STI);
907   if (Version.Major >= 8)
908     return 16;
909   return 8;
910 }
911 
912 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
913   return 8;
914 }
915 
916 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
917   IsaVersion Version = getIsaVersion(STI->getCPU());
918   if (Version.Major >= 8)
919     return 800;
920   return 512;
921 }
922 
923 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
924   if (STI->getFeatureBits().test(FeatureSGPRInitBug))
925     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
926 
927   IsaVersion Version = getIsaVersion(STI->getCPU());
928   if (Version.Major >= 10)
929     return 106;
930   if (Version.Major >= 8)
931     return 102;
932   return 104;
933 }
934 
935 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
936   assert(WavesPerEU != 0);
937 
938   IsaVersion Version = getIsaVersion(STI->getCPU());
939   if (Version.Major >= 10)
940     return 0;
941 
942   if (WavesPerEU >= getMaxWavesPerEU(STI))
943     return 0;
944 
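  // E.g. with 800 total SGPRs, an allocation granule of 16, no trap handler
  // and WavesPerEU == 8, the code below computes alignDown(800 / 9, 16) + 1,
  // i.e. 81 SGPRs as the smallest count that limits occupancy to 8 waves.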
945   unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
946   if (STI->getFeatureBits().test(FeatureTrapHandler))
947     MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
948   MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
949   return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
950 }
951 
952 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
953                         bool Addressable) {
954   assert(WavesPerEU != 0);
955 
956   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
957   IsaVersion Version = getIsaVersion(STI->getCPU());
958   if (Version.Major >= 10)
959     return Addressable ? AddressableNumSGPRs : 108;
960   if (Version.Major >= 8 && !Addressable)
961     AddressableNumSGPRs = 112;
962   unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
963   if (STI->getFeatureBits().test(FeatureTrapHandler))
964     MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
965   MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
966   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
967 }
968 
969 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
970                           bool FlatScrUsed, bool XNACKUsed) {
971   unsigned ExtraSGPRs = 0;
972   if (VCCUsed)
973     ExtraSGPRs = 2;
974 
975   IsaVersion Version = getIsaVersion(STI->getCPU());
976   if (Version.Major >= 10)
977     return ExtraSGPRs;
978 
979   if (Version.Major < 8) {
980     if (FlatScrUsed)
981       ExtraSGPRs = 4;
982   } else {
983     if (XNACKUsed)
984       ExtraSGPRs = 4;
985 
986     if (FlatScrUsed ||
987         STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
988       ExtraSGPRs = 6;
989   }
990 
991   return ExtraSGPRs;
992 }
993 
994 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
995                           bool FlatScrUsed) {
996   return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
997                           STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
998 }
999 
1000 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
1001   NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is the actual number of SGPR blocks minus 1.
1003   return NumSGPRs / getSGPREncodingGranule(STI) - 1;
1004 }
1005 
1006 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
1007                              std::optional<bool> EnableWavefrontSize32) {
1008   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1009     return 8;
1010 
1011   bool IsWave32 = EnableWavefrontSize32 ?
1012       *EnableWavefrontSize32 :
1013       STI->getFeatureBits().test(FeatureWavefrontSize32);
1014 
1015   if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1016     return IsWave32 ? 24 : 12;
1017 
1018   if (hasGFX10_3Insts(*STI))
1019     return IsWave32 ? 16 : 8;
1020 
1021   return IsWave32 ? 8 : 4;
1022 }
1023 
1024 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
1025                                 std::optional<bool> EnableWavefrontSize32) {
1026   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1027     return 8;
1028 
1029   bool IsWave32 = EnableWavefrontSize32 ?
1030       *EnableWavefrontSize32 :
1031       STI->getFeatureBits().test(FeatureWavefrontSize32);
1032 
1033   return IsWave32 ? 8 : 4;
1034 }
1035 
1036 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
1037   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1038     return 512;
1039   if (!isGFX10Plus(*STI))
1040     return 256;
1041   bool IsWave32 = STI->getFeatureBits().test(FeatureWavefrontSize32);
1042   if (STI->getFeatureBits().test(FeatureGFX11FullVGPRs))
1043     return IsWave32 ? 1536 : 768;
1044   return IsWave32 ? 1024 : 512;
1045 }
1046 
1047 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
1048   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
1049     return 512;
1050   return 256;
1051 }
1052 
1053 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
1054                                       unsigned NumVGPRs) {
1055   unsigned MaxWaves = getMaxWavesPerEU(STI);
1056   unsigned Granule = getVGPRAllocGranule(STI);
1057   if (NumVGPRs < Granule)
1058     return MaxWaves;
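  // E.g. on a pre-gfx10 wave64 target (256 VGPRs, allocation granule 4), a
  // request for 100 VGPRs rounds to 100 and allows 256 / 100 == 2 waves.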
1059   unsigned RoundedRegs = alignTo(NumVGPRs, Granule);
1060   return std::min(std::max(getTotalNumVGPRs(STI) / RoundedRegs, 1u), MaxWaves);
1061 }
1062 
1063 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1064   assert(WavesPerEU != 0);
1065 
1066   unsigned MaxWavesPerEU = getMaxWavesPerEU(STI);
1067   if (WavesPerEU >= MaxWavesPerEU)
1068     return 0;
1069 
1070   unsigned TotNumVGPRs = getTotalNumVGPRs(STI);
1071   unsigned AddrsableNumVGPRs = getAddressableNumVGPRs(STI);
1072   unsigned Granule = getVGPRAllocGranule(STI);
1073   unsigned MaxNumVGPRs = alignDown(TotNumVGPRs / WavesPerEU, Granule);
1074 
1075   if (MaxNumVGPRs == alignDown(TotNumVGPRs / MaxWavesPerEU, Granule))
1076     return 0;
1077 
1078   unsigned MinWavesPerEU = getNumWavesPerEUWithNumVGPRs(STI, AddrsableNumVGPRs);
1079   if (WavesPerEU < MinWavesPerEU)
1080     return getMinNumVGPRs(STI, MinWavesPerEU);
1081 
1082   unsigned MaxNumVGPRsNext = alignDown(TotNumVGPRs / (WavesPerEU + 1), Granule);
1083   unsigned MinNumVGPRs = 1 + std::min(MaxNumVGPRs - Granule, MaxNumVGPRsNext);
1084   return std::min(MinNumVGPRs, AddrsableNumVGPRs);
1085 }
1086 
1087 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
1088   assert(WavesPerEU != 0);
1089 
1090   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
1091                                    getVGPRAllocGranule(STI));
1092   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
1093   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
1094 }
1095 
1096 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
1097                           std::optional<bool> EnableWavefrontSize32) {
1098   NumVGPRs = alignTo(std::max(1u, NumVGPRs),
1099                      getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is the actual number of VGPR blocks minus 1.
1101   return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
1102 }
1103 
1104 } // end namespace IsaInfo
1105 
1106 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
1107                                const MCSubtargetInfo *STI) {
1108   IsaVersion Version = getIsaVersion(STI->getCPU());
1109 
1110   memset(&Header, 0, sizeof(Header));
1111 
1112   Header.amd_kernel_code_version_major = 1;
1113   Header.amd_kernel_code_version_minor = 2;
1114   Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
1115   Header.amd_machine_version_major = Version.Major;
1116   Header.amd_machine_version_minor = Version.Minor;
1117   Header.amd_machine_version_stepping = Version.Stepping;
1118   Header.kernel_code_entry_byte_offset = sizeof(Header);
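  // wavefront_size is stored as log2 of the wave size: 2^6 == 64 lanes by
  // default (switched to 5, i.e. 32 lanes, for wave32 targets below).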
1119   Header.wavefront_size = 6;
1120 
1121   // If the code object does not support indirect functions, then the value must
1122   // be 0xffffffff.
1123   Header.call_convention = -1;
1124 
1125   // These alignment values are specified in powers of two, so alignment =
1126   // 2^n.  The minimum alignment is 2^4 = 16.
1127   Header.kernarg_segment_alignment = 4;
1128   Header.group_segment_alignment = 4;
1129   Header.private_segment_alignment = 4;
1130 
1131   if (Version.Major >= 10) {
1132     if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
1133       Header.wavefront_size = 5;
1134       Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
1135     }
1136     Header.compute_pgm_resource_registers |=
1137       S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
1138       S_00B848_MEM_ORDERED(1);
1139   }
1140 }
1141 
1142 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
1143     const MCSubtargetInfo *STI) {
1144   IsaVersion Version = getIsaVersion(STI->getCPU());
1145 
1146   amdhsa::kernel_descriptor_t KD;
1147   memset(&KD, 0, sizeof(KD));
1148 
1149   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1150                   amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
1151                   amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
1152   if (Version.Major >= 12) {
1153     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1154                     amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, 0);
1155     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1156                     amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_DISABLE_PERF, 0);
1157   } else {
1158     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1159                     amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, 1);
1160     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1161                     amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, 1);
1162   }
1163   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
1164                   amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
1165   if (Version.Major >= 10) {
1166     AMDHSA_BITS_SET(KD.kernel_code_properties,
1167                     amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
1168                     STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
1169     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1170                     amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
1171                     STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
1172     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
1173                     amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
1174   }
1175   if (AMDGPU::isGFX90A(*STI)) {
1176     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
1177                     amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
1178                     STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
1179   }
1180   return KD;
1181 }
1182 
1183 bool isGroupSegment(const GlobalValue *GV) {
1184   return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
1185 }
1186 
1187 bool isGlobalSegment(const GlobalValue *GV) {
1188   return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
1189 }
1190 
1191 bool isReadOnlySegment(const GlobalValue *GV) {
1192   unsigned AS = GV->getAddressSpace();
1193   return AS == AMDGPUAS::CONSTANT_ADDRESS ||
1194          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
1195 }
1196 
1197 bool shouldEmitConstantsToTextSection(const Triple &TT) {
1198   return TT.getArch() == Triple::r600;
1199 }
1200 
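// Parses a string function attribute of the form "<first>[,<second>]", e.g.
// "amdgpu-flat-work-group-size"="1,256" yields {1, 256}.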
1201 std::pair<unsigned, unsigned>
1202 getIntegerPairAttribute(const Function &F, StringRef Name,
1203                         std::pair<unsigned, unsigned> Default,
1204                         bool OnlyFirstRequired) {
1205   Attribute A = F.getFnAttribute(Name);
1206   if (!A.isStringAttribute())
1207     return Default;
1208 
1209   LLVMContext &Ctx = F.getContext();
1210   std::pair<unsigned, unsigned> Ints = Default;
1211   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
1212   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
1213     Ctx.emitError("can't parse first integer attribute " + Name);
1214     return Default;
1215   }
1216   if (Strs.second.trim().getAsInteger(0, Ints.second)) {
1217     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
1218       Ctx.emitError("can't parse second integer attribute " + Name);
1219       return Default;
1220     }
1221   }
1222 
1223   return Ints;
1224 }
1225 
1226 unsigned getVmcntBitMask(const IsaVersion &Version) {
1227   return (1 << (getVmcntBitWidthLo(Version.Major) +
1228                 getVmcntBitWidthHi(Version.Major))) -
1229          1;
1230 }
1231 
1232 unsigned getExpcntBitMask(const IsaVersion &Version) {
1233   return (1 << getExpcntBitWidth(Version.Major)) - 1;
1234 }
1235 
1236 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
1237   return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
1238 }
1239 
1240 unsigned getWaitcntBitMask(const IsaVersion &Version) {
1241   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
1242                                 getVmcntBitWidthLo(Version.Major));
1243   unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
1244                                getExpcntBitWidth(Version.Major));
1245   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
1246                                 getLgkmcntBitWidth(Version.Major));
1247   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
1248                                 getVmcntBitWidthHi(Version.Major));
1249   return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
1250 }
1251 
1252 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1253   unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1254                                 getVmcntBitWidthLo(Version.Major));
1255   unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1256                                 getVmcntBitWidthHi(Version.Major));
1257   return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1258 }
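// Worked example: on gfx9, decodeVmcnt(Version, 0xC00F) recombines the split
// field as 0xF | (0x3 << 4) == 63.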
1259 
1260 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1261   return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1262                     getExpcntBitWidth(Version.Major));
1263 }
1264 
1265 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1266   return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1267                     getLgkmcntBitWidth(Version.Major));
1268 }
1269 
1270 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1271                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
1272   Vmcnt = decodeVmcnt(Version, Waitcnt);
1273   Expcnt = decodeExpcnt(Version, Waitcnt);
1274   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1275 }
1276 
1277 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1278   Waitcnt Decoded;
1279   Decoded.VmCnt = decodeVmcnt(Version, Encoded);
1280   Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1281   Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
1282   return Decoded;
1283 }
1284 
1285 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1286                      unsigned Vmcnt) {
1287   Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1288                      getVmcntBitWidthLo(Version.Major));
1289   return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1290                   getVmcntBitShiftHi(Version.Major),
1291                   getVmcntBitWidthHi(Version.Major));
1292 }
1293 
1294 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1295                       unsigned Expcnt) {
1296   return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1297                   getExpcntBitWidth(Version.Major));
1298 }
1299 
1300 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1301                        unsigned Lgkmcnt) {
1302   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1303                   getLgkmcntBitWidth(Version.Major));
1304 }
1305 
1306 unsigned encodeWaitcnt(const IsaVersion &Version,
1307                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
1308   unsigned Waitcnt = getWaitcntBitMask(Version);
1309   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1310   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1311   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1312   return Waitcnt;
1313 }
1314 
1315 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1316   return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
1317 }
1318 
1319 //===----------------------------------------------------------------------===//
1320 // Custom Operands.
1321 //
// A table of custom operands shall describe "primary" operand names first,
// followed by aliases, if any. It is not required but recommended to arrange
// operands so that operand encodings match operand positions in the table;
// this makes disassembly a bit more efficient.
// Unused slots in the table shall have an empty name.
1327 //
1328 //===----------------------------------------------------------------------===//
1329 
1330 template <class T>
1331 static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
1332                        T Context) {
1333   return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
1334          (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
1335 }
1336 
1337 template <class T>
1338 static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
1339                      const CustomOperand<T> OpInfo[], int OpInfoSize,
1340                      T Context) {
1341   int InvalidIdx = OPR_ID_UNKNOWN;
1342   for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
1343     if (Test(OpInfo[Idx])) {
1344       if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
1345         return Idx;
1346       InvalidIdx = OPR_ID_UNSUPPORTED;
1347     }
1348   }
1349   return InvalidIdx;
1350 }
1351 
1352 template <class T>
1353 static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
1354                      int OpInfoSize, T Context) {
1355   auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
1356   return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1357 }
1358 
1359 template <class T>
1360 static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
1361                      T Context, bool QuickCheck = true) {
1362   auto Test = [=](const CustomOperand<T> &Op) {
1363     return Op.Encoding == Id && !Op.Name.empty();
1364   };
1365   // This is an optimization that should work in most cases.
1366   // As a side effect, it may cause selection of an alias
1367   // instead of a primary operand name in case of sparse tables.
1368   if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
1369       OpInfo[Id].Encoding == Id) {
1370     return Id;
1371   }
1372   return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1373 }
1374 
1375 //===----------------------------------------------------------------------===//
1376 // Custom Operand Values
1377 //===----------------------------------------------------------------------===//
1378 
1379 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1380                                                 int Size,
1381                                                 const MCSubtargetInfo &STI) {
1382   unsigned Enc = 0;
1383   for (int Idx = 0; Idx < Size; ++Idx) {
1384     const auto &Op = Opr[Idx];
1385     if (Op.isSupported(STI))
1386       Enc |= Op.encode(Op.Default);
1387   }
1388   return Enc;
1389 }
1390 
1391 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1392                                             int Size, unsigned Code,
1393                                             bool &HasNonDefaultVal,
1394                                             const MCSubtargetInfo &STI) {
1395   unsigned UsedOprMask = 0;
1396   HasNonDefaultVal = false;
1397   for (int Idx = 0; Idx < Size; ++Idx) {
1398     const auto &Op = Opr[Idx];
1399     if (!Op.isSupported(STI))
1400       continue;
1401     UsedOprMask |= Op.getMask();
1402     unsigned Val = Op.decode(Code);
1403     if (!Op.isValid(Val))
1404       return false;
1405     HasNonDefaultVal |= (Val != Op.Default);
1406   }
1407   return (Code & ~UsedOprMask) == 0;
1408 }
1409 
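// Decodes the first supported operand at or after \p Idx; \p Idx is advanced
// past it, so repeated calls enumerate every field the subtarget supports.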
1410 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1411                                 unsigned Code, int &Idx, StringRef &Name,
1412                                 unsigned &Val, bool &IsDefault,
1413                                 const MCSubtargetInfo &STI) {
1414   while (Idx < Size) {
1415     const auto &Op = Opr[Idx++];
1416     if (Op.isSupported(STI)) {
1417       Name = Op.Name;
1418       Val = Op.decode(Code);
1419       IsDefault = (Val == Op.Default);
1420       return true;
1421     }
1422   }
1423 
1424   return false;
1425 }
1426 
1427 static int encodeCustomOperandVal(const CustomOperandVal &Op,
1428                                   int64_t InputVal) {
1429   if (InputVal < 0 || InputVal > Op.Max)
1430     return OPR_VAL_INVALID;
1431   return Op.encode(InputVal);
1432 }
1433 
1434 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1435                                const StringRef Name, int64_t InputVal,
1436                                unsigned &UsedOprMask,
1437                                const MCSubtargetInfo &STI) {
1438   int InvalidId = OPR_ID_UNKNOWN;
1439   for (int Idx = 0; Idx < Size; ++Idx) {
1440     const auto &Op = Opr[Idx];
1441     if (Op.Name == Name) {
1442       if (!Op.isSupported(STI)) {
1443         InvalidId = OPR_ID_UNSUPPORTED;
1444         continue;
1445       }
1446       auto OprMask = Op.getMask();
1447       if (OprMask & UsedOprMask)
1448         return OPR_ID_DUPLICATE;
1449       UsedOprMask |= OprMask;
1450       return encodeCustomOperandVal(Op, InputVal);
1451     }
1452   }
1453   return InvalidId;
1454 }
1455 
1456 //===----------------------------------------------------------------------===//
1457 // DepCtr
1458 //===----------------------------------------------------------------------===//
1459 
1460 namespace DepCtr {
1461 
1462 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1463   static int Default = -1;
1464   if (Default == -1)
1465     Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1466   return Default;
1467 }
1468 
1469 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1470                               const MCSubtargetInfo &STI) {
1471   return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
1472                                          HasNonDefaultVal, STI);
1473 }
1474 
1475 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1476                   bool &IsDefault, const MCSubtargetInfo &STI) {
1477   return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1478                              IsDefault, STI);
1479 }
1480 
1481 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1482                  const MCSubtargetInfo &STI) {
1483   return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1484                              STI);
1485 }
1486 
1487 unsigned decodeFieldVmVsrc(unsigned Encoded) {
1488   return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1489 }
1490 
1491 unsigned decodeFieldVaVdst(unsigned Encoded) {
1492   return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1493 }
1494 
1495 unsigned decodeFieldSaSdst(unsigned Encoded) {
1496   return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1497 }
1498 
1499 unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
1500   return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
1501 }
1502 
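// The single-argument encoders below start from 0xffff, i.e. with every
// depctr field at its maximum, so only the field being encoded forces an
// additional wait.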
1503 unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
1504   return encodeFieldVmVsrc(0xffff, VmVsrc);
1505 }
1506 
1507 unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
1508   return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
1509 }
1510 
1511 unsigned encodeFieldVaVdst(unsigned VaVdst) {
1512   return encodeFieldVaVdst(0xffff, VaVdst);
1513 }
1514 
1515 unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
1516   return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
1517 }
1518 
1519 unsigned encodeFieldSaSdst(unsigned SaSdst) {
1520   return encodeFieldSaSdst(0xffff, SaSdst);
1521 }
1522 
1523 } // namespace DepCtr
1524 
1525 //===----------------------------------------------------------------------===//
1526 // hwreg
1527 //===----------------------------------------------------------------------===//
1528 
1529 namespace Hwreg {
1530 
1531 int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
1532   int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
1533   return (Idx < 0) ? Idx : Opr[Idx].Encoding;
1534 }
1535 
1536 bool isValidHwreg(int64_t Id) {
1537   return 0 <= Id && isUInt<ID_WIDTH_>(Id);
1538 }
1539 
1540 bool isValidHwregOffset(int64_t Offset) {
1541   return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
1542 }
1543 
1544 bool isValidHwregWidth(int64_t Width) {
1545   return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
1546 }
1547 
1548 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
1549   return (Id << ID_SHIFT_) |
1550          (Offset << OFFSET_SHIFT_) |
1551          ((Width - 1) << WIDTH_M1_SHIFT_);
1552 }
1553 
1554 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
1555   int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
1556   return (Idx < 0) ? "" : Opr[Idx].Name;
1557 }
1558 
1559 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
1560   Id = (Val & ID_MASK_) >> ID_SHIFT_;
1561   Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
1562   Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
1563 }
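
// A minimal round-trip sketch of the helpers above (illustrative; the field
// shifts, widths and masks are defined with the Hwreg enums elsewhere in the
// target):
//   uint64_t Enc = encodeHwreg(Id, Offset, Width);
//   unsigned DecId, DecOffset, DecWidth;
//   decodeHwreg(Enc, DecId, DecOffset, DecWidth);
// For inputs accepted by isValidHwreg/isValidHwregOffset/isValidHwregWidth the
// decoded values match the encoded ones.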
1564 
1565 } // namespace Hwreg
1566 
1567 //===----------------------------------------------------------------------===//
1568 // exp tgt
1569 //===----------------------------------------------------------------------===//
1570 
1571 namespace Exp {
1572 
1573 struct ExpTgt {
1574   StringLiteral Name;
1575   unsigned Tgt;
1576   unsigned MaxIndex;
1577 };
1578 
1579 static constexpr ExpTgt ExpTgtInfo[] = {
1580   {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
1581   {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
1582   {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
1583   {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
1584   {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
1585   {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
1586   {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
1587 };
1588 
1589 bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1590   for (const ExpTgt &Val : ExpTgtInfo) {
1591     if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1592       Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1593       Name = Val.Name;
1594       return true;
1595     }
1596   }
1597   return false;
1598 }
1599 
1600 unsigned getTgtId(const StringRef Name) {
1601 
1602   for (const ExpTgt &Val : ExpTgtInfo) {
1603     if (Val.MaxIndex == 0 && Name == Val.Name)
1604       return Val.Tgt;
1605 
1606     if (Val.MaxIndex > 0 && Name.starts_with(Val.Name)) {
1607       StringRef Suffix = Name.drop_front(Val.Name.size());
1608 
1609       unsigned Id;
1610       if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1611         return ET_INVALID;
1612 
1613       // Reject leading zeroes.
1614       if (Suffix.size() > 1 && Suffix[0] == '0')
1615         return ET_INVALID;
1616 
1617       return Val.Tgt + Id;
1618     }
1619   }
1620   return ET_INVALID;
1621 }
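
// Examples of the name -> id mapping implemented above (illustrative):
// getTgtId("prim") returns ET_PRIM (an entry with MaxIndex == 0) and
// getTgtId("mrt3") returns ET_MRT0 + 3, while a suffix with a leading zero
// ("mrt03") or an index above the entry's MaxIndex yields ET_INVALID.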
1622 
1623 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1624   switch (Id) {
1625   case ET_NULL:
1626     return !isGFX11Plus(STI);
1627   case ET_POS4:
1628   case ET_PRIM:
1629     return isGFX10Plus(STI);
1630   case ET_DUAL_SRC_BLEND0:
1631   case ET_DUAL_SRC_BLEND1:
1632     return isGFX11Plus(STI);
1633   default:
1634     if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1635       return !isGFX11Plus(STI);
1636     return true;
1637   }
1638 }
1639 
1640 } // namespace Exp
1641 
1642 //===----------------------------------------------------------------------===//
1643 // MTBUF Format
1644 //===----------------------------------------------------------------------===//
1645 
1646 namespace MTBUFFormat {
1647 
1648 int64_t getDfmt(const StringRef Name) {
1649   for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1650     if (Name == DfmtSymbolic[Id])
1651       return Id;
1652   }
1653   return DFMT_UNDEF;
1654 }
1655 
1656 StringRef getDfmtName(unsigned Id) {
1657   assert(Id <= DFMT_MAX);
1658   return DfmtSymbolic[Id];
1659 }
1660 
1661 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1662   if (isSI(STI) || isCI(STI))
1663     return NfmtSymbolicSICI;
1664   if (isVI(STI) || isGFX9(STI))
1665     return NfmtSymbolicVI;
1666   return NfmtSymbolicGFX10;
1667 }
1668 
1669 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1670   auto lookupTable = getNfmtLookupTable(STI);
1671   for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1672     if (Name == lookupTable[Id])
1673       return Id;
1674   }
1675   return NFMT_UNDEF;
1676 }
1677 
1678 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1679   assert(Id <= NFMT_MAX);
1680   return getNfmtLookupTable(STI)[Id];
1681 }
1682 
1683 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1684   unsigned Dfmt;
1685   unsigned Nfmt;
1686   decodeDfmtNfmt(Id, Dfmt, Nfmt);
1687   return isValidNfmt(Nfmt, STI);
1688 }
1689 
1690 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1691   return !getNfmtName(Id, STI).empty();
1692 }
1693 
1694 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1695   return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1696 }
1697 
1698 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1699   Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1700   Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1701 }
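
// Round-trip sketch for the format helpers above (illustrative): for dfmt and
// nfmt values within DFMT_MASK/NFMT_MASK,
//   decodeDfmtNfmt(encodeDfmtNfmt(Dfmt, Nfmt), D, N);
// yields D == Dfmt and N == Nfmt.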
1702 
1703 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
1704   if (isGFX11Plus(STI)) {
1705     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1706       if (Name == UfmtSymbolicGFX11[Id])
1707         return Id;
1708     }
1709   } else {
1710     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1711       if (Name == UfmtSymbolicGFX10[Id])
1712         return Id;
1713     }
1714   }
1715   return UFMT_UNDEF;
1716 }
1717 
1718 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
1719   if (isValidUnifiedFormat(Id, STI))
1720     return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1721   return "";
1722 }
1723 
1724 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1725   return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1726 }
1727 
1728 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1729                              const MCSubtargetInfo &STI) {
1730   int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1731   if (isGFX11Plus(STI)) {
1732     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1733       if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1734         return Id;
1735     }
1736   } else {
1737     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1738       if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1739         return Id;
1740     }
1741   }
1742   return UFMT_UNDEF;
1743 }
1744 
1745 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1746   return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1747 }
1748 
1749 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1750   if (isGFX10Plus(STI))
1751     return UFMT_DEFAULT;
1752   return DFMT_NFMT_DEFAULT;
1753 }
1754 
1755 } // namespace MTBUFFormat
1756 
1757 //===----------------------------------------------------------------------===//
1758 // SendMsg
1759 //===----------------------------------------------------------------------===//
1760 
1761 namespace SendMsg {
1762 
1763 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1764   return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1765 }
1766 
1767 int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
1768   int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
1769   return (Idx < 0) ? Idx : Msg[Idx].Encoding;
1770 }
1771 
1772 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1773   return (MsgId & ~(getMsgIdMask(STI))) == 0;
1774 }
1775 
1776 StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
1777   int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
1778   return (Idx < 0) ? "" : Msg[Idx].Name;
1779 }
1780 
1781 int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1782   const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1783   const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1784   const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1785   for (int i = F; i < L; ++i) {
1786     if (Name == S[i]) {
1787       return i;
1788     }
1789   }
1790   return OP_UNKNOWN_;
1791 }
1792 
1793 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1794                   bool Strict) {
1795   assert(isValidMsgId(MsgId, STI));
1796 
1797   if (!Strict)
1798     return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1799 
1800   if (MsgId == ID_SYSMSG)
1801     return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1802   if (!isGFX11Plus(STI)) {
1803     switch (MsgId) {
1804     case ID_GS_PreGFX11:
1805       return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1806     case ID_GS_DONE_PreGFX11:
1807       return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1808     }
1809   }
1810   return OpId == OP_NONE_;
1811 }
1812 
1813 StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
1814                        const MCSubtargetInfo &STI) {
1815   assert(msgRequiresOp(MsgId, STI));
1816   return (MsgId == ID_SYSMSG) ? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1817 }
1818 
1819 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1820                       const MCSubtargetInfo &STI, bool Strict) {
1821   assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1822 
1823   if (!Strict)
1824     return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1825 
1826   if (!isGFX11Plus(STI)) {
1827     switch (MsgId) {
1828     case ID_GS_PreGFX11:
1829       return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1830     case ID_GS_DONE_PreGFX11:
1831       return (OpId == OP_GS_NOP) ?
1832           (StreamId == STREAM_ID_NONE_) :
1833           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1834     }
1835   }
1836   return StreamId == STREAM_ID_NONE_;
1837 }
1838 
1839 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1840   return MsgId == ID_SYSMSG ||
1841       (!isGFX11Plus(STI) &&
1842        (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1843 }
1844 
1845 bool msgSupportsStream(int64_t MsgId, int64_t OpId,
1846                        const MCSubtargetInfo &STI) {
1847   return !isGFX11Plus(STI) &&
1848       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
1849       OpId != OP_GS_NOP;
1850 }
1851 
1852 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1853                uint16_t &StreamId, const MCSubtargetInfo &STI) {
1854   MsgId = Val & getMsgIdMask(STI);
1855   if (isGFX11Plus(STI)) {
1856     OpId = 0;
1857     StreamId = 0;
1858   } else {
1859     OpId = (Val & OP_MASK_) >> OP_SHIFT_;
1860     StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
1861   }
1862 }
1863 
1864 uint64_t encodeMsg(uint64_t MsgId,
1865                    uint64_t OpId,
1866                    uint64_t StreamId) {
1867   return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
1868 }
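
// Round-trip sketch (illustrative): on pre-GFX11 targets
//   decodeMsg(encodeMsg(MsgId, OpId, StreamId), M, O, S, STI);
// recovers all three fields, while on GFX11+ decodeMsg reports OpId and
// StreamId as 0 because the immediate only carries the message id.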
1869 
1870 } // namespace SendMsg
1871 
1872 //===----------------------------------------------------------------------===//
1873 //
1874 //===----------------------------------------------------------------------===//
1875 
1876 unsigned getInitialPSInputAddr(const Function &F) {
1877   return F.getFnAttributeAsParsedInteger("InitialPSInputAddr", 0);
1878 }
1879 
1880 bool getHasColorExport(const Function &F) {
1881   // As a safe default, always respond as if PS has color exports.
1882   return F.getFnAttributeAsParsedInteger(
1883              "amdgpu-color-export",
1884              F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
1885 }
1886 
1887 bool getHasDepthExport(const Function &F) {
1888   return F.getFnAttributeAsParsedInteger("amdgpu-depth-export", 0) != 0;
1889 }
1890 
1891 bool isShader(CallingConv::ID cc) {
1892   switch (cc) {
1893     case CallingConv::AMDGPU_VS:
1894     case CallingConv::AMDGPU_LS:
1895     case CallingConv::AMDGPU_HS:
1896     case CallingConv::AMDGPU_ES:
1897     case CallingConv::AMDGPU_GS:
1898     case CallingConv::AMDGPU_PS:
1899     case CallingConv::AMDGPU_CS_Chain:
1900     case CallingConv::AMDGPU_CS_ChainPreserve:
1901     case CallingConv::AMDGPU_CS:
1902       return true;
1903     default:
1904       return false;
1905   }
1906 }
1907 
1908 bool isGraphics(CallingConv::ID cc) {
1909   return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
1910 }
1911 
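// AMDGPU_CS is both a graphics shader and a compute calling convention, so it
// is accepted here explicitly even though isGraphics() also returns true for
// it.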
1912 bool isCompute(CallingConv::ID cc) {
1913   return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
1914 }
1915 
1916 bool isEntryFunctionCC(CallingConv::ID CC) {
1917   switch (CC) {
1918   case CallingConv::AMDGPU_KERNEL:
1919   case CallingConv::SPIR_KERNEL:
1920   case CallingConv::AMDGPU_VS:
1921   case CallingConv::AMDGPU_GS:
1922   case CallingConv::AMDGPU_PS:
1923   case CallingConv::AMDGPU_CS:
1924   case CallingConv::AMDGPU_ES:
1925   case CallingConv::AMDGPU_HS:
1926   case CallingConv::AMDGPU_LS:
1927     return true;
1928   default:
1929     return false;
1930   }
1931 }
1932 
1933 bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1934   switch (CC) {
1935   case CallingConv::AMDGPU_Gfx:
1936     return true;
1937   default:
1938     return isEntryFunctionCC(CC) || isChainCC(CC);
1939   }
1940 }
1941 
1942 bool isChainCC(CallingConv::ID CC) {
1943   switch (CC) {
1944   case CallingConv::AMDGPU_CS_Chain:
1945   case CallingConv::AMDGPU_CS_ChainPreserve:
1946     return true;
1947   default:
1948     return false;
1949   }
1950 }
1951 
1952 bool isKernelCC(const Function *Func) {
1953   return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
1954 }
1955 
1956 bool hasXNACK(const MCSubtargetInfo &STI) {
1957   return STI.hasFeature(AMDGPU::FeatureXNACK);
1958 }
1959 
1960 bool hasSRAMECC(const MCSubtargetInfo &STI) {
1961   return STI.hasFeature(AMDGPU::FeatureSRAMECC);
1962 }
1963 
1964 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
1965   return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
1966 }
1967 
1968 bool hasA16(const MCSubtargetInfo &STI) {
1969   return STI.hasFeature(AMDGPU::FeatureA16);
1970 }
1971 
1972 bool hasG16(const MCSubtargetInfo &STI) {
1973   return STI.hasFeature(AMDGPU::FeatureG16);
1974 }
1975 
1976 bool hasPackedD16(const MCSubtargetInfo &STI) {
1977   return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
1978          !isSI(STI);
1979 }
1980 
1981 bool hasGDS(const MCSubtargetInfo &STI) {
1982   return STI.hasFeature(AMDGPU::FeatureGDS);
1983 }
1984 
1985 unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler) {
1986   auto Version = getIsaVersion(STI.getCPU());
1987   if (Version.Major == 10)
1988     return Version.Minor >= 3 ? 13 : 5;
1989   if (Version.Major == 11)
1990     return 5;
1991   if (Version.Major >= 12)
1992     return HasSampler ? 4 : 5;
1993   return 0;
1994 }
1995 
1996 unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI) { return 16; }
1997 
1998 bool isSI(const MCSubtargetInfo &STI) {
1999   return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
2000 }
2001 
2002 bool isCI(const MCSubtargetInfo &STI) {
2003   return STI.hasFeature(AMDGPU::FeatureSeaIslands);
2004 }
2005 
2006 bool isVI(const MCSubtargetInfo &STI) {
2007   return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2008 }
2009 
2010 bool isGFX9(const MCSubtargetInfo &STI) {
2011   return STI.hasFeature(AMDGPU::FeatureGFX9);
2012 }
2013 
2014 bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
2015   return isGFX9(STI) || isGFX10(STI);
2016 }
2017 
2018 bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI) {
2019   return isGFX9(STI) || isGFX10(STI) || isGFX11(STI);
2020 }
2021 
2022 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
2023   return isVI(STI) || isGFX9(STI) || isGFX10(STI);
2024 }
2025 
2026 bool isGFX8Plus(const MCSubtargetInfo &STI) {
2027   return isVI(STI) || isGFX9Plus(STI);
2028 }
2029 
2030 bool isGFX9Plus(const MCSubtargetInfo &STI) {
2031   return isGFX9(STI) || isGFX10Plus(STI);
2032 }
2033 
2034 bool isGFX10(const MCSubtargetInfo &STI) {
2035   return STI.hasFeature(AMDGPU::FeatureGFX10);
2036 }
2037 
2038 bool isGFX10_GFX11(const MCSubtargetInfo &STI) {
2039   return isGFX10(STI) || isGFX11(STI);
2040 }
2041 
2042 bool isGFX10Plus(const MCSubtargetInfo &STI) {
2043   return isGFX10(STI) || isGFX11Plus(STI);
2044 }
2045 
2046 bool isGFX11(const MCSubtargetInfo &STI) {
2047   return STI.hasFeature(AMDGPU::FeatureGFX11);
2048 }
2049 
2050 bool isGFX11Plus(const MCSubtargetInfo &STI) {
2051   return isGFX11(STI) || isGFX12Plus(STI);
2052 }
2053 
2054 bool isGFX12(const MCSubtargetInfo &STI) {
2055   return STI.hasFeature(AMDGPU::FeatureGFX12);
2056 }
2057 
2058 bool isGFX12Plus(const MCSubtargetInfo &STI) { return isGFX12(STI); }
2059 
2060 bool isNotGFX12Plus(const MCSubtargetInfo &STI) { return !isGFX12Plus(STI); }
2061 
2062 bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
2063   return !isGFX11Plus(STI);
2064 }
2065 
2066 bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
2067   return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
2068 }
2069 
2070 bool isGFX10Before1030(const MCSubtargetInfo &STI) {
2071   return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
2072 }
2073 
2074 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
2075   return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
2076 }
2077 
2078 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
2079   return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
2080 }
2081 
2082 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
2083   return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
2084 }
2085 
2086 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
2087   return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
2088 }
2089 
2090 bool isGFX10_3_GFX11(const MCSubtargetInfo &STI) {
2091   return isGFX10_BEncoding(STI) && !isGFX12Plus(STI);
2092 }
2093 
2094 bool isGFX90A(const MCSubtargetInfo &STI) {
2095   return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2096 }
2097 
2098 bool isGFX940(const MCSubtargetInfo &STI) {
2099   return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
2100 }
2101 
2102 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
2103   return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2104 }
2105 
2106 bool hasMAIInsts(const MCSubtargetInfo &STI) {
2107   return STI.hasFeature(AMDGPU::FeatureMAIInsts);
2108 }
2109 
2110 bool hasVOPD(const MCSubtargetInfo &STI) {
2111   return STI.hasFeature(AMDGPU::FeatureVOPD);
2112 }
2113 
2114 bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
2115   return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
2116 }
2117 
2118 unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
2119   return STI.hasFeature(AMDGPU::FeatureKernargPreload);
2120 }
2121 
2122 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
2123                          int32_t ArgNumVGPR) {
2124   if (has90AInsts && ArgNumAGPR)
2125     return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
2126   return std::max(ArgNumVGPR, ArgNumAGPR);
2127 }
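
// Worked example (illustrative): with gfx90a-style register accounting
// (has90AInsts == true), ArgNumVGPR = 10 and ArgNumAGPR = 6 give
// alignTo(10, 4) + 6 == 18 registers; without it the result is simply
// std::max(10, 6) == 10.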
2128 
2129 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
2130   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
2131   const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
2132   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
2133     Reg == AMDGPU::SCC;
2134 }
2135 
2136 bool isHi(unsigned Reg, const MCRegisterInfo &MRI) {
2137   return MRI.getEncodingValue(Reg) & AMDGPU::HWEncoding::IS_HI;
2138 }
2139 
2140 #define MAP_REG2REG \
2141   using namespace AMDGPU; \
2142   switch(Reg) { \
2143   default: return Reg; \
2144   CASE_CI_VI(FLAT_SCR) \
2145   CASE_CI_VI(FLAT_SCR_LO) \
2146   CASE_CI_VI(FLAT_SCR_HI) \
2147   CASE_VI_GFX9PLUS(TTMP0) \
2148   CASE_VI_GFX9PLUS(TTMP1) \
2149   CASE_VI_GFX9PLUS(TTMP2) \
2150   CASE_VI_GFX9PLUS(TTMP3) \
2151   CASE_VI_GFX9PLUS(TTMP4) \
2152   CASE_VI_GFX9PLUS(TTMP5) \
2153   CASE_VI_GFX9PLUS(TTMP6) \
2154   CASE_VI_GFX9PLUS(TTMP7) \
2155   CASE_VI_GFX9PLUS(TTMP8) \
2156   CASE_VI_GFX9PLUS(TTMP9) \
2157   CASE_VI_GFX9PLUS(TTMP10) \
2158   CASE_VI_GFX9PLUS(TTMP11) \
2159   CASE_VI_GFX9PLUS(TTMP12) \
2160   CASE_VI_GFX9PLUS(TTMP13) \
2161   CASE_VI_GFX9PLUS(TTMP14) \
2162   CASE_VI_GFX9PLUS(TTMP15) \
2163   CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
2164   CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
2165   CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
2166   CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
2167   CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
2168   CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
2169   CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
2170   CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
2171   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
2172   CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
2173   CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
2174   CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
2175   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
2176   CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
2177   CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2178   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
2179   CASE_GFXPRE11_GFX11PLUS(M0) \
2180   CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
2181   CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
2182   }
2183 
2184 #define CASE_CI_VI(node) \
2185   assert(!isSI(STI)); \
2186   case node: return isCI(STI) ? node##_ci : node##_vi;
2187 
2188 #define CASE_VI_GFX9PLUS(node) \
2189   case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
2190 
2191 #define CASE_GFXPRE11_GFX11PLUS(node) \
2192   case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
2193 
2194 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
2195   case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
2196 
2197 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
2198   if (STI.getTargetTriple().getArch() == Triple::r600)
2199     return Reg;
2200   MAP_REG2REG
2201 }
2202 
2203 #undef CASE_CI_VI
2204 #undef CASE_VI_GFX9PLUS
2205 #undef CASE_GFXPRE11_GFX11PLUS
2206 #undef CASE_GFXPRE11_GFX11PLUS_TO
2207 
2208 #define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
2209 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
2210 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
2211 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
2212 
2213 unsigned mc2PseudoReg(unsigned Reg) {
2214   MAP_REG2REG
2215 }
2216 
2217 bool isInlineValue(unsigned Reg) {
2218   switch (Reg) {
2219   case AMDGPU::SRC_SHARED_BASE_LO:
2220   case AMDGPU::SRC_SHARED_BASE:
2221   case AMDGPU::SRC_SHARED_LIMIT_LO:
2222   case AMDGPU::SRC_SHARED_LIMIT:
2223   case AMDGPU::SRC_PRIVATE_BASE_LO:
2224   case AMDGPU::SRC_PRIVATE_BASE:
2225   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
2226   case AMDGPU::SRC_PRIVATE_LIMIT:
2227   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2228     return true;
2229   case AMDGPU::SRC_VCCZ:
2230   case AMDGPU::SRC_EXECZ:
2231   case AMDGPU::SRC_SCC:
2232     return true;
2233   case AMDGPU::SGPR_NULL:
2234     return true;
2235   default:
2236     return false;
2237   }
2238 }
2239 
2240 #undef CASE_CI_VI
2241 #undef CASE_VI_GFX9PLUS
2242 #undef CASE_GFXPRE11_GFX11PLUS
2243 #undef CASE_GFXPRE11_GFX11PLUS_TO
2244 #undef MAP_REG2REG
2245 
2246 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2247   assert(OpNo < Desc.NumOperands);
2248   unsigned OpType = Desc.operands()[OpNo].OperandType;
2249   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2250          OpType <= AMDGPU::OPERAND_SRC_LAST;
2251 }
2252 
2253 bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2254   assert(OpNo < Desc.NumOperands);
2255   unsigned OpType = Desc.operands()[OpNo].OperandType;
2256   return OpType >= AMDGPU::OPERAND_KIMM_FIRST &&
2257          OpType <= AMDGPU::OPERAND_KIMM_LAST;
2258 }
2259 
2260 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2261   assert(OpNo < Desc.NumOperands);
2262   unsigned OpType = Desc.operands()[OpNo].OperandType;
2263   switch (OpType) {
2264   case AMDGPU::OPERAND_REG_IMM_FP32:
2265   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2266   case AMDGPU::OPERAND_REG_IMM_FP64:
2267   case AMDGPU::OPERAND_REG_IMM_FP16:
2268   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2269   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2270   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2271   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2272   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2273   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2274   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2275   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2276   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2277   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2278   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2279   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2280     return true;
2281   default:
2282     return false;
2283   }
2284 }
2285 
2286 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
2287   assert(OpNo < Desc.NumOperands);
2288   unsigned OpType = Desc.operands()[OpNo].OperandType;
2289   return (OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
2290           OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST) ||
2291          (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2292           OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST);
2293 }
2294 
2295 // Avoid using MCRegisterClass::getSize, since that function will go away
2296 // (move from MC* level to Target* level). Return size in bits.
2297 unsigned getRegBitWidth(unsigned RCID) {
2298   switch (RCID) {
2299   case AMDGPU::VGPR_LO16RegClassID:
2300   case AMDGPU::VGPR_HI16RegClassID:
2301   case AMDGPU::SGPR_LO16RegClassID:
2302   case AMDGPU::AGPR_LO16RegClassID:
2303     return 16;
2304   case AMDGPU::SGPR_32RegClassID:
2305   case AMDGPU::VGPR_32RegClassID:
2306   case AMDGPU::VRegOrLds_32RegClassID:
2307   case AMDGPU::AGPR_32RegClassID:
2308   case AMDGPU::VS_32RegClassID:
2309   case AMDGPU::AV_32RegClassID:
2310   case AMDGPU::SReg_32RegClassID:
2311   case AMDGPU::SReg_32_XM0RegClassID:
2312   case AMDGPU::SRegOrLds_32RegClassID:
2313     return 32;
2314   case AMDGPU::SGPR_64RegClassID:
2315   case AMDGPU::VS_64RegClassID:
2316   case AMDGPU::SReg_64RegClassID:
2317   case AMDGPU::VReg_64RegClassID:
2318   case AMDGPU::AReg_64RegClassID:
2319   case AMDGPU::SReg_64_XEXECRegClassID:
2320   case AMDGPU::VReg_64_Align2RegClassID:
2321   case AMDGPU::AReg_64_Align2RegClassID:
2322   case AMDGPU::AV_64RegClassID:
2323   case AMDGPU::AV_64_Align2RegClassID:
2324     return 64;
2325   case AMDGPU::SGPR_96RegClassID:
2326   case AMDGPU::SReg_96RegClassID:
2327   case AMDGPU::VReg_96RegClassID:
2328   case AMDGPU::AReg_96RegClassID:
2329   case AMDGPU::VReg_96_Align2RegClassID:
2330   case AMDGPU::AReg_96_Align2RegClassID:
2331   case AMDGPU::AV_96RegClassID:
2332   case AMDGPU::AV_96_Align2RegClassID:
2333     return 96;
2334   case AMDGPU::SGPR_128RegClassID:
2335   case AMDGPU::SReg_128RegClassID:
2336   case AMDGPU::VReg_128RegClassID:
2337   case AMDGPU::AReg_128RegClassID:
2338   case AMDGPU::VReg_128_Align2RegClassID:
2339   case AMDGPU::AReg_128_Align2RegClassID:
2340   case AMDGPU::AV_128RegClassID:
2341   case AMDGPU::AV_128_Align2RegClassID:
2342     return 128;
2343   case AMDGPU::SGPR_160RegClassID:
2344   case AMDGPU::SReg_160RegClassID:
2345   case AMDGPU::VReg_160RegClassID:
2346   case AMDGPU::AReg_160RegClassID:
2347   case AMDGPU::VReg_160_Align2RegClassID:
2348   case AMDGPU::AReg_160_Align2RegClassID:
2349   case AMDGPU::AV_160RegClassID:
2350   case AMDGPU::AV_160_Align2RegClassID:
2351     return 160;
2352   case AMDGPU::SGPR_192RegClassID:
2353   case AMDGPU::SReg_192RegClassID:
2354   case AMDGPU::VReg_192RegClassID:
2355   case AMDGPU::AReg_192RegClassID:
2356   case AMDGPU::VReg_192_Align2RegClassID:
2357   case AMDGPU::AReg_192_Align2RegClassID:
2358   case AMDGPU::AV_192RegClassID:
2359   case AMDGPU::AV_192_Align2RegClassID:
2360     return 192;
2361   case AMDGPU::SGPR_224RegClassID:
2362   case AMDGPU::SReg_224RegClassID:
2363   case AMDGPU::VReg_224RegClassID:
2364   case AMDGPU::AReg_224RegClassID:
2365   case AMDGPU::VReg_224_Align2RegClassID:
2366   case AMDGPU::AReg_224_Align2RegClassID:
2367   case AMDGPU::AV_224RegClassID:
2368   case AMDGPU::AV_224_Align2RegClassID:
2369     return 224;
2370   case AMDGPU::SGPR_256RegClassID:
2371   case AMDGPU::SReg_256RegClassID:
2372   case AMDGPU::VReg_256RegClassID:
2373   case AMDGPU::AReg_256RegClassID:
2374   case AMDGPU::VReg_256_Align2RegClassID:
2375   case AMDGPU::AReg_256_Align2RegClassID:
2376   case AMDGPU::AV_256RegClassID:
2377   case AMDGPU::AV_256_Align2RegClassID:
2378     return 256;
2379   case AMDGPU::SGPR_288RegClassID:
2380   case AMDGPU::SReg_288RegClassID:
2381   case AMDGPU::VReg_288RegClassID:
2382   case AMDGPU::AReg_288RegClassID:
2383   case AMDGPU::VReg_288_Align2RegClassID:
2384   case AMDGPU::AReg_288_Align2RegClassID:
2385   case AMDGPU::AV_288RegClassID:
2386   case AMDGPU::AV_288_Align2RegClassID:
2387     return 288;
2388   case AMDGPU::SGPR_320RegClassID:
2389   case AMDGPU::SReg_320RegClassID:
2390   case AMDGPU::VReg_320RegClassID:
2391   case AMDGPU::AReg_320RegClassID:
2392   case AMDGPU::VReg_320_Align2RegClassID:
2393   case AMDGPU::AReg_320_Align2RegClassID:
2394   case AMDGPU::AV_320RegClassID:
2395   case AMDGPU::AV_320_Align2RegClassID:
2396     return 320;
2397   case AMDGPU::SGPR_352RegClassID:
2398   case AMDGPU::SReg_352RegClassID:
2399   case AMDGPU::VReg_352RegClassID:
2400   case AMDGPU::AReg_352RegClassID:
2401   case AMDGPU::VReg_352_Align2RegClassID:
2402   case AMDGPU::AReg_352_Align2RegClassID:
2403   case AMDGPU::AV_352RegClassID:
2404   case AMDGPU::AV_352_Align2RegClassID:
2405     return 352;
2406   case AMDGPU::SGPR_384RegClassID:
2407   case AMDGPU::SReg_384RegClassID:
2408   case AMDGPU::VReg_384RegClassID:
2409   case AMDGPU::AReg_384RegClassID:
2410   case AMDGPU::VReg_384_Align2RegClassID:
2411   case AMDGPU::AReg_384_Align2RegClassID:
2412   case AMDGPU::AV_384RegClassID:
2413   case AMDGPU::AV_384_Align2RegClassID:
2414     return 384;
2415   case AMDGPU::SGPR_512RegClassID:
2416   case AMDGPU::SReg_512RegClassID:
2417   case AMDGPU::VReg_512RegClassID:
2418   case AMDGPU::AReg_512RegClassID:
2419   case AMDGPU::VReg_512_Align2RegClassID:
2420   case AMDGPU::AReg_512_Align2RegClassID:
2421   case AMDGPU::AV_512RegClassID:
2422   case AMDGPU::AV_512_Align2RegClassID:
2423     return 512;
2424   case AMDGPU::SGPR_1024RegClassID:
2425   case AMDGPU::SReg_1024RegClassID:
2426   case AMDGPU::VReg_1024RegClassID:
2427   case AMDGPU::AReg_1024RegClassID:
2428   case AMDGPU::VReg_1024_Align2RegClassID:
2429   case AMDGPU::AReg_1024_Align2RegClassID:
2430   case AMDGPU::AV_1024RegClassID:
2431   case AMDGPU::AV_1024_Align2RegClassID:
2432     return 1024;
2433   default:
2434     llvm_unreachable("Unexpected register class");
2435   }
2436 }
2437 
2438 unsigned getRegBitWidth(const MCRegisterClass &RC) {
2439   return getRegBitWidth(RC.getID());
2440 }
2441 
2442 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2443                            unsigned OpNo) {
2444   assert(OpNo < Desc.NumOperands);
2445   unsigned RCID = Desc.operands()[OpNo].RegClass;
2446   return getRegBitWidth(RCID) / 8;
2447 }
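
// For example (illustrative): an operand constrained to VReg_64 reports a
// register width of 64 bits above, so getRegOperandSize returns 8 bytes.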
2448 
2449 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2450   if (isInlinableIntLiteral(Literal))
2451     return true;
2452 
2453   uint64_t Val = static_cast<uint64_t>(Literal);
2454   return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
2455          (Val == llvm::bit_cast<uint64_t>(1.0)) ||
2456          (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
2457          (Val == llvm::bit_cast<uint64_t>(0.5)) ||
2458          (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
2459          (Val == llvm::bit_cast<uint64_t>(2.0)) ||
2460          (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
2461          (Val == llvm::bit_cast<uint64_t>(4.0)) ||
2462          (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
2463          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2464 }
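
// For example (illustrative): llvm::bit_cast<uint64_t>(2.0), i.e.
// 0x4000000000000000, is inlinable, while 0x3fc45f306dc9c882 (1/(2*pi)) is
// accepted only when HasInv2Pi is set.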
2465 
2466 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2467   if (isInlinableIntLiteral(Literal))
2468     return true;
2469 
2470   // The actual type of the operand does not seem to matter as long
2471   // as the bits match one of the inline immediate values.  For example:
2472   //
2473   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2474   // so it is a legal inline immediate.
2475   //
2476   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2477   // floating-point, so it is a legal inline immediate.
2478 
2479   uint32_t Val = static_cast<uint32_t>(Literal);
2480   return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
2481          (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
2482          (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
2483          (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
2484          (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
2485          (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
2486          (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
2487          (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
2488          (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
2489          (Val == 0x3e22f983 && HasInv2Pi);
2490 }
2491 
2492 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2493   if (!HasInv2Pi)
2494     return false;
2495 
2496   if (isInlinableIntLiteral(Literal))
2497     return true;
2498 
2499   uint16_t Val = static_cast<uint16_t>(Literal);
2500   return Val == 0x3C00 || // 1.0
2501          Val == 0xBC00 || // -1.0
2502          Val == 0x3800 || // 0.5
2503          Val == 0xB800 || // -0.5
2504          Val == 0x4000 || // 2.0
2505          Val == 0xC000 || // -2.0
2506          Val == 0x4400 || // 4.0
2507          Val == 0xC400 || // -4.0
2508          Val == 0x3118;   // 1/2pi
2509 }
2510 
2511 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2512   assert(HasInv2Pi);
2513 
2514   if (isInt<16>(Literal) || isUInt<16>(Literal)) {
2515     int16_t Trunc = static_cast<int16_t>(Literal);
2516     return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
2517   }
2518   if (!(Literal & 0xffff))
2519     return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
2520 
2521   int16_t Lo16 = static_cast<int16_t>(Literal);
2522   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2523   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
2524 }
2525 
2526 bool isInlinableIntLiteralV216(int32_t Literal) {
2527   int16_t Lo16 = static_cast<int16_t>(Literal);
2528   if (isInt<16>(Literal) || isUInt<16>(Literal))
2529     return isInlinableIntLiteral(Lo16);
2530 
2531   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2532   if (!(Literal & 0xffff))
2533     return isInlinableIntLiteral(Hi16);
2534   return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
2535 }
2536 
2537 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi, uint8_t OpType) {
2538   switch (OpType) {
2539   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2540   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2541     return isInlinableLiteralV216(Literal, HasInv2Pi);
2542   default:
2543     return isInlinableIntLiteralV216(Literal);
2544   }
2545 }
2546 
2547 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2548   assert(HasInv2Pi);
2549 
2550   int16_t Lo16 = static_cast<int16_t>(Literal);
2551   if (isInt<16>(Literal) || isUInt<16>(Literal))
2552     return true;
2553 
2554   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2555   if (!(Literal & 0xffff))
2556     return true;
2557   return Lo16 == Hi16;
2558 }
2559 
2560 bool isValid32BitLiteral(uint64_t Val, bool IsFP64) {
2561   if (IsFP64)
2562     return !(Val & 0xffffffffu);
2563 
2564   return isUInt<32>(Val) || isInt<32>(Val);
2565 }
2566 
2567 bool isArgPassedInSGPR(const Argument *A) {
2568   const Function *F = A->getParent();
2569 
2570   // Arguments to compute shaders are never a source of divergence.
2571   CallingConv::ID CC = F->getCallingConv();
2572   switch (CC) {
2573   case CallingConv::AMDGPU_KERNEL:
2574   case CallingConv::SPIR_KERNEL:
2575     return true;
2576   case CallingConv::AMDGPU_VS:
2577   case CallingConv::AMDGPU_LS:
2578   case CallingConv::AMDGPU_HS:
2579   case CallingConv::AMDGPU_ES:
2580   case CallingConv::AMDGPU_GS:
2581   case CallingConv::AMDGPU_PS:
2582   case CallingConv::AMDGPU_CS:
2583   case CallingConv::AMDGPU_Gfx:
2584   case CallingConv::AMDGPU_CS_Chain:
2585   case CallingConv::AMDGPU_CS_ChainPreserve:
2586     // For non-compute shaders, SGPR inputs are marked with either inreg or
2587     // byval. Everything else is in VGPRs.
2588     return A->hasAttribute(Attribute::InReg) ||
2589            A->hasAttribute(Attribute::ByVal);
2590   default:
2591     // TODO: treat i1 as divergent?
2592     return A->hasAttribute(Attribute::InReg);
2593   }
2594 }
2595 
2596 bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
2597   // Arguments to compute shaders are never a source of divergence.
2598   CallingConv::ID CC = CB->getCallingConv();
2599   switch (CC) {
2600   case CallingConv::AMDGPU_KERNEL:
2601   case CallingConv::SPIR_KERNEL:
2602     return true;
2603   case CallingConv::AMDGPU_VS:
2604   case CallingConv::AMDGPU_LS:
2605   case CallingConv::AMDGPU_HS:
2606   case CallingConv::AMDGPU_ES:
2607   case CallingConv::AMDGPU_GS:
2608   case CallingConv::AMDGPU_PS:
2609   case CallingConv::AMDGPU_CS:
2610   case CallingConv::AMDGPU_Gfx:
2611   case CallingConv::AMDGPU_CS_Chain:
2612   case CallingConv::AMDGPU_CS_ChainPreserve:
2613     // For non-compute shaders, SGPR inputs are marked with either inreg or
2614     // byval. Everything else is in VGPRs.
2615     return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
2616            CB->paramHasAttr(ArgNo, Attribute::ByVal);
2617   default:
2618     return CB->paramHasAttr(ArgNo, Attribute::InReg);
2619   }
2620 }
2621 
2622 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2623   return isGCN3Encoding(ST) || isGFX10Plus(ST);
2624 }
2625 
2626 static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2627   return isGFX9Plus(ST);
2628 }
2629 
2630 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2631                                       int64_t EncodedOffset) {
2632   if (isGFX12Plus(ST))
2633     return isUInt<23>(EncodedOffset);
2634 
2635   return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2636                                : isUInt<8>(EncodedOffset);
2637 }
2638 
2639 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2640                                     int64_t EncodedOffset,
2641                                     bool IsBuffer) {
2642   if (isGFX12Plus(ST))
2643     return isInt<24>(EncodedOffset);
2644 
2645   return !IsBuffer &&
2646          hasSMRDSignedImmOffset(ST) &&
2647          isInt<21>(EncodedOffset);
2648 }
2649 
2650 static bool isDwordAligned(uint64_t ByteOffset) {
2651   return (ByteOffset & 3) == 0;
2652 }
2653 
2654 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2655                                 uint64_t ByteOffset) {
2656   if (hasSMEMByteOffset(ST))
2657     return ByteOffset;
2658 
2659   assert(isDwordAligned(ByteOffset));
2660   return ByteOffset >> 2;
2661 }
2662 
2663 std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2664                                             int64_t ByteOffset, bool IsBuffer) {
2665   if (isGFX12Plus(ST)) // 24 bit signed offsets
2666     return isInt<24>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2667                                  : std::nullopt;
2668 
2669   // The signed version is always a byte offset.
2670   if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2671     assert(hasSMEMByteOffset(ST));
2672     return isInt<20>(ByteOffset) ? std::optional<int64_t>(ByteOffset)
2673                                  : std::nullopt;
2674   }
2675 
2676   if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2677     return std::nullopt;
2678 
2679   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2680   return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2681              ? std::optional<int64_t>(EncodedOffset)
2682              : std::nullopt;
2683 }
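
// Illustrative behavior of getSMRDEncodedOffset: on subtargets with SMEM byte
// offsets the returned encoding is the byte offset itself, while e.g. on SI a
// dword-aligned 256-byte offset is scaled to an encoded value of 64, and a
// misaligned byte offset yields std::nullopt.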
2684 
2685 std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2686                                                      int64_t ByteOffset) {
2687   if (!isCI(ST) || !isDwordAligned(ByteOffset))
2688     return std::nullopt;
2689 
2690   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2691   return isUInt<32>(EncodedOffset) ? std::optional<int64_t>(EncodedOffset)
2692                                    : std::nullopt;
2693 }
2694 
2695 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
2696   if (AMDGPU::isGFX10(ST))
2697     return 12;
2698 
2699   if (AMDGPU::isGFX12(ST))
2700     return 24;
2701   return 13;
2702 }
2703 
2704 namespace {
2705 
2706 struct SourceOfDivergence {
2707   unsigned Intr;
2708 };
2709 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2710 
2711 struct AlwaysUniform {
2712   unsigned Intr;
2713 };
2714 const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
2715 
2716 #define GET_SourcesOfDivergence_IMPL
2717 #define GET_UniformIntrinsics_IMPL
2718 #define GET_Gfx9BufferFormat_IMPL
2719 #define GET_Gfx10BufferFormat_IMPL
2720 #define GET_Gfx11PlusBufferFormat_IMPL
2721 #include "AMDGPUGenSearchableTables.inc"
2722 
2723 } // end anonymous namespace
2724 
2725 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2726   return lookupSourceOfDivergence(IntrID);
2727 }
2728 
2729 bool isIntrinsicAlwaysUniform(unsigned IntrID) {
2730   return lookupAlwaysUniform(IntrID);
2731 }
2732 
2733 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2734                                                   uint8_t NumComponents,
2735                                                   uint8_t NumFormat,
2736                                                   const MCSubtargetInfo &STI) {
2737   return isGFX11Plus(STI)
2738              ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2739                                             NumFormat)
2740              : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2741                                                        NumComponents, NumFormat)
2742                             : getGfx9BufferFormatInfo(BitsPerComp,
2743                                                       NumComponents, NumFormat);
2744 }
2745 
2746 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2747                                                   const MCSubtargetInfo &STI) {
2748   return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2749                           : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2750                                          : getGfx9BufferFormatInfo(Format);
2751 }
2752 
2753 bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc) {
2754   for (auto OpName : { OpName::vdst, OpName::src0, OpName::src1,
2755                        OpName::src2 }) {
2756     int Idx = getNamedOperandIdx(OpDesc.getOpcode(), OpName);
2757     if (Idx == -1)
2758       continue;
2759 
2760     if (OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64RegClassID ||
2761         OpDesc.operands()[Idx].RegClass == AMDGPU::VReg_64_Align2RegClassID)
2762       return true;
2763   }
2764 
2765   return false;
2766 }
2767 
2768 bool isDPALU_DPP(const MCInstrDesc &OpDesc) {
2769   return hasAny64BitVGPROperands(OpDesc);
2770 }
2771 
2772 } // namespace AMDGPU
2773 
2774 raw_ostream &operator<<(raw_ostream &OS,
2775                         const AMDGPU::IsaInfo::TargetIDSetting S) {
2776   switch (S) {
2777   case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
2778     OS << "Unsupported";
2779     break;
2780   case (AMDGPU::IsaInfo::TargetIDSetting::Any):
2781     OS << "Any";
2782     break;
2783   case (AMDGPU::IsaInfo::TargetIDSetting::Off):
2784     OS << "Off";
2785     break;
2786   case (AMDGPU::IsaInfo::TargetIDSetting::On):
2787     OS << "On";
2788     break;
2789   }
2790   return OS;
2791 }
2792 
2793 } // namespace llvm
2794