1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "SIDefines.h"
13 #include "llvm/ADT/FloatingPointMode.h"
14 #include "llvm/IR/CallingConv.h"
15 #include "llvm/Support/Alignment.h"
16 #include <array>
17 #include <functional>
18 #include <utility>
19 
20 struct amd_kernel_code_t;
21 
22 namespace llvm {
23 
24 struct Align;
25 class Argument;
26 class Function;
27 class GCNSubtarget;
28 class GlobalValue;
29 class MCInstrInfo;
30 class MCRegisterClass;
31 class MCRegisterInfo;
32 class MCSubtargetInfo;
33 class StringRef;
34 class Triple;
35 class raw_ostream;
36 
37 namespace amdhsa {
38 struct kernel_descriptor_t;
39 }
40 
41 namespace AMDGPU {
42 
43 struct IsaVersion;
44 
45 /// \returns HSA OS ABI Version identification.
46 std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
47 /// \returns True if HSA OS ABI Version identification is 2,
48 /// false otherwise.
49 bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
50 /// \returns True if HSA OS ABI Version identification is 3,
51 /// false otherwise.
52 bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
53 /// \returns True if HSA OS ABI Version identification is 4,
54 /// false otherwise.
55 bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
56 /// \returns True if HSA OS ABI Version identification is 5,
57 /// false otherwise.
58 bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
59 /// \returns True if HSA OS ABI Version identification is 3 and above,
60 /// false otherwise.
61 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
62 
63 /// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
64 unsigned getMultigridSyncArgImplicitArgPosition();
65 
66 /// \returns The offset of the hostcall pointer argument from implicitarg_ptr
67 unsigned getHostcallImplicitArgPosition();
68 
/// \returns NOTE(review): presumably the offset of the default queue argument
/// from implicitarg_ptr, by analogy with the accessors above -- confirm
/// against the definition.
unsigned getDefaultQueueImplicitArgPosition();
/// \returns NOTE(review): presumably the offset of the completion action
/// argument from implicitarg_ptr -- confirm against the definition.
unsigned getCompletionActionImplicitArgPosition();
71 
72 /// \returns Code object version.
73 unsigned getAmdhsaCodeObjectVersion();
74 
/// Describes one GCN buffer format. Pointers to entries of this type are
/// returned by the getGcnBufferFormatInfo() overloads, which take either
/// (BitsPerComp, NumComponents, NumFormat) or a Format value.
struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  // NOTE(review): not used as a lookup argument by any function declared in
  // this header; meaning to be confirmed against the table definition.
  unsigned DataFormat;
};
82 
/// Per-opcode flags of an MAI instruction.
/// NOTE(review): presumably the row type of the table requested through
/// GET_MAIInstInfoTable_DECL -- confirm against the generated include.
struct MAIInstInfo {
  uint16_t Opcode;
  // NOTE(review): likely the value reported by getMAIIsDGEMM() -- confirm.
  bool is_dgemm;
  // NOTE(review): likely the value reported by getMAIIsGFX940XDL() -- confirm.
  bool is_gfx940_xdl;
};
88 
89 #define GET_MIMGBaseOpcode_DECL
90 #define GET_MIMGDim_DECL
91 #define GET_MIMGEncoding_DECL
92 #define GET_MIMGLZMapping_DECL
93 #define GET_MIMGMIPMapping_DECL
94 #define GET_MIMGBiASMapping_DECL
95 #define GET_MAIInstInfoTable_DECL
96 #include "AMDGPUGenSearchableTables.inc"
97 
98 namespace IsaInfo {
99 
// Fixed SGPR counts.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // NOTE(review): presumably the number of SGPRs set aside for the trap
  // handler -- confirm against the users of this constant.
  TRAP_NUM_SGPRS = 16
};
106 
/// State of one target ID feature. AMDGPUTargetID stores one value of this
/// type for xnack and one for sramecc.
enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};
113 
114 class AMDGPUTargetID {
115 private:
116   const MCSubtargetInfo &STI;
117   TargetIDSetting XnackSetting;
118   TargetIDSetting SramEccSetting;
119 
120 public:
121   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
122   ~AMDGPUTargetID() = default;
123 
124   /// \return True if the current xnack setting is not "Unsupported".
isXnackSupported()125   bool isXnackSupported() const {
126     return XnackSetting != TargetIDSetting::Unsupported;
127   }
128 
129   /// \returns True if the current xnack setting is "On" or "Any".
isXnackOnOrAny()130   bool isXnackOnOrAny() const {
131     return XnackSetting == TargetIDSetting::On ||
132         XnackSetting == TargetIDSetting::Any;
133   }
134 
135   /// \returns True if current xnack setting is "On" or "Off",
136   /// false otherwise.
isXnackOnOrOff()137   bool isXnackOnOrOff() const {
138     return getXnackSetting() == TargetIDSetting::On ||
139         getXnackSetting() == TargetIDSetting::Off;
140   }
141 
142   /// \returns The current xnack TargetIDSetting, possible options are
143   /// "Unsupported", "Any", "Off", and "On".
getXnackSetting()144   TargetIDSetting getXnackSetting() const {
145     return XnackSetting;
146   }
147 
148   /// Sets xnack setting to \p NewXnackSetting.
setXnackSetting(TargetIDSetting NewXnackSetting)149   void setXnackSetting(TargetIDSetting NewXnackSetting) {
150     XnackSetting = NewXnackSetting;
151   }
152 
153   /// \return True if the current sramecc setting is not "Unsupported".
isSramEccSupported()154   bool isSramEccSupported() const {
155     return SramEccSetting != TargetIDSetting::Unsupported;
156   }
157 
158   /// \returns True if the current sramecc setting is "On" or "Any".
isSramEccOnOrAny()159   bool isSramEccOnOrAny() const {
160   return SramEccSetting == TargetIDSetting::On ||
161       SramEccSetting == TargetIDSetting::Any;
162   }
163 
164   /// \returns True if current sramecc setting is "On" or "Off",
165   /// false otherwise.
isSramEccOnOrOff()166   bool isSramEccOnOrOff() const {
167     return getSramEccSetting() == TargetIDSetting::On ||
168         getSramEccSetting() == TargetIDSetting::Off;
169   }
170 
171   /// \returns The current sramecc TargetIDSetting, possible options are
172   /// "Unsupported", "Any", "Off", and "On".
getSramEccSetting()173   TargetIDSetting getSramEccSetting() const {
174     return SramEccSetting;
175   }
176 
177   /// Sets sramecc setting to \p NewSramEccSetting.
setSramEccSetting(TargetIDSetting NewSramEccSetting)178   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
179     SramEccSetting = NewSramEccSetting;
180   }
181 
182   void setTargetIDFromFeaturesString(StringRef FS);
183   void setTargetIDFromTargetIDStream(StringRef TargetID);
184 
185   /// \returns String representation of an object.
186   std::string toString() const;
187 };
188 
189 /// \returns Wavefront size for given subtarget \p STI.
190 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
191 
192 /// \returns Local memory size in bytes for given subtarget \p STI.
193 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
194 
195 /// \returns Maximum addressable local memory size in bytes for given subtarget
196 /// \p STI.
197 unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI);
198 
199 /// \returns Number of execution units per compute unit for given subtarget \p
200 /// STI.
201 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
202 
203 /// \returns Maximum number of work groups per compute unit for given subtarget
204 /// \p STI and limited by given \p FlatWorkGroupSize.
205 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
206                                unsigned FlatWorkGroupSize);
207 
208 /// \returns Minimum number of waves per execution unit for given subtarget \p
209 /// STI.
210 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
211 
212 /// \returns Maximum number of waves per execution unit for given subtarget \p
213 /// STI without any kind of limitation.
214 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
215 
216 /// \returns Number of waves per execution unit required to support the given \p
217 /// FlatWorkGroupSize.
218 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
219                                    unsigned FlatWorkGroupSize);
220 
221 /// \returns Minimum flat work group size for given subtarget \p STI.
222 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
223 
224 /// \returns Maximum flat work group size for given subtarget \p STI.
225 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
226 
227 /// \returns Number of waves per work group for given subtarget \p STI and
228 /// \p FlatWorkGroupSize.
229 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
230                               unsigned FlatWorkGroupSize);
231 
232 /// \returns SGPR allocation granularity for given subtarget \p STI.
233 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
234 
235 /// \returns SGPR encoding granularity for given subtarget \p STI.
236 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
237 
238 /// \returns Total number of SGPRs for given subtarget \p STI.
239 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
240 
241 /// \returns Addressable number of SGPRs for given subtarget \p STI.
242 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
243 
244 /// \returns Minimum number of SGPRs that meets the given number of waves per
245 /// execution unit requirement for given subtarget \p STI.
246 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
247 
248 /// \returns Maximum number of SGPRs that meets the given number of waves per
249 /// execution unit requirement for given subtarget \p STI.
250 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
251                         bool Addressable);
252 
253 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
254 /// STI when the given special registers are used.
255 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
256                           bool FlatScrUsed, bool XNACKUsed);
257 
258 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
259 /// STI when the given special registers are used. XNACK is inferred from
260 /// \p STI.
261 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
262                           bool FlatScrUsed);
263 
264 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
265 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
266 /// register counts.
267 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
268 
269 /// \returns VGPR allocation granularity for given subtarget \p STI.
270 ///
271 /// For subtargets which support it, \p EnableWavefrontSize32 should match
272 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
273 unsigned
274 getVGPRAllocGranule(const MCSubtargetInfo *STI,
275                     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
276 
277 /// \returns VGPR encoding granularity for given subtarget \p STI.
278 ///
279 /// For subtargets which support it, \p EnableWavefrontSize32 should match
280 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
281 unsigned getVGPREncodingGranule(
282     const MCSubtargetInfo *STI,
283     std::optional<bool> EnableWavefrontSize32 = std::nullopt);
284 
285 /// \returns Total number of VGPRs for given subtarget \p STI.
286 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
287 
288 /// \returns Addressable number of VGPRs for given subtarget \p STI.
289 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
290 
291 /// \returns Minimum number of VGPRs that meets given number of waves per
292 /// execution unit requirement for given subtarget \p STI.
293 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
294 
295 /// \returns Maximum number of VGPRs that meets given number of waves per
296 /// execution unit requirement for given subtarget \p STI.
297 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
298 
299 /// \returns Number of waves reachable for a given \p NumVGPRs usage for given
300 /// subtarget \p STI.
301 unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
302                                       unsigned NumVGPRs);
303 
304 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
305 /// \p NumVGPRs are used.
306 ///
307 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
308 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
309 unsigned
310 getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
311                  std::optional<bool> EnableWavefrontSize32 = std::nullopt);
312 
313 } // end namespace IsaInfo
314 
315 LLVM_READONLY
316 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
317 
318 LLVM_READONLY
hasNamedOperand(uint64_t Opcode,uint64_t NamedIdx)319 inline bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx) {
320   return getNamedOperandIdx(Opcode, NamedIdx) != -1;
321 }
322 
323 LLVM_READONLY
324 int getSOPPWithRelaxation(uint16_t Opcode);
325 
/// Properties of a MIMG base opcode. Pointers to entries of this type are
/// returned by getMIMGBaseOpcode() and getMIMGBaseOpcodeInfo().
/// NOTE(review): the individual flags are not described in this header; their
/// meaning is presumably fixed by the table that backs those lookups --
/// confirm there before relying on a particular interpretation.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
};
343 
344 LLVM_READONLY
345 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
346 
347 LLVM_READONLY
348 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
349 
/// Properties of a MIMG dimension. Entries can be looked up by enum value
/// (getMIMGDimInfo), by encoding (getMIMGDimInfoByEncoding) or by assembly
/// suffix (getMIMGDimInfoByAsmSuffix).
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};
359 
360 LLVM_READONLY
361 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
362 
363 LLVM_READONLY
364 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
365 
366 LLVM_READONLY
367 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
368 
/// Pairs two MIMG base opcodes; entries are looked up with
/// getMIMGLZMappingInfo(), whose argument is named L.
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};
373 
/// Pairs two MIMG base opcodes; entries are looked up with
/// getMIMGMIPMappingInfo(), whose argument is named MIP.
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};
378 
/// Pairs two MIMG base opcodes; entries are looked up with
/// getMIMGBiasMappingInfo(), whose argument is named Bias.
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};
383 
/// Pairs two MIMG base opcodes; entries are looked up with
/// getMIMGOffsetMappingInfo(), whose argument is named Offset.
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};
388 
/// Pairs two MIMG base opcodes; entries are looked up with
/// getMIMGG16MappingInfo(), whose argument is named G.
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
393 
394 LLVM_READONLY
395 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
396 
/// Pairs two WMMA opcodes.
/// NOTE(review): presumably the table row behind mapWMMA2AddrTo3AddrOpcode()
/// and mapWMMA3AddrTo2AddrOpcode() -- confirm against their definitions.
struct WMMAOpcodeMappingInfo {
  unsigned Opcode2Addr;
  unsigned Opcode3Addr;
};
401 
402 LLVM_READONLY
403 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
404 
405 LLVM_READONLY
406 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
407 
408 LLVM_READONLY
409 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
410 
411 LLVM_READONLY
412 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
413 
414 LLVM_READONLY
415 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
416                   unsigned VDataDwords, unsigned VAddrDwords);
417 
418 LLVM_READONLY
419 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
420 
421 LLVM_READONLY
422 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
423                            const MIMGDimInfo *Dim, bool IsA16,
424                            bool IsG16Supported);
425 
/// Per-opcode MIMG information; a pointer to an entry is returned by
/// getMIMGInfo(). getMIMGOpcode() takes quantities with the same names
/// (BaseOpcode, MIMGEncoding, VDataDwords, VAddrDwords) as arguments.
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
  uint8_t VAddrOperands;
};
434 
435 LLVM_READONLY
436 const MIMGInfo *getMIMGInfo(unsigned Opc);
437 
438 LLVM_READONLY
439 int getMTBUFBaseOpcode(unsigned Opc);
440 
441 LLVM_READONLY
442 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
443 
444 LLVM_READONLY
445 int getMTBUFElements(unsigned Opc);
446 
447 LLVM_READONLY
448 bool getMTBUFHasVAddr(unsigned Opc);
449 
450 LLVM_READONLY
451 bool getMTBUFHasSrsrc(unsigned Opc);
452 
453 LLVM_READONLY
454 bool getMTBUFHasSoffset(unsigned Opc);
455 
456 LLVM_READONLY
457 int getMUBUFBaseOpcode(unsigned Opc);
458 
459 LLVM_READONLY
460 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
461 
462 LLVM_READONLY
463 int getMUBUFElements(unsigned Opc);
464 
465 LLVM_READONLY
466 bool getMUBUFHasVAddr(unsigned Opc);
467 
468 LLVM_READONLY
469 bool getMUBUFHasSrsrc(unsigned Opc);
470 
471 LLVM_READONLY
472 bool getMUBUFHasSoffset(unsigned Opc);
473 
474 LLVM_READONLY
475 bool getMUBUFIsBufferInv(unsigned Opc);
476 
477 LLVM_READONLY
478 bool getSMEMIsBuffer(unsigned Opc);
479 
480 LLVM_READONLY
481 bool getVOP1IsSingle(unsigned Opc);
482 
483 LLVM_READONLY
484 bool getVOP2IsSingle(unsigned Opc);
485 
486 LLVM_READONLY
487 bool getVOP3IsSingle(unsigned Opc);
488 
489 LLVM_READONLY
490 bool isVOPC64DPP(unsigned Opc);
491 
492 /// Returns true if MAI operation is a double precision GEMM.
493 LLVM_READONLY
494 bool getMAIIsDGEMM(unsigned Opc);
495 
496 LLVM_READONLY
497 bool getMAIIsGFX940XDL(unsigned Opc);
498 
/// Result type of getCanBeVOPD().
/// NOTE(review): X and Y presumably state whether the queried opcode may serve
/// as the X, respectively Y, component of a VOPD instruction -- confirm.
struct CanBeVOPD {
  bool X;
  bool Y;
};
503 
504 LLVM_READONLY
505 CanBeVOPD getCanBeVOPD(unsigned Opc);
506 
507 LLVM_READONLY
508 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
509                                                   uint8_t NumComponents,
510                                                   uint8_t NumFormat,
511                                                   const MCSubtargetInfo &STI);
512 LLVM_READONLY
513 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
514                                                   const MCSubtargetInfo &STI);
515 
516 LLVM_READONLY
517 int getMCOpcode(uint16_t Opcode, unsigned Gen);
518 
519 LLVM_READONLY
520 unsigned getVOPDOpcode(unsigned Opc);
521 
522 LLVM_READONLY
523 int getVOPDFull(unsigned OpX, unsigned OpY);
524 
525 LLVM_READONLY
526 bool isVOPD(unsigned Opc);
527 
528 LLVM_READNONE
529 bool isMAC(unsigned Opc);
530 
531 LLVM_READNONE
532 bool isPermlane16(unsigned Opc);
533 
534 namespace VOPD {
535 
// Operand indices within one VOPD component, followed by operand counts.
enum Component : unsigned {
  DST = 0,
  SRC0,
  SRC1,
  SRC2,

  // The enumerators below are counts, not indices. Because this enum is
  // unscoped, DST_NUM has the same value as SRC0 (1) and MAX_SRC_NUM the same
  // value as SRC2 (3).
  DST_NUM = 1,
  MAX_SRC_NUM = 3,
  MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
546 
// Number of VGPR banks per VOPD component operand.
// NOTE(review): the four entries presumably correspond to DST, SRC0, SRC1 and
// SRC2 in that order -- confirm against the code that indexes this table.
constexpr unsigned BANKS_NUM[] = {2, 4, 4, 2};

// Indices of the two components of a VOPD instruction.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
// Both component indices, in order.
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
// Number of components; matches the number of entries in COMPONENTS.
constexpr unsigned COMPONENTS_NUM = 2;
553 
554 // Properties of VOPD components.
555 class ComponentProps {
556 private:
557   unsigned SrcOperandsNum = 0;
558   std::optional<unsigned> MandatoryLiteralIdx;
559   bool HasSrc2Acc = false;
560 
561 public:
562   ComponentProps() = default;
563   ComponentProps(const MCInstrDesc &OpDesc);
564 
565   // Return the total number of src operands this component has.
getCompSrcOperandsNum()566   unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
567 
568   // Return the number of src operands of this component visible to the parser.
getCompParsedSrcOperandsNum()569   unsigned getCompParsedSrcOperandsNum() const {
570     return SrcOperandsNum - HasSrc2Acc;
571   }
572 
573   // Return true iif this component has a mandatory literal.
hasMandatoryLiteral()574   bool hasMandatoryLiteral() const { return MandatoryLiteralIdx.has_value(); }
575 
576   // If this component has a mandatory literal, return component operand
577   // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
getMandatoryLiteralCompOperandIndex()578   unsigned getMandatoryLiteralCompOperandIndex() const {
579     assert(hasMandatoryLiteral());
580     return *MandatoryLiteralIdx;
581   }
582 
583   // Return true iif this component has operand
584   // with component index CompSrcIdx and this operand may be a register.
hasRegSrcOperand(unsigned CompSrcIdx)585   bool hasRegSrcOperand(unsigned CompSrcIdx) const {
586     assert(CompSrcIdx < Component::MAX_SRC_NUM);
587     return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
588   }
589 
590   // Return true iif this component has tied src2.
hasSrc2Acc()591   bool hasSrc2Acc() const { return HasSrc2Acc; }
592 
593 private:
hasMandatoryLiteralAt(unsigned CompSrcIdx)594   bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
595     assert(CompSrcIdx < Component::MAX_SRC_NUM);
596     return hasMandatoryLiteral() &&
597            *MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
598   }
599 };
600 
// Kind of a VOPD component. The values are used as indices into the per-kind
// tables of ComponentLayout.
enum ComponentKind : unsigned {
  SINGLE = 0,  // A single VOP1 or VOP2 instruction which may be used in VOPD.
  COMPONENT_X, // A VOPD instruction, X component.
  COMPONENT_Y, // A VOPD instruction, Y component.
  MAX = COMPONENT_Y // Alias of the largest enumerator.
};
607 
608 // Interface functions of this class map VOPD component operand indices
609 // to indices of operands in MachineInstr/MCInst or parsed operands array.
610 //
611 // Note that this class operates with 3 kinds of indices:
612 // - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
// - MC operand indices (they refer to operands in a MachineInstr/MCInst);
// - parsed operand indices (they refer to operands in the parsed operands
//   array).
615 //
616 // For SINGLE components mapping between these indices is trivial.
617 // But things get more complicated for COMPONENT_X and
618 // COMPONENT_Y because these components share the same
619 // MachineInstr/MCInst and the same parsed operands array.
620 // Below is an example of component operand to parsed operand
621 // mapping for the following instruction:
622 //
623 //   v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
624 //
625 //                          PARSED        COMPONENT         PARSED
626 // COMPONENT               OPERANDS     OPERAND INDEX    OPERAND INDEX
627 // -------------------------------------------------------------------
628 //                     "v_dual_add_f32"                        0
629 // v_dual_add_f32            v255          0 (DST)    -->      1
630 //                           v4            1 (SRC0)   -->      2
631 //                           v5            2 (SRC1)   -->      3
632 //                          "::"                               4
633 //                     "v_dual_mov_b32"                        5
634 // v_dual_mov_b32            v6            0 (DST)    -->      6
635 //                           v1            1 (SRC0)   -->      7
636 // -------------------------------------------------------------------
637 //
638 class ComponentLayout {
639 private:
640   // Regular MachineInstr/MCInst operands are ordered as follows:
641   //   dst, src0 [, other src operands]
642   // VOPD MachineInstr/MCInst operands are ordered as follows:
643   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
644   // Each ComponentKind has operand indices defined below.
645   static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
646   static constexpr unsigned FIRST_MC_SRC_IDX[] = {1, 2, 2 /* + OpX.MCSrcNum */};
647 
648   // Parsed operands of regular instructions are ordered as follows:
649   //   Mnemo dst src0 [vsrc1 ...]
650   // Parsed VOPD operands are ordered as follows:
651   //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
652   //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
653   // Each ComponentKind has operand indices defined below.
654   static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
655                                                 4 /* + OpX.ParsedSrcNum */};
656   static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
657       2, 2, 5 /* + OpX.ParsedSrcNum */};
658 
659 private:
660   const ComponentKind Kind;
661   const ComponentProps PrevComp;
662 
663 public:
664   // Create layout for COMPONENT_X or SINGLE component.
ComponentLayout(ComponentKind Kind)665   ComponentLayout(ComponentKind Kind) : Kind(Kind) {
666     assert(Kind == ComponentKind::SINGLE || Kind == ComponentKind::COMPONENT_X);
667   }
668 
669   // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
ComponentLayout(const ComponentProps & OpXProps)670   ComponentLayout(const ComponentProps &OpXProps)
671       : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps) {}
672 
673 public:
674   // Return the index of dst operand in MCInst operands.
getIndexOfDstInMCOperands()675   unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
676 
677   // Return the index of the specified src operand in MCInst operands.
getIndexOfSrcInMCOperands(unsigned CompSrcIdx)678   unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx) const {
679     assert(CompSrcIdx < Component::MAX_SRC_NUM);
680     return FIRST_MC_SRC_IDX[Kind] + getPrevCompSrcNum() + CompSrcIdx;
681   }
682 
683   // Return the index of dst operand in the parsed operands array.
getIndexOfDstInParsedOperands()684   unsigned getIndexOfDstInParsedOperands() const {
685     return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
686   }
687 
688   // Return the index of the specified src operand in the parsed operands array.
getIndexOfSrcInParsedOperands(unsigned CompSrcIdx)689   unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
690     assert(CompSrcIdx < Component::MAX_SRC_NUM);
691     return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
692   }
693 
694 private:
getPrevCompSrcNum()695   unsigned getPrevCompSrcNum() const {
696     return PrevComp.getCompSrcOperandsNum();
697   }
getPrevCompParsedSrcNum()698   unsigned getPrevCompParsedSrcNum() const {
699     return PrevComp.getCompParsedSrcOperandsNum();
700   }
701 };
702 
703 // Layout and properties of VOPD components.
// Layout and properties of VOPD components.
class ComponentInfo : public ComponentLayout, public ComponentProps {
public:
  // Create ComponentInfo for COMPONENT_X or SINGLE component.
  // This constructor is not explicit and Kind defaults to SINGLE, so a
  // MCInstrDesc converts implicitly to a SINGLE ComponentInfo; InstInfo's
  // (OpX, OpY) constructor relies on that conversion.
  ComponentInfo(const MCInstrDesc &OpDesc,
                ComponentKind Kind = ComponentKind::SINGLE)
      : ComponentLayout(Kind), ComponentProps(OpDesc) {}

  // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
  ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps)
      : ComponentLayout(OpXProps), ComponentProps(OpDesc) {}

  // Map component operand index to parsed operand index.
  // Return 0 if the specified operand does not exist.
  unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
};
719 
720 // Properties of VOPD instructions.
721 class InstInfo {
722 private:
723   const ComponentInfo CompInfo[COMPONENTS_NUM];
724 
725 public:
726   using RegIndices = std::array<unsigned, Component::MAX_OPR_NUM>;
727 
InstInfo(const MCInstrDesc & OpX,const MCInstrDesc & OpY)728   InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
729       : CompInfo{OpX, OpY} {}
730 
InstInfo(const ComponentInfo & OprInfoX,const ComponentInfo & OprInfoY)731   InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
732       : CompInfo{OprInfoX, OprInfoY} {}
733 
734   const ComponentInfo &operator[](size_t ComponentIdx) const {
735     assert(ComponentIdx < COMPONENTS_NUM);
736     return CompInfo[ComponentIdx];
737   }
738 
739   // Check VOPD operands constraints.
740   // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
741   // for the specified component and MC operand. The callback must return 0
742   // if the operand is not a register or not a VGPR.
hasInvalidOperand(std::function<unsigned (unsigned,unsigned)> GetRegIdx)743   bool hasInvalidOperand(
744       std::function<unsigned(unsigned, unsigned)> GetRegIdx) const {
745     return getInvalidCompOperandIndex(GetRegIdx).has_value();
746   }
747 
748   // Check VOPD operands constraints.
749   // Return the index of an invalid component operand, if any.
750   std::optional<unsigned> getInvalidCompOperandIndex(
751       std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
752 
753 private:
754   RegIndices
755   getRegIndices(unsigned ComponentIdx,
756                 std::function<unsigned(unsigned, unsigned)> GetRegIdx) const;
757 };
758 
759 } // namespace VOPD
760 
761 LLVM_READONLY
762 std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
763 
764 LLVM_READONLY
765 // Get properties of 2 single VOP1/VOP2 instructions
766 // used as components to create a VOPD instruction.
767 VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
768 
769 LLVM_READONLY
770 // Get properties of VOPD X and Y components.
771 VOPD::InstInfo
772 getVOPDInstInfo(unsigned VOPDOpcode, const MCInstrInfo *InstrInfo);
773 
774 LLVM_READONLY
775 bool isTrue16Inst(unsigned Opc);
776 
777 LLVM_READONLY
778 unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
779 
780 LLVM_READONLY
781 unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
782 
783 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
784                                const MCSubtargetInfo *STI);
785 
786 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
787     const MCSubtargetInfo *STI);
788 
789 bool isGroupSegment(const GlobalValue *GV);
790 bool isGlobalSegment(const GlobalValue *GV);
791 bool isReadOnlySegment(const GlobalValue *GV);
792 
793 /// \returns True if constants should be emitted to .text section for given
794 /// target triple \p TT, false otherwise.
795 bool shouldEmitConstantsToTextSection(const Triple &TT);
796 
797 /// \returns Integer value requested using \p F's \p Name attribute.
798 ///
799 /// \returns \p Default if attribute is not present.
800 ///
801 /// \returns \p Default and emits error if requested value cannot be converted
802 /// to integer.
803 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
804 
805 /// \returns A pair of integer values requested using \p F's \p Name attribute
806 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
807 /// is false).
808 ///
809 /// \returns \p Default if attribute is not present.
810 ///
811 /// \returns \p Default and emits error if one of the requested values cannot be
812 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
813 /// not present.
814 std::pair<int, int> getIntegerPairAttribute(const Function &F,
815                                             StringRef Name,
816                                             std::pair<int, int> Default,
817                                             bool OnlyFirstRequired = false);
818 
819 /// Represents the counter values to wait for in an s_waitcnt instruction.
820 ///
821 /// Large values (including the maximum possible integer) can be used to
822 /// represent "don't care" waits.
struct Waitcnt {
  // Every counter starts out as ~0u, which the predicates below treat as
  // "no wait requested" for that counter.
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() = default;
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  // Wait for zero on VmCnt, ExpCnt and LgkmCnt; VsCnt is zero as well when
  // HasVscnt is set and left at ~0u otherwise.
  static Waitcnt allZero(bool HasVscnt) {
    const unsigned Vs = HasVscnt ? 0u : ~0u;
    return Waitcnt(0, 0, 0, Vs);
  }

  // Wait for zero on every counter but VsCnt, which stays at ~0u.
  static Waitcnt allZeroExceptVsCnt() {
    return allZero(/*HasVscnt=*/false);
  }

  // True if any of the four counters differs from ~0u.
  bool hasWait() const { return hasWaitExceptVsCnt() || hasWaitVsCnt(); }

  // True if VmCnt, ExpCnt or LgkmCnt differs from ~0u.
  bool hasWaitExceptVsCnt() const {
    return !(VmCnt == ~0u && ExpCnt == ~0u && LgkmCnt == ~0u);
  }

  // True if VsCnt differs from ~0u.
  bool hasWaitVsCnt() const { return !(VsCnt == ~0u); }

  // True if every counter of this object is less than or equal to the
  // corresponding counter of Other.
  bool dominates(const Waitcnt &Other) const {
    if (VmCnt > Other.VmCnt || ExpCnt > Other.ExpCnt)
      return false;
    return LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  // Counter-wise minimum of this object and Other.
  Waitcnt combined(const Waitcnt &Other) const {
    Waitcnt Result;
    Result.VmCnt = std::min(VmCnt, Other.VmCnt);
    Result.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Result.LgkmCnt = std::min(LgkmCnt, Other.LgkmCnt);
    Result.VsCnt = std::min(VsCnt, Other.VsCnt);
    return Result;
  }
};
861 
// Bit-mask queries for the counters packed into an encoded s_waitcnt value.
// Each query takes the ISA version because the field layout documented on
// decodeWaitcnt/encodeWaitcnt in this header differs between generations.
// NOTE(review): whether a mask is returned already shifted into its field
// position is not visible from these declarations - check the implementation.

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version (presumably covering
/// all three counter fields at once - confirm in the implementation).
unsigned getWaitcntBitMask(const IsaVersion &Version);
873 
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]        (pre-gfx9)
///     \p Vmcnt = \p Waitcnt[15:14,3:0]  (gfx9,10)
///     \p Vmcnt = \p Waitcnt[15:10]      (gfx11+)
///     \p Expcnt = \p Waitcnt[6:4]       (pre-gfx11)
///     \p Expcnt = \p Waitcnt[2:0]       (gfx11+)
///     \p Lgkmcnt = \p Waitcnt[11:8]     (pre-gfx10)
///     \p Lgkmcnt = \p Waitcnt[13:8]     (gfx10)
///     \p Lgkmcnt = \p Waitcnt[9:4]      (gfx11+)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

/// Overload of decodeWaitcnt that returns the counters decoded from
/// \p Encoded as a Waitcnt value instead of writing them through reference
/// parameters.
/// NOTE(review): the layout documented above has no vscnt field, so the
/// returned VsCnt presumably keeps its ~0u default - confirm in the
/// implementation.
Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
900 
// NOTE(review): for the three single-counter encoders below, the handling of
// a counter value wider than its field (truncate vs. saturate) is not visible
// from the declarations - check the implementation before relying on either.

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[2:0]   = \p Expcnt      (gfx11+)
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9,10)
///     Waitcnt[6:4]   = \p Expcnt      (pre-gfx11)
///     Waitcnt[9:4]   = \p Lgkmcnt     (gfx11+)
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10)
///     Waitcnt[15:10] = \p Vmcnt       (gfx11+)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9,10)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

/// Overload of encodeWaitcnt that takes the counters bundled in a Waitcnt
/// value.
/// NOTE(review): the layout documented above has no vscnt field, so
/// \p Decoded.VsCnt is presumably not part of the result - confirm in the
/// implementation.
unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
933 
// Helpers for a hardware-register operand which, judging by the signatures of
// encodeHwreg/decodeHwreg below, packs an Id, an Offset and a Width into a
// single value.
namespace Hwreg {

/// Looks up the hardware register id for \p Name on subtarget \p STI.
/// NOTE(review): the value returned for an unrecognised name is not visible
/// from this header - check the implementation.
LLVM_READONLY
int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI);

/// \returns true if \p Id is accepted as a hardware register id.
LLVM_READNONE
bool isValidHwreg(int64_t Id);

/// \returns true if \p Offset is accepted as the offset component.
LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

/// \returns true if \p Width is accepted as the width component.
LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

/// \returns the single value combining \p Id, \p Offset and \p Width.
/// NOTE(review): whether the arguments are validated here or must be checked
/// by the caller with the isValid* helpers above is not visible - confirm.
LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

/// \returns the name associated with hardware register \p Id on \p STI.
/// NOTE(review): marked LLVM_READNONE while taking \p STI by reference; worth
/// confirming that attribute matches what the implementation reads.
LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

/// Splits \p Val into its components, written to \p Id, \p Offset and
/// \p Width.
void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);

} // namespace Hwreg
957 
// Helpers for encoding and decoding a "DepCtr" operand value. The semantics of
// the individual fields live in the implementation; the notes below describe
// only what the signatures show.
namespace DepCtr {

/// \returns the default encoding for subtarget \p STI.
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI);

/// Encodes the field called \p Name with value \p Val for \p STI.
/// \p UsedOprMask is an in/out reference parameter.
/// NOTE(review): presumably it accumulates which fields were already
/// specified and a negative return signals an error - confirm both in the
/// implementation.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
                 const MCSubtargetInfo &STI);

/// \returns whether \p Code can be represented symbolically on \p STI;
/// \p HasNonDefaultVal is an output written by the callee.
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
                              const MCSubtargetInfo &STI);

/// Decodes one field from \p Code, reporting it through \p Name, \p Val and
/// \p IsDefault. \p Id is an in/out reference parameter.
/// NOTE(review): presumably \p Id is an iteration cursor advanced on each call
/// and the return value says whether a field was produced - confirm.
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
                  bool &IsDefault, const MCSubtargetInfo &STI);

} // namespace DepCtr
969 
// Helpers mapping between export target ids and their textual names.
namespace Exp {

/// Looks up the name for target \p Id, writing it to \p Name and an
/// accompanying index to \p Index.
/// NOTE(review): the meaning of \p Index (presumably the numeric suffix of an
/// indexed target name) and of a false return are not visible here - confirm.
bool getTgtName(unsigned Id, StringRef &Name, int &Index);

/// \returns the target id for \p Name.
/// NOTE(review): the value returned for an unknown name is not visible here.
LLVM_READONLY
unsigned getTgtId(const StringRef Name);

/// \returns true if target \p Id is supported on subtarget \p STI.
/// NOTE(review): marked LLVM_READNONE while taking \p STI by reference; worth
/// confirming that attribute matches what the implementation reads.
LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp
981 
// Helpers for MTBUF format operands. Two representations appear in the
// signatures below: a split (Dfmt, Nfmt) pair and a single "unified" format
// value, with convertDfmtNfmt2Ufmt translating from the former to the latter.
// NOTE(review): the lookup functions return int64_t, presumably so that a
// negative value can signal "not found" - confirm in the implementation.
namespace MTBUFFormat {

/// \returns the single format value combining \p Dfmt and \p Nfmt.
LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

/// Splits \p Format into its \p Dfmt and \p Nfmt components.
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

/// \returns the dfmt value for \p Name.
int64_t getDfmt(const StringRef Name);

/// \returns the name of dfmt value \p Id.
StringRef getDfmtName(unsigned Id);

/// \returns the nfmt value for \p Name on subtarget \p STI.
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

/// \returns the name of nfmt value \p Id on subtarget \p STI.
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

/// \returns true if \p Val is a valid combined dfmt/nfmt value on \p STI.
bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

/// \returns true if \p Val is a valid nfmt value on \p STI.
bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

/// \returns the unified format value for \p Name on subtarget \p STI.
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);

/// \returns the name of unified format value \p Id on subtarget \p STI.
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);

/// \returns true if \p Val is a valid unified format value on \p STI.
bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);

/// \returns the unified format corresponding to the (\p Dfmt, \p Nfmt) pair
/// on subtarget \p STI.
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
                             const MCSubtargetInfo &STI);

/// \returns true if \p Val is a valid format encoding on \p STI.
/// NOTE(review): presumably dispatches to the unified or split check
/// depending on the subtarget - confirm in the implementation.
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

/// \returns the format encoding used by default on subtarget \p STI.
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat
1015 
// Helpers for a sendmsg operand which, judging by encodeMsg/decodeMsg below,
// is made of a message id, an operation id and a stream id.
// NOTE(review): several functions here are marked LLVM_READNONE while taking
// \p STI by reference; worth confirming that attribute matches what the
// implementations read.
namespace SendMsg {

/// \returns the message id for \p Name on subtarget \p STI.
/// NOTE(review): the value returned for an unknown name is not visible here.
LLVM_READONLY
int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI);

/// \returns the operation id called \p Name within message \p MsgId.
LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

/// \returns the name of message \p MsgId on subtarget \p STI.
LLVM_READNONE
StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI);

/// \returns the name of operation \p OpId of message \p MsgId on \p STI.
LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

/// \returns true if \p MsgId is a valid message id on subtarget \p STI.
LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);

/// \returns true if \p OpId is a valid operation for message \p MsgId.
/// \p Strict defaults to true.
/// NOTE(review): what is relaxed when \p Strict is false is not visible from
/// this header - check the implementation.
LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

/// \returns true if \p StreamId is valid for the given \p MsgId / \p OpId.
/// \p Strict defaults to true; see the note on isValidMsgOp.
LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

/// \returns true if message \p MsgId requires an operation id on \p STI.
LLVM_READNONE
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);

/// \returns true if the \p MsgId / \p OpId combination accepts a stream id.
LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);

/// Splits \p Val into \p MsgId, \p OpId and \p StreamId for subtarget \p STI.
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
               uint16_t &StreamId, const MCSubtargetInfo &STI);

/// \returns the single value combining \p MsgId, \p OpId and \p StreamId.
LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg
1056 
1057 
/// \returns the initial PS input address value for function \p F.
/// NOTE(review): presumably read from a function attribute on \p F - confirm
/// the attribute name and default in the implementation.
unsigned getInitialPSInputAddr(const Function &F);

/// \returns whether function \p F is recorded as having a color export.
/// NOTE(review): source of the information and its default are not visible
/// from this header.
bool getHasColorExport(const Function &F);

/// \returns whether function \p F is recorded as having a depth export.
/// NOTE(review): source of the information and its default are not visible
/// from this header.
bool getHasDepthExport(const Function &F);

/// \returns true if calling convention \p CC is a shader calling convention.
LLVM_READNONE
bool isShader(CallingConv::ID CC);

/// \returns true if calling convention \p CC is a graphics calling convention.
LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

/// \returns true if calling convention \p CC is a compute calling convention.
LLVM_READNONE
bool isCompute(CallingConv::ID CC);

/// \returns true if \p CC is the calling convention of an entry function,
/// i.e. (per the comment on isModuleEntryFunctionCC below) a function that is
/// entered by the hardware.
LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entrypoints into the current module, i.e. they
// are allowed to be called from outside the current module. This is different
// from isEntryFunctionCC, which is only true for functions that are entered by
// the hardware. Module entry points include all entry functions but also
// include functions that can be called from other functions inside or outside
// the current module. Module entry functions are allowed to allocate LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

/// \returns true if \p Func uses a kernel calling convention.
/// NOTE(review): takes a pointer; whether a null \p Func is tolerated is not
/// visible from this header.
bool isKernelCC(const Function *Func);
1086 
1087 // FIXME: Remove this when calling conventions cleaned up
1088 LLVM_READNONE
isKernel(CallingConv::ID CC)1089 inline bool isKernel(CallingConv::ID CC) {
1090   switch (CC) {
1091   case CallingConv::AMDGPU_KERNEL:
1092   case CallingConv::SPIR_KERNEL:
1093     return true;
1094   default:
1095     return false;
1096   }
1097 }
1098 
// Feature queries evaluated against subtarget \p STI. Each returns whether the
// named capability is present; the exact feature bits consulted are defined
// in the implementation.
bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasA16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

// Generation / encoding queries evaluated against subtarget \p STI.
// NOTE(review): going by the names, isGFX<N> presumably matches exactly that
// generation, isGFX<N>Plus that generation or newer, and isNotGFX<N>Plus the
// complement - confirm against the implementation before relying on it.
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
bool isGFX8Plus(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isNotGFX10Plus(const MCSubtargetInfo &STI);
bool isGFX10Before1030(const MCSubtargetInfo &STI);
bool isGFX11(const MCSubtargetInfo &STI);
bool isGFX11Plus(const MCSubtargetInfo &STI);
bool isNotGFX11Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool isGFX940(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);

/// \returns the total VGPR count derived from \p ArgNumAGPR and
/// \p ArgNumVGPR.
/// NOTE(review): how the two counts are combined presumably depends on
/// \p has90AInsts - confirm the formula in the implementation.
int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1131 
/// \returns true if \p Reg is a scalar register (SGPR), using \p TRI for
/// register information.
/// NOTE(review): \p TRI is a pointer; whether null is tolerated is not visible
/// from this header.
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo reg, return the correct hardware register given
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// \returns true if \p Reg denotes an inline value.
/// NOTE(review): which register numbers count as inline values is defined in
/// the implementation.
LLVM_READNONE
bool isInlineValue(unsigned Reg);

/// Is this an AMDGPU specific source operand? These include registers,
/// inline constants, literals and mandatory literals (KImm).
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a KImm operand?
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class identified by
/// \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get the size of register operand number \p OpNo of \p Desc, using \p MRI
/// for register information.
/// NOTE(review): the unit of the result (bits vs. bytes) is not stated by the
/// declaration - check the implementation.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);
1168 
1169 LLVM_READNONE
getOperandSize(const MCOperandInfo & OpInfo)1170 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1171   switch (OpInfo.OperandType) {
1172   case AMDGPU::OPERAND_REG_IMM_INT32:
1173   case AMDGPU::OPERAND_REG_IMM_FP32:
1174   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1175   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1176   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1177   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1178   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1179   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1180   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1181   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1182   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1183   case AMDGPU::OPERAND_KIMM32:
1184   case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1185     return 4;
1186 
1187   case AMDGPU::OPERAND_REG_IMM_INT64:
1188   case AMDGPU::OPERAND_REG_IMM_FP64:
1189   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1190   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1191   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1192     return 8;
1193 
1194   case AMDGPU::OPERAND_REG_IMM_INT16:
1195   case AMDGPU::OPERAND_REG_IMM_FP16:
1196   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1197   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1198   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1199   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1200   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1201   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1202   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1203   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1204   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1205   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1206   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1207     return 2;
1208 
1209   default:
1210     llvm_unreachable("unhandled operand type");
1211   }
1212 }
1213 
1214 LLVM_READNONE
getOperandSize(const MCInstrDesc & Desc,unsigned OpNo)1215 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1216   return getOperandSize(Desc.operands()[OpNo]);
1217 }
1218 
1219 /// Is this literal inlinable, and not one of the values intended for floating
1220 /// point values.
1221 LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // The accepted window is the closed interval [-16, 64]; both end points are
  // themselves inlinable.
  if (Literal < -16)
    return false;
  return Literal <= 64;
}
1225 
// NOTE(review): for every function below that takes \p HasInv2Pi, the flag
// presumably says whether the subtarget offers 1/(2*pi) as an inline constant
// - confirm in the implementation. The integer parameter types carry the raw
// bits of the literal.

/// Is this 64-bit literal inlinable
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

/// Is this 32-bit literal inlinable
LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

/// Is this 16-bit literal inlinable
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

/// Is this packed 2x16-bit literal, passed in a 32-bit value, inlinable
LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

/// Is this packed 2x16-bit literal inlinable as an integer. Unlike the
/// function above this one takes no \p HasInv2Pi flag.
LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

/// Is this packed 2x16-bit literal foldable.
/// NOTE(review): how "foldable" differs from "inlinable" is defined in the
/// implementation and not visible from this header.
LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
1244 
/// \returns true if argument \p Arg is passed in SGPRs.
/// NOTE(review): takes a pointer; whether a null \p Arg is tolerated is not
/// visible from this header.
bool isArgPassedInSGPR(const Argument *Arg);

/// \returns true if \p EncodedOffset is legal as an unsigned SMRD offset on
/// subtarget \p ST. The argument is the already-encoded offset, not a byte
/// offset.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

/// \returns true if \p EncodedOffset is legal as a signed SMRD offset on
/// subtarget \p ST. \p IsBuffer selects the buffer form of the instruction.
LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                            int64_t ByteOffset, bool IsBuffer);

/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                     int64_t ByteOffset);

/// For FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \return The number of bits available for the signed offset field in flat
/// instructions. Note that some forms of the instruction disallow negative
/// offsets.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field.  \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// Splits the MUBUF offset \p Imm into two parts written to \p SOffset and
/// \p ImmOffset. \p Alignment defaults to Align(4).
/// NOTE(review): the meaning of a false return (presumably "no legal split
/// exists") and the role of \p Alignment in choosing the split are not
/// visible from this header - check the implementation.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));
1288 
1289 LLVM_READNONE
isLegal64BitDPPControl(unsigned DC)1290 inline bool isLegal64BitDPPControl(unsigned DC) {
1291   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1292 }
1293 
/// \returns true if the intrinsic identified by \p IntrID is a source of
/// divergence.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1296 
1297 // Track defaults for fields in the MODE register.
1298 struct SIModeRegisterDefaults {
1299   /// Floating point opcodes that support exception flag gathering quiet and
1300   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
1301   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
1302   /// quieting.
1303   bool IEEE : 1;
1304 
1305   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
1306   /// clamp NaN to zero; otherwise, pass NaN through.
1307   bool DX10Clamp : 1;
1308 
1309   /// If this is set, neither input or output denormals are flushed for most f32
1310   /// instructions.
1311   DenormalMode FP32Denormals;
1312 
1313   /// If this is set, neither input or output denormals are flushed for both f64
1314   /// and f16/v2f16 instructions.
1315   DenormalMode FP64FP16Denormals;
1316 
SIModeRegisterDefaultsSIModeRegisterDefaults1317   SIModeRegisterDefaults() :
1318     IEEE(true),
1319     DX10Clamp(true),
1320     FP32Denormals(DenormalMode::getIEEE()),
1321     FP64FP16Denormals(DenormalMode::getIEEE()) {}
1322 
1323   SIModeRegisterDefaults(const Function &F);
1324 
getDefaultForCallingConvSIModeRegisterDefaults1325   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
1326     SIModeRegisterDefaults Mode;
1327     Mode.IEEE = !AMDGPU::isShader(CC);
1328     return Mode;
1329   }
1330 
1331   bool operator ==(const SIModeRegisterDefaults Other) const {
1332     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
1333            FP32Denormals == Other.FP32Denormals &&
1334            FP64FP16Denormals == Other.FP64FP16Denormals;
1335   }
1336 
allFP32DenormalsSIModeRegisterDefaults1337   bool allFP32Denormals() const {
1338     return FP32Denormals == DenormalMode::getIEEE();
1339   }
1340 
allFP64FP16DenormalsSIModeRegisterDefaults1341   bool allFP64FP16Denormals() const {
1342     return FP64FP16Denormals == DenormalMode::getIEEE();
1343   }
1344 
1345   /// Get the encoding value for the FP_DENORM bits of the mode register for the
1346   /// FP32 denormal mode.
fpDenormModeSPValueSIModeRegisterDefaults1347   uint32_t fpDenormModeSPValue() const {
1348     if (FP32Denormals == DenormalMode::getPreserveSign())
1349       return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1350     if (FP32Denormals.Output == DenormalMode::PreserveSign)
1351       return FP_DENORM_FLUSH_OUT;
1352     if (FP32Denormals.Input == DenormalMode::PreserveSign)
1353       return FP_DENORM_FLUSH_IN;
1354     return FP_DENORM_FLUSH_NONE;
1355   }
1356 
1357   /// Get the encoding value for the FP_DENORM bits of the mode register for the
1358   /// FP64/FP16 denormal mode.
fpDenormModeDPValueSIModeRegisterDefaults1359   uint32_t fpDenormModeDPValue() const {
1360     if (FP64FP16Denormals == DenormalMode::getPreserveSign())
1361       return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1362     if (FP64FP16Denormals.Output == DenormalMode::PreserveSign)
1363       return FP_DENORM_FLUSH_OUT;
1364     if (FP64FP16Denormals.Input == DenormalMode::PreserveSign)
1365       return FP_DENORM_FLUSH_IN;
1366     return FP_DENORM_FLUSH_NONE;
1367   }
1368 
1369   /// Returns true if a flag is compatible if it's enabled in the callee, but
1370   /// disabled in the caller.
oneWayCompatibleSIModeRegisterDefaults1371   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
1372     return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
1373   }
1374 
1375   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
1376   // be able to override.
isInlineCompatibleSIModeRegisterDefaults1377   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
1378     if (DX10Clamp != CalleeMode.DX10Clamp)
1379       return false;
1380     if (IEEE != CalleeMode.IEEE)
1381       return false;
1382 
1383     // Allow inlining denormals enabled into denormals flushed functions.
1384     return oneWayCompatible(FP64FP16Denormals.Input !=
1385                                 DenormalMode::PreserveSign,
1386                             CalleeMode.FP64FP16Denormals.Input !=
1387                                 DenormalMode::PreserveSign) &&
1388            oneWayCompatible(FP64FP16Denormals.Output !=
1389                                 DenormalMode::PreserveSign,
1390                             CalleeMode.FP64FP16Denormals.Output !=
1391                                 DenormalMode::PreserveSign) &&
1392            oneWayCompatible(FP32Denormals.Input != DenormalMode::PreserveSign,
1393                             CalleeMode.FP32Denormals.Input !=
1394                                 DenormalMode::PreserveSign) &&
1395            oneWayCompatible(FP32Denormals.Output != DenormalMode::PreserveSign,
1396                             CalleeMode.FP32Denormals.Output !=
1397                                 DenormalMode::PreserveSign);
1398   }
1399 };
1400 
1401 } // end namespace AMDGPU
1402 
/// Stream insertion for a TargetIDSetting value \p S, declared in namespace
/// llvm rather than inside AMDGPU. The text written for each setting is
/// defined in the implementation.
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);
1405 
1406 } // end namespace llvm
1407 
1408 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1409