1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "SIDefines.h"
13 #include "llvm/IR/CallingConv.h"
14 #include "llvm/Support/Alignment.h"
15 
16 struct amd_kernel_code_t;
17 
18 namespace llvm {
19 
20 struct Align;
21 class Argument;
22 class Function;
23 class GCNSubtarget;
24 class GlobalValue;
25 class MCRegisterClass;
26 class MCRegisterInfo;
27 class MCSubtargetInfo;
28 class StringRef;
29 class Triple;
30 
31 namespace amdhsa {
32 struct kernel_descriptor_t;
33 }
34 
35 namespace AMDGPU {
36 
37 struct IsaVersion;
38 
39 /// \returns HSA OS ABI Version identification.
40 Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
41 /// \returns True if HSA OS ABI Version identification is 2,
42 /// false otherwise.
43 bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
44 /// \returns True if HSA OS ABI Version identification is 3,
45 /// false otherwise.
46 bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
47 /// \returns True if HSA OS ABI Version identification is 4,
48 /// false otherwise.
49 bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
50 /// \returns True if HSA OS ABI Version identification is 5,
51 /// false otherwise.
52 bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 and above,
/// false otherwise.
55 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
56 
/// One entry of the buffer-format lookup tables queried via
/// getGcnBufferFormatInfo(): a combined format encoding together with the
/// component-count/format fields it corresponds to.
struct GcnBufferFormatInfo {
  unsigned Format;        // Combined buffer-format encoding (lookup key of
                          // the single-argument getGcnBufferFormatInfo).
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Numeric-format field.
  unsigned DataFormat;    // Data-format field.
};
64 
65 #define GET_MIMGBaseOpcode_DECL
66 #define GET_MIMGDim_DECL
67 #define GET_MIMGEncoding_DECL
68 #define GET_MIMGLZMapping_DECL
69 #define GET_MIMGMIPMapping_DECL
70 #define GET_MIMGBiASMapping_DECL
71 #include "AMDGPUGenSearchableTables.inc"
72 
73 namespace IsaInfo {
74 
// SGPR-count constants used by the IsaInfo queries below.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // SGPRs set aside for the trap handler (inferred from the name; see uses
  // in the implementation file — TODO confirm).
  TRAP_NUM_SGPRS = 16
};
81 
/// Four-state value of a target-ID feature (used for the xnack and sramecc
/// settings tracked by AMDGPUTargetID below).
enum class TargetIDSetting {
  Unsupported, // Feature does not exist on this target.
  Any,
  Off,
  On
};
88 
89 class AMDGPUTargetID {
90 private:
91   const MCSubtargetInfo &STI;
92   TargetIDSetting XnackSetting;
93   TargetIDSetting SramEccSetting;
94 
95 public:
96   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
97   ~AMDGPUTargetID() = default;
98 
99   /// \return True if the current xnack setting is not "Unsupported".
100   bool isXnackSupported() const {
101     return XnackSetting != TargetIDSetting::Unsupported;
102   }
103 
104   /// \returns True if the current xnack setting is "On" or "Any".
105   bool isXnackOnOrAny() const {
106     return XnackSetting == TargetIDSetting::On ||
107         XnackSetting == TargetIDSetting::Any;
108   }
109 
110   /// \returns True if current xnack setting is "On" or "Off",
111   /// false otherwise.
112   bool isXnackOnOrOff() const {
113     return getXnackSetting() == TargetIDSetting::On ||
114         getXnackSetting() == TargetIDSetting::Off;
115   }
116 
117   /// \returns The current xnack TargetIDSetting, possible options are
118   /// "Unsupported", "Any", "Off", and "On".
119   TargetIDSetting getXnackSetting() const {
120     return XnackSetting;
121   }
122 
123   /// Sets xnack setting to \p NewXnackSetting.
124   void setXnackSetting(TargetIDSetting NewXnackSetting) {
125     XnackSetting = NewXnackSetting;
126   }
127 
128   /// \return True if the current sramecc setting is not "Unsupported".
129   bool isSramEccSupported() const {
130     return SramEccSetting != TargetIDSetting::Unsupported;
131   }
132 
133   /// \returns True if the current sramecc setting is "On" or "Any".
134   bool isSramEccOnOrAny() const {
135   return SramEccSetting == TargetIDSetting::On ||
136       SramEccSetting == TargetIDSetting::Any;
137   }
138 
139   /// \returns True if current sramecc setting is "On" or "Off",
140   /// false otherwise.
141   bool isSramEccOnOrOff() const {
142     return getSramEccSetting() == TargetIDSetting::On ||
143         getSramEccSetting() == TargetIDSetting::Off;
144   }
145 
146   /// \returns The current sramecc TargetIDSetting, possible options are
147   /// "Unsupported", "Any", "Off", and "On".
148   TargetIDSetting getSramEccSetting() const {
149     return SramEccSetting;
150   }
151 
152   /// Sets sramecc setting to \p NewSramEccSetting.
153   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
154     SramEccSetting = NewSramEccSetting;
155   }
156 
157   void setTargetIDFromFeaturesString(StringRef FS);
158   void setTargetIDFromTargetIDStream(StringRef TargetID);
159 
160   /// \returns String representation of an object.
161   std::string toString() const;
162 };
163 
164 /// \returns Wavefront size for given subtarget \p STI.
165 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
166 
167 /// \returns Local memory size in bytes for given subtarget \p STI.
168 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
169 
170 /// \returns Number of execution units per compute unit for given subtarget \p
171 /// STI.
172 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
173 
174 /// \returns Maximum number of work groups per compute unit for given subtarget
175 /// \p STI and limited by given \p FlatWorkGroupSize.
176 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
177                                unsigned FlatWorkGroupSize);
178 
179 /// \returns Minimum number of waves per execution unit for given subtarget \p
180 /// STI.
181 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
182 
183 /// \returns Maximum number of waves per execution unit for given subtarget \p
184 /// STI without any kind of limitation.
185 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
186 
187 /// \returns Number of waves per execution unit required to support the given \p
188 /// FlatWorkGroupSize.
189 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
190                                    unsigned FlatWorkGroupSize);
191 
192 /// \returns Minimum flat work group size for given subtarget \p STI.
193 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
194 
195 /// \returns Maximum flat work group size for given subtarget \p STI.
196 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
197 
198 /// \returns Number of waves per work group for given subtarget \p STI and
199 /// \p FlatWorkGroupSize.
200 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
201                               unsigned FlatWorkGroupSize);
202 
203 /// \returns SGPR allocation granularity for given subtarget \p STI.
204 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
205 
206 /// \returns SGPR encoding granularity for given subtarget \p STI.
207 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
208 
209 /// \returns Total number of SGPRs for given subtarget \p STI.
210 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
211 
212 /// \returns Addressable number of SGPRs for given subtarget \p STI.
213 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
214 
215 /// \returns Minimum number of SGPRs that meets the given number of waves per
216 /// execution unit requirement for given subtarget \p STI.
217 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
218 
219 /// \returns Maximum number of SGPRs that meets the given number of waves per
220 /// execution unit requirement for given subtarget \p STI.
221 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
222                         bool Addressable);
223 
224 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
225 /// STI when the given special registers are used.
226 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
227                           bool FlatScrUsed, bool XNACKUsed);
228 
229 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
230 /// STI when the given special registers are used. XNACK is inferred from
231 /// \p STI.
232 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
233                           bool FlatScrUsed);
234 
235 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
236 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
237 /// register counts.
238 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
239 
240 /// \returns VGPR allocation granularity for given subtarget \p STI.
241 ///
242 /// For subtargets which support it, \p EnableWavefrontSize32 should match
243 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
244 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
245                              Optional<bool> EnableWavefrontSize32 = None);
246 
247 /// \returns VGPR encoding granularity for given subtarget \p STI.
248 ///
249 /// For subtargets which support it, \p EnableWavefrontSize32 should match
250 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
251 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
252                                 Optional<bool> EnableWavefrontSize32 = None);
253 
254 /// \returns Total number of VGPRs for given subtarget \p STI.
255 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
256 
257 /// \returns Addressable number of VGPRs for given subtarget \p STI.
258 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
259 
260 /// \returns Minimum number of VGPRs that meets given number of waves per
261 /// execution unit requirement for given subtarget \p STI.
262 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
263 
264 /// \returns Maximum number of VGPRs that meets given number of waves per
265 /// execution unit requirement for given subtarget \p STI.
266 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
267 
268 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
269 /// \p NumVGPRs are used.
270 ///
271 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
272 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
273 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
274                           Optional<bool> EnableWavefrontSize32 = None);
275 
276 } // end namespace IsaInfo
277 
278 LLVM_READONLY
279 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
280 
281 LLVM_READONLY
282 int getSOPPWithRelaxation(uint16_t Opcode);
283 
/// Per-base-opcode properties of MIMG instructions. Backed by a
/// tablegen-generated table (GET_MIMGBaseOpcode_DECL, included above) and
/// queried via getMIMGBaseOpcodeInfo()/getMIMGBaseOpcode().
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  // The fields below describe the opcode's argument list; they are consumed
  // by getAddrSizeMIMGOp() when computing address operand sizes.
  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
  bool BVH;
};
301 
302 LLVM_READONLY
303 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc);
304 
305 LLVM_READONLY
306 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
307 
/// Properties of one MIMG dimension (dim) value; queried via getMIMGDimInfo(),
/// getMIMGDimInfoByEncoding() or getMIMGDimInfoByAsmSuffix().
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;     // Number of coordinate arguments.
  uint8_t NumGradients;  // Number of gradient arguments.
  bool MSAA;
  bool DA;
  uint8_t Encoding;      // Hardware encoding; key for ...ByEncoding lookup.
  const char *AsmSuffix; // Assembler suffix; key for ...ByAsmSuffix lookup.
};
317 
318 LLVM_READONLY
319 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
320 
321 LLVM_READONLY
322 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
323 
324 LLVM_READONLY
325 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
326 
/// Maps an MIMG *_L (explicit LOD) base opcode to its *_LZ (LOD-zero)
/// counterpart; see getMIMGLZMappingInfo().
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};
331 
/// Maps an MIMG *_MIP base opcode to its non-MIP counterpart; see
/// getMIMGMIPMappingInfo().
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};
336 
/// Maps an MIMG *_B (LOD-bias) base opcode to its no-bias counterpart; see
/// getMIMGBiasMappingInfo().
struct MIMGBiasMappingInfo {
  MIMGBaseOpcode Bias;
  MIMGBaseOpcode NoBias;
};
341 
/// Maps an MIMG *_O (offset) base opcode to its no-offset counterpart; see
/// getMIMGOffsetMappingInfo().
struct MIMGOffsetMappingInfo {
  MIMGBaseOpcode Offset;
  MIMGBaseOpcode NoOffset;
};
346 
/// Maps an MIMG gradient (G) base opcode to its 16-bit-gradient (G16)
/// counterpart; see getMIMGG16MappingInfo().
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
351 
352 LLVM_READONLY
353 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
354 
355 LLVM_READONLY
356 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
357 
358 LLVM_READONLY
359 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
360 
361 LLVM_READONLY
362 const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
363 
364 LLVM_READONLY
365 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
366 
367 LLVM_READONLY
368 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
369                   unsigned VDataDwords, unsigned VAddrDwords);
370 
371 LLVM_READONLY
372 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
373 
374 LLVM_READONLY
375 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
376                            const MIMGDimInfo *Dim, bool IsA16,
377                            bool IsG16Supported);
378 
/// Per-instruction MIMG properties (encoding variant and operand dword
/// counts); queried via getMIMGInfo() and used as the search space of
/// getMIMGOpcode().
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;  // Key into MIMGBaseOpcodeInfo.
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};
386 
387 LLVM_READONLY
388 const MIMGInfo *getMIMGInfo(unsigned Opc);
389 
390 LLVM_READONLY
391 int getMTBUFBaseOpcode(unsigned Opc);
392 
393 LLVM_READONLY
394 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
395 
396 LLVM_READONLY
397 int getMTBUFElements(unsigned Opc);
398 
399 LLVM_READONLY
400 bool getMTBUFHasVAddr(unsigned Opc);
401 
402 LLVM_READONLY
403 bool getMTBUFHasSrsrc(unsigned Opc);
404 
405 LLVM_READONLY
406 bool getMTBUFHasSoffset(unsigned Opc);
407 
408 LLVM_READONLY
409 int getMUBUFBaseOpcode(unsigned Opc);
410 
411 LLVM_READONLY
412 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
413 
414 LLVM_READONLY
415 int getMUBUFElements(unsigned Opc);
416 
417 LLVM_READONLY
418 bool getMUBUFHasVAddr(unsigned Opc);
419 
420 LLVM_READONLY
421 bool getMUBUFHasSrsrc(unsigned Opc);
422 
423 LLVM_READONLY
424 bool getMUBUFHasSoffset(unsigned Opc);
425 
426 LLVM_READONLY
427 bool getMUBUFIsBufferInv(unsigned Opc);
428 
429 LLVM_READONLY
430 bool getSMEMIsBuffer(unsigned Opc);
431 
432 LLVM_READONLY
433 bool getVOP1IsSingle(unsigned Opc);
434 
435 LLVM_READONLY
436 bool getVOP2IsSingle(unsigned Opc);
437 
438 LLVM_READONLY
439 bool getVOP3IsSingle(unsigned Opc);
440 
441 LLVM_READONLY
442 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
443                                                   uint8_t NumComponents,
444                                                   uint8_t NumFormat,
445                                                   const MCSubtargetInfo &STI);
446 LLVM_READONLY
447 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
448                                                   const MCSubtargetInfo &STI);
449 
450 LLVM_READONLY
451 int getMCOpcode(uint16_t Opcode, unsigned Gen);
452 
453 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
454                                const MCSubtargetInfo *STI);
455 
456 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
457     const MCSubtargetInfo *STI);
458 
459 bool isGroupSegment(const GlobalValue *GV);
460 bool isGlobalSegment(const GlobalValue *GV);
461 bool isReadOnlySegment(const GlobalValue *GV);
462 
463 /// \returns True if constants should be emitted to .text section for given
464 /// target triple \p TT, false otherwise.
465 bool shouldEmitConstantsToTextSection(const Triple &TT);
466 
467 /// \returns Integer value requested using \p F's \p Name attribute.
468 ///
469 /// \returns \p Default if attribute is not present.
470 ///
471 /// \returns \p Default and emits error if requested value cannot be converted
472 /// to integer.
473 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
474 
475 /// \returns A pair of integer values requested using \p F's \p Name attribute
476 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
477 /// is false).
478 ///
479 /// \returns \p Default if attribute is not present.
480 ///
481 /// \returns \p Default and emits error if one of the requested values cannot be
482 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
483 /// not present.
484 std::pair<int, int> getIntegerPairAttribute(const Function &F,
485                                             StringRef Name,
486                                             std::pair<int, int> Default,
487                                             bool OnlyFirstRequired = false);
488 
489 /// Represents the counter values to wait for in an s_waitcnt instruction.
490 ///
491 /// Large values (including the maximum possible integer) can be used to
492 /// represent "don't care" waits.
struct Waitcnt {
  // Counter fields; ~0u ("all bits set") encodes a "don't care" wait.
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  // Default-constructed Waitcnt waits on nothing (all counters "don't care").
  Waitcnt() = default;
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns A Waitcnt waiting on all counters; VsCnt is only waited on
  /// when the target has it (\p HasVscnt).
  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }

  /// \returns A Waitcnt waiting on every counter except VsCnt.
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  /// \returns True if any counter expresses a real wait.
  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  /// Same as hasWait(), but ignores VsCnt.
  bool hasWaitExceptVsCnt() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u;
  }

  /// \returns True if VsCnt expresses a real wait.
  bool hasWaitVsCnt() const {
    return VsCnt != ~0u;
  }

  /// \returns True if this wait is at least as strict as \p Other on every
  /// counter, i.e. executing this wait also satisfies \p Other.
  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  /// \returns The per-counter minimum of both waits, i.e. the weakest wait
  /// that satisfies both this and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};
531 
532 /// \returns Vmcnt bit mask for given isa \p Version.
533 unsigned getVmcntBitMask(const IsaVersion &Version);
534 
535 /// \returns Expcnt bit mask for given isa \p Version.
536 unsigned getExpcntBitMask(const IsaVersion &Version);
537 
538 /// \returns Lgkmcnt bit mask for given isa \p Version.
539 unsigned getLgkmcntBitMask(const IsaVersion &Version);
540 
541 /// \returns Waitcnt bit mask for given isa \p Version.
542 unsigned getWaitcntBitMask(const IsaVersion &Version);
543 
544 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
545 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
546 
547 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
548 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
549 
550 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
551 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
552 
553 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
554 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
555 /// \p Lgkmcnt respectively.
556 ///
557 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
558 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
559 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
560 ///     \p Expcnt = \p Waitcnt[6:4]
561 ///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
562 ///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
563 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
564                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
565 
566 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
567 
568 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
569 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
570                      unsigned Vmcnt);
571 
572 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
573 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
574                       unsigned Expcnt);
575 
576 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
577 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
578                        unsigned Lgkmcnt);
579 
580 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
581 /// \p Version.
582 ///
583 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
584 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
585 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
586 ///     Waitcnt[6:4]   = \p Expcnt
587 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
588 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
589 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
590 ///
591 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
592 /// isa \p Version.
593 unsigned encodeWaitcnt(const IsaVersion &Version,
594                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
595 
596 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
597 
598 namespace Hwreg {
599 
600 LLVM_READONLY
601 int64_t getHwregId(const StringRef Name);
602 
603 LLVM_READNONE
604 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
605 
606 LLVM_READNONE
607 bool isValidHwreg(int64_t Id);
608 
609 LLVM_READNONE
610 bool isValidHwregOffset(int64_t Offset);
611 
612 LLVM_READNONE
613 bool isValidHwregWidth(int64_t Width);
614 
615 LLVM_READNONE
616 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
617 
618 LLVM_READNONE
619 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
620 
621 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
622 
623 } // namespace Hwreg
624 
625 namespace Exp {
626 
627 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
628 
629 LLVM_READONLY
630 unsigned getTgtId(const StringRef Name);
631 
632 LLVM_READNONE
633 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
634 
635 } // namespace Exp
636 
637 namespace MTBUFFormat {
638 
639 LLVM_READNONE
640 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
641 
642 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
643 
644 int64_t getDfmt(const StringRef Name);
645 
646 StringRef getDfmtName(unsigned Id);
647 
648 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
649 
650 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
651 
652 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
653 
654 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
655 
656 int64_t getUnifiedFormat(const StringRef Name);
657 
658 StringRef getUnifiedFormatName(unsigned Id);
659 
660 bool isValidUnifiedFormat(unsigned Val);
661 
662 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);
663 
664 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
665 
666 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
667 
668 } // namespace MTBUFFormat
669 
670 namespace SendMsg {
671 
672 LLVM_READONLY
673 int64_t getMsgId(const StringRef Name);
674 
675 LLVM_READONLY
676 int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
677 
678 LLVM_READNONE
679 StringRef getMsgName(int64_t MsgId);
680 
681 LLVM_READNONE
682 StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
683 
684 LLVM_READNONE
685 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
686 
687 LLVM_READNONE
688 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
689                   bool Strict = true);
690 
691 LLVM_READNONE
692 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
693                       const MCSubtargetInfo &STI, bool Strict = true);
694 
695 LLVM_READNONE
696 bool msgRequiresOp(int64_t MsgId);
697 
698 LLVM_READNONE
699 bool msgSupportsStream(int64_t MsgId, int64_t OpId);
700 
701 void decodeMsg(unsigned Val,
702                uint16_t &MsgId,
703                uint16_t &OpId,
704                uint16_t &StreamId);
705 
706 LLVM_READNONE
707 uint64_t encodeMsg(uint64_t MsgId,
708                    uint64_t OpId,
709                    uint64_t StreamId);
710 
711 } // namespace SendMsg
712 
713 
714 unsigned getInitialPSInputAddr(const Function &F);
715 
716 bool getHasColorExport(const Function &F);
717 
718 bool getHasDepthExport(const Function &F);
719 
720 LLVM_READNONE
721 bool isShader(CallingConv::ID CC);
722 
723 LLVM_READNONE
724 bool isGraphics(CallingConv::ID CC);
725 
726 LLVM_READNONE
727 bool isCompute(CallingConv::ID CC);
728 
729 LLVM_READNONE
730 bool isEntryFunctionCC(CallingConv::ID CC);
731 
732 // These functions are considered entrypoints into the current module, i.e. they
733 // are allowed to be called from outside the current module. This is different
734 // from isEntryFunctionCC, which is only true for functions that are entered by
735 // the hardware. Module entry points include all entry functions but also
736 // include functions that can be called from other functions inside or outside
737 // the current module. Module entry functions are allowed to allocate LDS.
738 LLVM_READNONE
739 bool isModuleEntryFunctionCC(CallingConv::ID CC);
740 
741 // FIXME: Remove this when calling conventions cleaned up
742 LLVM_READNONE
743 inline bool isKernel(CallingConv::ID CC) {
744   switch (CC) {
745   case CallingConv::AMDGPU_KERNEL:
746   case CallingConv::SPIR_KERNEL:
747     return true;
748   default:
749     return false;
750   }
751 }
752 
753 bool hasXNACK(const MCSubtargetInfo &STI);
754 bool hasSRAMECC(const MCSubtargetInfo &STI);
755 bool hasMIMG_R128(const MCSubtargetInfo &STI);
756 bool hasGFX10A16(const MCSubtargetInfo &STI);
757 bool hasG16(const MCSubtargetInfo &STI);
758 bool hasPackedD16(const MCSubtargetInfo &STI);
759 
760 bool isSI(const MCSubtargetInfo &STI);
761 bool isCI(const MCSubtargetInfo &STI);
762 bool isVI(const MCSubtargetInfo &STI);
763 bool isGFX9(const MCSubtargetInfo &STI);
764 bool isGFX9Plus(const MCSubtargetInfo &STI);
765 bool isGFX10(const MCSubtargetInfo &STI);
766 bool isGFX10Plus(const MCSubtargetInfo &STI);
767 bool isGCN3Encoding(const MCSubtargetInfo &STI);
768 bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
769 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
770 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
771 bool isGFX90A(const MCSubtargetInfo &STI);
772 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
773 
774 /// Is Reg - scalar register
775 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
776 
777 /// Is there any intersection between registers
778 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
779 
780 /// If \p Reg is a pseudo reg, return the correct hardware register given
781 /// \p STI otherwise return \p Reg.
782 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
783 
784 /// Convert hardware register \p Reg to a pseudo register
785 LLVM_READNONE
786 unsigned mc2PseudoReg(unsigned Reg);
787 
788 /// Can this operand also contain immediate values?
789 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
790 
791 /// Is this floating-point operand?
792 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
793 
794 /// Does this operand support only inlinable literals?
795 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
796 
797 /// Get the size in bits of a register from the register class \p RC.
798 unsigned getRegBitWidth(unsigned RCID);
799 
800 /// Get the size in bits of a register from the register class \p RC.
801 unsigned getRegBitWidth(const MCRegisterClass &RC);
802 
803 /// Get size of register operand
804 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
805                            unsigned OpNo);
806 
807 LLVM_READNONE
/// \returns The size in bytes (4, 8 or 2) of the immediate encoding for an
/// operand with the given AMDGPU operand type.
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  // 32-bit (and packed 2x16 / KIMM) operands encode a 4-byte immediate.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
    return 4;

  // 64-bit operands.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return 8;

  // 16-bit operands (including the packed 2x16 register-inline forms).
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}
851 
852 LLVM_READNONE
/// \returns The immediate-encoding size in bytes of operand \p OpNo of
/// \p Desc (convenience wrapper over the MCOperandInfo overload above).
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}
856 
857 /// Is this literal inlinable, and not one of the values intended for floating
858 /// point values.
859 LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // The inline-constant encoding covers the integers -16..64 inclusive.
  return -16 <= Literal && Literal <= 64;
}
863 
/// Is this 64-bit literal inlinable? When \p HasInv2Pi is set, the 1/(2*pi)
/// constant is presumably also accepted as an inline value — confirm against
/// the implementation in the .cpp.
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

/// 32-bit counterpart of isInlinableLiteral64.
LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

/// 16-bit counterpart of isInlinableLiteral64.
LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

/// Is \p Literal inlinable when used as a packed 2x16-bit operand?
LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

/// Integer-only counterpart of isInlinableLiteralV216 (excludes the
/// floating-point inline values).
LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

/// Can this packed 2x16-bit literal be folded into an operand?
/// NOTE(review): exact folding rules are not visible here — see the .cpp.
LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

/// \returns true if kernel/function argument \p Arg is passed in SGPRs rather
/// than VGPRs.
bool isArgPassedInSGPR(const Argument *Arg);

/// \returns true if \p EncodedOffset is legal for an SMRD instruction with an
/// unsigned offset field on subtarget \p ST. \p EncodedOffset is the
/// already-encoded value, not a byte offset.
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

/// Signed-offset counterpart of isLegalSMRDEncodedUnsignedOffset.
/// \p IsBuffer distinguishes S_BUFFER accesses, whose offset rules differ
/// from plain S_LOAD (see getSMRDEncodedOffset).
LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset, on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);

/// \returns The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction, or None if it cannot be encoded. This is only useful on
/// CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// For FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \returns The number of bits available for the offset field in flat
/// instructions, depending on whether the \p Signed form is used.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);

/// \returns true if this offset is small enough to fit in the SMRD
/// offset field.  \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

/// Split \p Imm into a register (\p SOffset) and immediate (\p ImmOffset)
/// part for a MUBUF access, honoring \p Alignment.
/// NOTE(review): the bool result presumably reports whether the split
/// succeeded — confirm against the implementation.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));
925 
926 LLVM_READNONE
927 inline bool isLegal64BitDPPControl(unsigned DC) {
928   return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
929 }
930 
/// \returns true if the intrinsic identified by \p IntrID is a source of
/// divergence (its result may differ between lanes).
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
933 
934 // Track defaults for fields in the MODE registser.
935 struct SIModeRegisterDefaults {
936   /// Floating point opcodes that support exception flag gathering quiet and
937   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
938   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
939   /// quieting.
940   bool IEEE : 1;
941 
942   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
943   /// clamp NaN to zero; otherwise, pass NaN through.
944   bool DX10Clamp : 1;
945 
946   /// If this is set, neither input or output denormals are flushed for most f32
947   /// instructions.
948   bool FP32InputDenormals : 1;
949   bool FP32OutputDenormals : 1;
950 
951   /// If this is set, neither input or output denormals are flushed for both f64
952   /// and f16/v2f16 instructions.
953   bool FP64FP16InputDenormals : 1;
954   bool FP64FP16OutputDenormals : 1;
955 
956   SIModeRegisterDefaults() :
957     IEEE(true),
958     DX10Clamp(true),
959     FP32InputDenormals(true),
960     FP32OutputDenormals(true),
961     FP64FP16InputDenormals(true),
962     FP64FP16OutputDenormals(true) {}
963 
964   SIModeRegisterDefaults(const Function &F);
965 
966   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
967     SIModeRegisterDefaults Mode;
968     Mode.IEEE = !AMDGPU::isShader(CC);
969     return Mode;
970   }
971 
972   bool operator ==(const SIModeRegisterDefaults Other) const {
973     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
974            FP32InputDenormals == Other.FP32InputDenormals &&
975            FP32OutputDenormals == Other.FP32OutputDenormals &&
976            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
977            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
978   }
979 
980   bool allFP32Denormals() const {
981     return FP32InputDenormals && FP32OutputDenormals;
982   }
983 
984   bool allFP64FP16Denormals() const {
985     return FP64FP16InputDenormals && FP64FP16OutputDenormals;
986   }
987 
988   /// Get the encoding value for the FP_DENORM bits of the mode register for the
989   /// FP32 denormal mode.
990   uint32_t fpDenormModeSPValue() const {
991     if (FP32InputDenormals && FP32OutputDenormals)
992       return FP_DENORM_FLUSH_NONE;
993     if (FP32InputDenormals)
994       return FP_DENORM_FLUSH_OUT;
995     if (FP32OutputDenormals)
996       return FP_DENORM_FLUSH_IN;
997     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
998   }
999 
1000   /// Get the encoding value for the FP_DENORM bits of the mode register for the
1001   /// FP64/FP16 denormal mode.
1002   uint32_t fpDenormModeDPValue() const {
1003     if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
1004       return FP_DENORM_FLUSH_NONE;
1005     if (FP64FP16InputDenormals)
1006       return FP_DENORM_FLUSH_OUT;
1007     if (FP64FP16OutputDenormals)
1008       return FP_DENORM_FLUSH_IN;
1009     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
1010   }
1011 
1012   /// Returns true if a flag is compatible if it's enabled in the callee, but
1013   /// disabled in the caller.
1014   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
1015     return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
1016   }
1017 
1018   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
1019   // be able to override.
1020   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
1021     if (DX10Clamp != CalleeMode.DX10Clamp)
1022       return false;
1023     if (IEEE != CalleeMode.IEEE)
1024       return false;
1025 
1026     // Allow inlining denormals enabled into denormals flushed functions.
1027     return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
1028            oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
1029            oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
1030            oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
1031   }
1032 };
1033 
1034 } // end namespace AMDGPU
1035 
/// Stream a textual representation of TargetIDSetting \p S to \p OS
/// (defined in the corresponding .cpp).
raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);
1038 
1039 } // end namespace llvm
1040 
1041 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
1042