1 //===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10 #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11 
12 #include "SIDefines.h"
13 #include "llvm/IR/CallingConv.h"
14 #include "llvm/Support/Alignment.h"
15 
16 struct amd_kernel_code_t;
17 
18 namespace llvm {
19 
20 struct Align;
21 class Argument;
22 class Function;
23 class GCNSubtarget;
24 class GlobalValue;
25 class MCRegisterClass;
26 class MCRegisterInfo;
27 class MCSubtargetInfo;
28 class StringRef;
29 class Triple;
30 
31 namespace amdhsa {
32 struct kernel_descriptor_t;
33 }
34 
35 namespace AMDGPU {
36 
37 struct IsaVersion;
38 
39 /// \returns HSA OS ABI Version identification.
40 Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
41 /// \returns True if HSA OS ABI Version identification is 2,
42 /// false otherwise.
43 bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
44 /// \returns True if HSA OS ABI Version identification is 3,
45 /// false otherwise.
46 bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
47 
/// Describes one GCN buffer format. Entries are looked up either by the full
/// \p Format encoding or by the (\p BitsPerComp, \p NumComponents,
/// \p NumFormat) triple — see the getGcnBufferFormatInfo overloads below.
struct GcnBufferFormatInfo {
  unsigned Format;        // Full buffer format encoding.
  unsigned BitsPerComp;   // Bits per component.
  unsigned NumComponents; // Number of components.
  unsigned NumFormat;     // Numeric format (nfmt) encoding — confirm vs table.
  unsigned DataFormat;    // Data format (dfmt) encoding — confirm vs table.
};
55 
56 #define GET_MIMGBaseOpcode_DECL
57 #define GET_MIMGDim_DECL
58 #define GET_MIMGEncoding_DECL
59 #define GET_MIMGLZMapping_DECL
60 #define GET_MIMGMIPMapping_DECL
61 #include "AMDGPUGenSearchableTables.inc"
62 
63 namespace IsaInfo {
64 
// SGPR budget constants that do not vary with the subtarget query functions
// below.
enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  // SGPR count associated with the trap handler (per the name; confirm at
  // the use sites in the .cpp).
  TRAP_NUM_SGPRS = 16
};
71 
/// State of a single target-ID feature (xnack or sramecc): either the feature
/// is not supported at all, or it is in one of the "any"/"off"/"on" states.
enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};
78 
79 class AMDGPUTargetID {
80 private:
81   TargetIDSetting XnackSetting;
82   TargetIDSetting SramEccSetting;
83 
84 public:
85   explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
86   ~AMDGPUTargetID() = default;
87 
88   /// \return True if the current xnack setting is not "Unsupported".
89   bool isXnackSupported() const {
90     return XnackSetting != TargetIDSetting::Unsupported;
91   }
92 
93   /// \returns True if the current xnack setting is "On" or "Any".
94   bool isXnackOnOrAny() const {
95     return XnackSetting == TargetIDSetting::On ||
96         XnackSetting == TargetIDSetting::Any;
97   }
98 
99   /// \returns True if current xnack setting is "On" or "Off",
100   /// false otherwise.
101   bool isXnackOnOrOff() const {
102     return getXnackSetting() == TargetIDSetting::On ||
103         getXnackSetting() == TargetIDSetting::Off;
104   }
105 
106   /// \returns The current xnack TargetIDSetting, possible options are
107   /// "Unsupported", "Any", "Off", and "On".
108   TargetIDSetting getXnackSetting() const {
109     return XnackSetting;
110   }
111 
112   /// Sets xnack setting to \p NewXnackSetting.
113   void setXnackSetting(TargetIDSetting NewXnackSetting) {
114     XnackSetting = NewXnackSetting;
115   }
116 
117   /// \return True if the current sramecc setting is not "Unsupported".
118   bool isSramEccSupported() const {
119     return SramEccSetting != TargetIDSetting::Unsupported;
120   }
121 
122   /// \returns True if the current sramecc setting is "On" or "Any".
123   bool isSramEccOnOrAny() const {
124   return SramEccSetting == TargetIDSetting::On ||
125       SramEccSetting == TargetIDSetting::Any;
126   }
127 
128   /// \returns True if current sramecc setting is "On" or "Off",
129   /// false otherwise.
130   bool isSramEccOnOrOff() const {
131     return getSramEccSetting() == TargetIDSetting::On ||
132         getSramEccSetting() == TargetIDSetting::Off;
133   }
134 
135   /// \returns The current sramecc TargetIDSetting, possible options are
136   /// "Unsupported", "Any", "Off", and "On".
137   TargetIDSetting getSramEccSetting() const {
138     return SramEccSetting;
139   }
140 
141   /// Sets sramecc setting to \p NewSramEccSetting.
142   void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
143     SramEccSetting = NewSramEccSetting;
144   }
145 
146   void setTargetIDFromFeaturesString(StringRef FS);
147   void setTargetIDFromTargetIDStream(StringRef TargetID);
148 };
149 
150 /// Streams isa version string for given subtarget \p STI into \p Stream.
151 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
152 
153 /// \returns Wavefront size for given subtarget \p STI.
154 unsigned getWavefrontSize(const MCSubtargetInfo *STI);
155 
156 /// \returns Local memory size in bytes for given subtarget \p STI.
157 unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
158 
159 /// \returns Number of execution units per compute unit for given subtarget \p
160 /// STI.
161 unsigned getEUsPerCU(const MCSubtargetInfo *STI);
162 
163 /// \returns Maximum number of work groups per compute unit for given subtarget
164 /// \p STI and limited by given \p FlatWorkGroupSize.
165 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
166                                unsigned FlatWorkGroupSize);
167 
168 /// \returns Minimum number of waves per execution unit for given subtarget \p
169 /// STI.
170 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
171 
172 /// \returns Maximum number of waves per execution unit for given subtarget \p
173 /// STI without any kind of limitation.
174 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
175 
176 /// \returns Number of waves per execution unit required to support the given \p
177 /// FlatWorkGroupSize.
178 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
179                                    unsigned FlatWorkGroupSize);
180 
181 /// \returns Minimum flat work group size for given subtarget \p STI.
182 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
183 
184 /// \returns Maximum flat work group size for given subtarget \p STI.
185 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
186 
187 /// \returns Number of waves per work group for given subtarget \p STI and
188 /// \p FlatWorkGroupSize.
189 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
190                               unsigned FlatWorkGroupSize);
191 
192 /// \returns SGPR allocation granularity for given subtarget \p STI.
193 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
194 
195 /// \returns SGPR encoding granularity for given subtarget \p STI.
196 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
197 
198 /// \returns Total number of SGPRs for given subtarget \p STI.
199 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
200 
201 /// \returns Addressable number of SGPRs for given subtarget \p STI.
202 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
203 
204 /// \returns Minimum number of SGPRs that meets the given number of waves per
205 /// execution unit requirement for given subtarget \p STI.
206 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
207 
208 /// \returns Maximum number of SGPRs that meets the given number of waves per
209 /// execution unit requirement for given subtarget \p STI.
210 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
211                         bool Addressable);
212 
213 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
214 /// STI when the given special registers are used.
215 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
216                           bool FlatScrUsed, bool XNACKUsed);
217 
218 /// \returns Number of extra SGPRs implicitly required by given subtarget \p
219 /// STI when the given special registers are used. XNACK is inferred from
220 /// \p STI.
221 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
222                           bool FlatScrUsed);
223 
224 /// \returns Number of SGPR blocks needed for given subtarget \p STI when
225 /// \p NumSGPRs are used. \p NumSGPRs should already include any special
226 /// register counts.
227 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
228 
229 /// \returns VGPR allocation granularity for given subtarget \p STI.
230 ///
231 /// For subtargets which support it, \p EnableWavefrontSize32 should match
232 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
233 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
234                              Optional<bool> EnableWavefrontSize32 = None);
235 
236 /// \returns VGPR encoding granularity for given subtarget \p STI.
237 ///
238 /// For subtargets which support it, \p EnableWavefrontSize32 should match
239 /// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
240 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
241                                 Optional<bool> EnableWavefrontSize32 = None);
242 
243 /// \returns Total number of VGPRs for given subtarget \p STI.
244 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
245 
246 /// \returns Addressable number of VGPRs for given subtarget \p STI.
247 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
248 
249 /// \returns Minimum number of VGPRs that meets given number of waves per
250 /// execution unit requirement for given subtarget \p STI.
251 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
252 
253 /// \returns Maximum number of VGPRs that meets given number of waves per
254 /// execution unit requirement for given subtarget \p STI.
255 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
256 
257 /// \returns Number of VGPR blocks needed for given subtarget \p STI when
258 /// \p NumVGPRs are used.
259 ///
260 /// For subtargets which support it, \p EnableWavefrontSize32 should match the
261 /// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
262 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs,
263                           Optional<bool> EnableWavefrontSize32 = None);
264 
265 } // end namespace IsaInfo
266 
267 LLVM_READONLY
268 int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
269 
270 LLVM_READONLY
271 int getSOPPWithRelaxation(uint16_t Opcode);
272 
/// Per-base-opcode properties of a MIMG instruction. Entries are generated
/// into AMDGPUGenSearchableTables.inc (GET_MIMGBaseOpcode_DECL above) and
/// retrieved with getMIMGBaseOpcodeInfo.
struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
};
288 
289 LLVM_READONLY
290 const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
291 
/// Properties of a MIMG dimension (dim) value. Entries can be looked up by
/// enum value, by hardware encoding, or by assembler suffix — see the
/// getMIMGDimInfo* functions below.
struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};
300 
301 LLVM_READONLY
302 const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
303 
304 LLVM_READONLY
305 const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
306 
307 LLVM_READONLY
308 const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
309 
/// Table entry pairing a MIMG base opcode \p L with its \p LZ counterpart;
/// retrieved with getMIMGLZMappingInfo.
struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};
314 
/// Table entry pairing a MIMG base opcode \p MIP with its \p NONMIP
/// counterpart; retrieved with getMIMGMIPMappingInfo.
struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};
319 
/// Table entry pairing a MIMG base opcode \p G with its \p G16 counterpart;
/// retrieved with getMIMGG16MappingInfo.
struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};
324 
325 LLVM_READONLY
326 const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
327 
328 LLVM_READONLY
329 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
330 
331 LLVM_READONLY
332 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
333 
334 LLVM_READONLY
335 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
336                   unsigned VDataDwords, unsigned VAddrDwords);
337 
338 LLVM_READONLY
339 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
340 
/// Per-opcode MIMG instruction properties; retrieved with getMIMGInfo.
struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};
348 
349 LLVM_READONLY
350 const MIMGInfo *getMIMGInfo(unsigned Opc);
351 
352 LLVM_READONLY
353 int getMTBUFBaseOpcode(unsigned Opc);
354 
355 LLVM_READONLY
356 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
357 
358 LLVM_READONLY
359 int getMTBUFElements(unsigned Opc);
360 
361 LLVM_READONLY
362 bool getMTBUFHasVAddr(unsigned Opc);
363 
364 LLVM_READONLY
365 bool getMTBUFHasSrsrc(unsigned Opc);
366 
367 LLVM_READONLY
368 bool getMTBUFHasSoffset(unsigned Opc);
369 
370 LLVM_READONLY
371 int getMUBUFBaseOpcode(unsigned Opc);
372 
373 LLVM_READONLY
374 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
375 
376 LLVM_READONLY
377 int getMUBUFElements(unsigned Opc);
378 
379 LLVM_READONLY
380 bool getMUBUFHasVAddr(unsigned Opc);
381 
382 LLVM_READONLY
383 bool getMUBUFHasSrsrc(unsigned Opc);
384 
385 LLVM_READONLY
386 bool getMUBUFHasSoffset(unsigned Opc);
387 
388 LLVM_READONLY
389 bool getSMEMIsBuffer(unsigned Opc);
390 
391 LLVM_READONLY
392 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
393                                                   uint8_t NumComponents,
394                                                   uint8_t NumFormat,
395                                                   const MCSubtargetInfo &STI);
396 LLVM_READONLY
397 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
398                                                   const MCSubtargetInfo &STI);
399 
400 LLVM_READONLY
401 int getMCOpcode(uint16_t Opcode, unsigned Gen);
402 
403 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
404                                const MCSubtargetInfo *STI);
405 
406 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
407     const MCSubtargetInfo *STI);
408 
409 bool isGroupSegment(const GlobalValue *GV);
410 bool isGlobalSegment(const GlobalValue *GV);
411 bool isReadOnlySegment(const GlobalValue *GV);
412 
413 /// \returns True if constants should be emitted to .text section for given
414 /// target triple \p TT, false otherwise.
415 bool shouldEmitConstantsToTextSection(const Triple &TT);
416 
417 /// \returns Integer value requested using \p F's \p Name attribute.
418 ///
419 /// \returns \p Default if attribute is not present.
420 ///
421 /// \returns \p Default and emits error if requested value cannot be converted
422 /// to integer.
423 int getIntegerAttribute(const Function &F, StringRef Name, int Default);
424 
425 /// \returns A pair of integer values requested using \p F's \p Name attribute
426 /// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
427 /// is false).
428 ///
429 /// \returns \p Default if attribute is not present.
430 ///
431 /// \returns \p Default and emits error if one of the requested values cannot be
432 /// converted to integer, or \p OnlyFirstRequired is false and "second" value is
433 /// not present.
434 std::pair<int, int> getIntegerPairAttribute(const Function &F,
435                                             StringRef Name,
436                                             std::pair<int, int> Default,
437                                             bool OnlyFirstRequired = false);
438 
439 /// Represents the counter values to wait for in an s_waitcnt instruction.
440 ///
441 /// Large values (including the maximum possible integer) can be used to
442 /// represent "don't care" waits.
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  /// \returns A wait on every counter; vscnt is only waited on when
  /// \p HasVscnt is set.
  static Waitcnt allZero(bool HasVscnt) {
    Waitcnt W(0, 0, 0, ~0u);
    if (HasVscnt)
      W.VsCnt = 0;
    return W;
  }

  /// \returns A wait on every counter except vscnt.
  static Waitcnt allZeroExceptVsCnt() {
    Waitcnt W;
    W.VmCnt = 0;
    W.ExpCnt = 0;
    W.LgkmCnt = 0;
    return W;
  }

  /// \returns True if any counter holds a real (non-"don't care") wait.
  bool hasWait() const {
    const unsigned NoWait = ~0u;
    return !(VmCnt == NoWait && ExpCnt == NoWait && LgkmCnt == NoWait &&
             VsCnt == NoWait);
  }

  /// \returns True if every wait here is at least as strict as the
  /// corresponding wait in \p Other.
  bool dominates(const Waitcnt &Other) const {
    if (VmCnt > Other.VmCnt)
      return false;
    if (ExpCnt > Other.ExpCnt)
      return false;
    if (LgkmCnt > Other.LgkmCnt)
      return false;
    return VsCnt <= Other.VsCnt;
  }

  /// \returns The strictest combination (per-counter minimum) of this wait
  /// and \p Other.
  Waitcnt combined(const Waitcnt &Other) const {
    Waitcnt Result;
    Result.VmCnt = std::min(VmCnt, Other.VmCnt);
    Result.ExpCnt = std::min(ExpCnt, Other.ExpCnt);
    Result.LgkmCnt = std::min(LgkmCnt, Other.LgkmCnt);
    Result.VsCnt = std::min(VsCnt, Other.VsCnt);
    return Result;
  }
};
473 
474 /// \returns Vmcnt bit mask for given isa \p Version.
475 unsigned getVmcntBitMask(const IsaVersion &Version);
476 
477 /// \returns Expcnt bit mask for given isa \p Version.
478 unsigned getExpcntBitMask(const IsaVersion &Version);
479 
480 /// \returns Lgkmcnt bit mask for given isa \p Version.
481 unsigned getLgkmcntBitMask(const IsaVersion &Version);
482 
483 /// \returns Waitcnt bit mask for given isa \p Version.
484 unsigned getWaitcntBitMask(const IsaVersion &Version);
485 
486 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
487 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
488 
489 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
490 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
491 
492 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
493 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
494 
495 /// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
496 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
497 /// \p Lgkmcnt respectively.
498 ///
499 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
500 ///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
501 ///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
502 ///     \p Expcnt = \p Waitcnt[6:4]
503 ///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
504 ///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
505 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
506                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
507 
508 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
509 
510 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
511 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
512                      unsigned Vmcnt);
513 
514 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
515 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
516                       unsigned Expcnt);
517 
518 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
519 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
520                        unsigned Lgkmcnt);
521 
522 /// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
523 /// \p Version.
524 ///
525 /// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
526 ///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
527 ///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
528 ///     Waitcnt[6:4]   = \p Expcnt
529 ///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
530 ///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
531 ///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
532 ///
533 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
534 /// isa \p Version.
535 unsigned encodeWaitcnt(const IsaVersion &Version,
536                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
537 
538 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
539 
540 namespace Hwreg {
541 
542 LLVM_READONLY
543 int64_t getHwregId(const StringRef Name);
544 
545 LLVM_READNONE
546 bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);
547 
548 LLVM_READNONE
549 bool isValidHwreg(int64_t Id);
550 
551 LLVM_READNONE
552 bool isValidHwregOffset(int64_t Offset);
553 
554 LLVM_READNONE
555 bool isValidHwregWidth(int64_t Width);
556 
557 LLVM_READNONE
558 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);
559 
560 LLVM_READNONE
561 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);
562 
563 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
564 
565 } // namespace Hwreg
566 
567 namespace Exp {
568 
569 bool getTgtName(unsigned Id, StringRef &Name, int &Index);
570 
571 LLVM_READONLY
572 unsigned getTgtId(const StringRef Name);
573 
574 LLVM_READNONE
575 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
576 
577 } // namespace Exp
578 
579 namespace MTBUFFormat {
580 
581 LLVM_READNONE
582 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
583 
584 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
585 
586 int64_t getDfmt(const StringRef Name);
587 
588 StringRef getDfmtName(unsigned Id);
589 
590 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
591 
592 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
593 
594 bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
595 
596 bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
597 
598 int64_t getUnifiedFormat(const StringRef Name);
599 
600 StringRef getUnifiedFormatName(unsigned Id);
601 
602 bool isValidUnifiedFormat(unsigned Val);
603 
604 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);
605 
606 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
607 
608 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
609 
610 } // namespace MTBUFFormat
611 
612 namespace SendMsg {
613 
614 LLVM_READONLY
615 int64_t getMsgId(const StringRef Name);
616 
617 LLVM_READONLY
618 int64_t getMsgOpId(int64_t MsgId, const StringRef Name);
619 
620 LLVM_READNONE
621 StringRef getMsgName(int64_t MsgId);
622 
623 LLVM_READNONE
624 StringRef getMsgOpName(int64_t MsgId, int64_t OpId);
625 
626 LLVM_READNONE
627 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);
628 
629 LLVM_READNONE
630 bool isValidMsgOp(int64_t MsgId, int64_t OpId, bool Strict = true);
631 
632 LLVM_READNONE
633 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, bool Strict = true);
634 
635 LLVM_READNONE
636 bool msgRequiresOp(int64_t MsgId);
637 
638 LLVM_READNONE
639 bool msgSupportsStream(int64_t MsgId, int64_t OpId);
640 
641 void decodeMsg(unsigned Val,
642                uint16_t &MsgId,
643                uint16_t &OpId,
644                uint16_t &StreamId);
645 
646 LLVM_READNONE
647 uint64_t encodeMsg(uint64_t MsgId,
648                    uint64_t OpId,
649                    uint64_t StreamId);
650 
651 } // namespace SendMsg
652 
653 
654 unsigned getInitialPSInputAddr(const Function &F);
655 
656 LLVM_READNONE
657 bool isShader(CallingConv::ID CC);
658 
659 LLVM_READNONE
660 bool isGraphics(CallingConv::ID CC);
661 
662 LLVM_READNONE
663 bool isCompute(CallingConv::ID CC);
664 
665 LLVM_READNONE
666 bool isEntryFunctionCC(CallingConv::ID CC);
667 
668 // These functions are considered entrypoints into the current module, i.e. they
669 // are allowed to be called from outside the current module. This is different
670 // from isEntryFunctionCC, which is only true for functions that are entered by
671 // the hardware. Module entry points include all entry functions but also
672 // include functions that can be called from other functions inside or outside
673 // the current module. Module entry functions are allowed to allocate LDS.
674 LLVM_READNONE
675 bool isModuleEntryFunctionCC(CallingConv::ID CC);
676 
677 // FIXME: Remove this when calling conventions cleaned up
678 LLVM_READNONE
679 inline bool isKernel(CallingConv::ID CC) {
680   switch (CC) {
681   case CallingConv::AMDGPU_KERNEL:
682   case CallingConv::SPIR_KERNEL:
683     return true;
684   default:
685     return false;
686   }
687 }
688 
689 bool hasXNACK(const MCSubtargetInfo &STI);
690 bool hasSRAMECC(const MCSubtargetInfo &STI);
691 bool hasMIMG_R128(const MCSubtargetInfo &STI);
692 bool hasGFX10A16(const MCSubtargetInfo &STI);
693 bool hasG16(const MCSubtargetInfo &STI);
694 bool hasPackedD16(const MCSubtargetInfo &STI);
695 
696 bool isSI(const MCSubtargetInfo &STI);
697 bool isCI(const MCSubtargetInfo &STI);
698 bool isVI(const MCSubtargetInfo &STI);
699 bool isGFX9(const MCSubtargetInfo &STI);
700 bool isGFX9Plus(const MCSubtargetInfo &STI);
701 bool isGFX10(const MCSubtargetInfo &STI);
702 bool isGFX10Plus(const MCSubtargetInfo &STI);
703 bool isGCN3Encoding(const MCSubtargetInfo &STI);
704 bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
705 bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
706 
/// \returns True if \p Reg is a scalar register.
708 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
709 
710 /// Is there any intersection between registers
711 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);
712 
713 /// If \p Reg is a pseudo reg, return the correct hardware register given
714 /// \p STI otherwise return \p Reg.
715 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
716 
717 /// Convert hardware register \p Reg to a pseudo register
718 LLVM_READNONE
719 unsigned mc2PseudoReg(unsigned Reg);
720 
721 /// Can this operand also contain immediate values?
722 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
723 
724 /// Is this floating-point operand?
725 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
726 
/// Does this operand support only inlinable literals?
728 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
729 
730 /// Get the size in bits of a register from the register class \p RC.
731 unsigned getRegBitWidth(unsigned RCID);
732 
733 /// Get the size in bits of a register from the register class \p RC.
734 unsigned getRegBitWidth(const MCRegisterClass &RC);
735 
736 /// Get size of register operand
737 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
738                            unsigned OpNo);
739 
740 LLVM_READNONE
741 inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
742   switch (OpInfo.OperandType) {
743   case AMDGPU::OPERAND_REG_IMM_INT32:
744   case AMDGPU::OPERAND_REG_IMM_FP32:
745   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
746   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
747   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
748   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
749     return 4;
750 
751   case AMDGPU::OPERAND_REG_IMM_INT64:
752   case AMDGPU::OPERAND_REG_IMM_FP64:
753   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
754   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
755     return 8;
756 
757   case AMDGPU::OPERAND_REG_IMM_INT16:
758   case AMDGPU::OPERAND_REG_IMM_FP16:
759   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
760   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
761   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
762   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
763   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
764   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
765   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
766   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
767   case AMDGPU::OPERAND_REG_IMM_V2INT16:
768   case AMDGPU::OPERAND_REG_IMM_V2FP16:
769     return 2;
770 
771   default:
772     llvm_unreachable("unhandled operand type");
773   }
774 }
775 
776 LLVM_READNONE
777 inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
778   return getOperandSize(Desc.OpInfo[OpNo]);
779 }
780 
781 /// Is this literal inlinable, and not one of the values intended for floating
782 /// point values.
783 LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  // The inline-constant range for integers is the closed interval [-16, 64].
  return -16 <= Literal && Literal <= 64;
}
787 
788 /// Is this literal inlinable
789 LLVM_READNONE
790 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
791 
792 LLVM_READNONE
793 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
794 
795 LLVM_READNONE
796 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
797 
798 LLVM_READNONE
799 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);
800 
801 LLVM_READNONE
802 bool isInlinableIntLiteralV216(int32_t Literal);
803 
804 LLVM_READNONE
805 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
806 
807 bool isArgPassedInSGPR(const Argument *Arg);
808 
809 LLVM_READONLY
810 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
811                                       int64_t EncodedOffset);
812 
813 LLVM_READONLY
814 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
815                                     int64_t EncodedOffset,
816                                     bool IsBuffer);
817 
818 /// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
819 /// offsets.
820 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
821 
822 /// \returns The encoding that will be used for \p ByteOffset in the
823 /// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
824 /// S_LOAD instructions have a signed offset, on other subtargets it is
825 /// unsigned. S_BUFFER has an unsigned offset for all subtargets.
826 Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
827                                        int64_t ByteOffset, bool IsBuffer);
828 
829 /// \return The encoding that can be used for a 32-bit literal offset in an SMRD
/// instruction. This is only useful on CI.
831 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
832                                                 int64_t ByteOffset);
833 
834 /// For FLAT segment the offset must be positive;
835 /// MSB is ignored and forced to zero.
836 ///
837 /// \return The number of bits available for the offset field in flat
838 /// instructions.
839 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);
840 
841 /// \returns true if this offset is small enough to fit in the SMRD
842 /// offset field.  \p ByteOffset should be the offset in bytes and
843 /// not the encoded offset.
844 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
845 
846 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
847                       const GCNSubtarget *Subtarget,
848                       Align Alignment = Align(4));
849 
850 /// \returns true if the intrinsic is divergent
851 bool isIntrinsicSourceOfDivergence(unsigned IntrID);
852 
853 // Track defaults for fields in the MODE registser.
854 struct SIModeRegisterDefaults {
855   /// Floating point opcodes that support exception flag gathering quiet and
856   /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
857   /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
858   /// quieting.
859   bool IEEE : 1;
860 
861   /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
862   /// clamp NaN to zero; otherwise, pass NaN through.
863   bool DX10Clamp : 1;
864 
865   /// If this is set, neither input or output denormals are flushed for most f32
866   /// instructions.
867   bool FP32InputDenormals : 1;
868   bool FP32OutputDenormals : 1;
869 
870   /// If this is set, neither input or output denormals are flushed for both f64
871   /// and f16/v2f16 instructions.
872   bool FP64FP16InputDenormals : 1;
873   bool FP64FP16OutputDenormals : 1;
874 
875   SIModeRegisterDefaults() :
876     IEEE(true),
877     DX10Clamp(true),
878     FP32InputDenormals(true),
879     FP32OutputDenormals(true),
880     FP64FP16InputDenormals(true),
881     FP64FP16OutputDenormals(true) {}
882 
883   SIModeRegisterDefaults(const Function &F);
884 
885   static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
886     SIModeRegisterDefaults Mode;
887     Mode.IEEE = !AMDGPU::isShader(CC);
888     return Mode;
889   }
890 
891   bool operator ==(const SIModeRegisterDefaults Other) const {
892     return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
893            FP32InputDenormals == Other.FP32InputDenormals &&
894            FP32OutputDenormals == Other.FP32OutputDenormals &&
895            FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
896            FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
897   }
898 
899   bool allFP32Denormals() const {
900     return FP32InputDenormals && FP32OutputDenormals;
901   }
902 
903   bool allFP64FP16Denormals() const {
904     return FP64FP16InputDenormals && FP64FP16OutputDenormals;
905   }
906 
907   /// Get the encoding value for the FP_DENORM bits of the mode register for the
908   /// FP32 denormal mode.
909   uint32_t fpDenormModeSPValue() const {
910     if (FP32InputDenormals && FP32OutputDenormals)
911       return FP_DENORM_FLUSH_NONE;
912     if (FP32InputDenormals)
913       return FP_DENORM_FLUSH_OUT;
914     if (FP32OutputDenormals)
915       return FP_DENORM_FLUSH_IN;
916     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
917   }
918 
919   /// Get the encoding value for the FP_DENORM bits of the mode register for the
920   /// FP64/FP16 denormal mode.
921   uint32_t fpDenormModeDPValue() const {
922     if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
923       return FP_DENORM_FLUSH_NONE;
924     if (FP64FP16InputDenormals)
925       return FP_DENORM_FLUSH_OUT;
926     if (FP64FP16OutputDenormals)
927       return FP_DENORM_FLUSH_IN;
928     return FP_DENORM_FLUSH_IN_FLUSH_OUT;
929   }
930 
931   /// Returns true if a flag is compatible if it's enabled in the callee, but
932   /// disabled in the caller.
933   static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
934     return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
935   }
936 
937   // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
938   // be able to override.
939   bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
940     if (DX10Clamp != CalleeMode.DX10Clamp)
941       return false;
942     if (IEEE != CalleeMode.IEEE)
943       return false;
944 
945     // Allow inlining denormals enabled into denormals flushed functions.
946     return oneWayCompatible(FP64FP16InputDenormals, CalleeMode.FP64FP16InputDenormals) &&
947            oneWayCompatible(FP64FP16OutputDenormals, CalleeMode.FP64FP16OutputDenormals) &&
948            oneWayCompatible(FP32InputDenormals, CalleeMode.FP32InputDenormals) &&
949            oneWayCompatible(FP32OutputDenormals, CalleeMode.FP32OutputDenormals);
950   }
951 };
952 
953 } // end namespace AMDGPU
954 
955 raw_ostream &operator<<(raw_ostream &OS,
956                         const AMDGPU::IsaInfo::TargetIDSetting S);
957 
958 } // end namespace llvm
959 
960 #endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
961