//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H

#include "SIDefines.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"

struct amd_kernel_code_t;

namespace llvm {

struct Align;
class Argument;
class Function;
class GCNSubtarget;
class GlobalValue;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
class Triple;

namespace amdhsa {
struct kernel_descriptor_t;
}

namespace AMDGPU {

struct IsaVersion;

/// \returns HSA OS ABI Version identification.
Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
/// false otherwise.
bool isHsaAbiVersion2(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3,
/// false otherwise.
bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 3 or 4,
/// false otherwise.
bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI);
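
// Illustrative use, with hypothetical emitter helpers (not part of this
// header): metadata emission typically dispatches on the detected ABI
// version, e.g.
//
//   if (isHsaAbiVersion2(STI))
//     emitCodeObjectV2Metadata();     // hypothetical
//   else if (isHsaAbiVersion3Or4(STI))
//     emitCodeObjectV3PlusMetadata(); // hypothetical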

struct GcnBufferFormatInfo {
  unsigned Format;
  unsigned BitsPerComp;
  unsigned NumComponents;
  unsigned NumFormat;
  unsigned DataFormat;
};

#define GET_MIMGBaseOpcode_DECL
#define GET_MIMGDim_DECL
#define GET_MIMGEncoding_DECL
#define GET_MIMGLZMapping_DECL
#define GET_MIMGMIPMapping_DECL
#include "AMDGPUGenSearchableTables.inc"

namespace IsaInfo {

enum {
  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
  // doesn't spill SGPRs as much as when 80 is set.
  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96,
  TRAP_NUM_SGPRS = 16
};

enum class TargetIDSetting {
  Unsupported,
  Any,
  Off,
  On
};

class AMDGPUTargetID {
private:
  const MCSubtargetInfo &STI;
  TargetIDSetting XnackSetting;
  TargetIDSetting SramEccSetting;

public:
  explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
  ~AMDGPUTargetID() = default;

  /// \returns True if the current xnack setting is not "Unsupported".
  bool isXnackSupported() const {
    return XnackSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current xnack setting is "On" or "Any".
  bool isXnackOnOrAny() const {
    return XnackSetting == TargetIDSetting::On ||
           XnackSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current xnack setting is "On" or "Off",
  /// false otherwise.
  bool isXnackOnOrOff() const {
    return getXnackSetting() == TargetIDSetting::On ||
           getXnackSetting() == TargetIDSetting::Off;
  }

  /// \returns The current xnack TargetIDSetting; possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getXnackSetting() const {
    return XnackSetting;
  }

  /// Sets xnack setting to \p NewXnackSetting.
  void setXnackSetting(TargetIDSetting NewXnackSetting) {
    XnackSetting = NewXnackSetting;
  }

  /// \returns True if the current sramecc setting is not "Unsupported".
  bool isSramEccSupported() const {
    return SramEccSetting != TargetIDSetting::Unsupported;
  }

  /// \returns True if the current sramecc setting is "On" or "Any".
  bool isSramEccOnOrAny() const {
    return SramEccSetting == TargetIDSetting::On ||
           SramEccSetting == TargetIDSetting::Any;
  }

  /// \returns True if the current sramecc setting is "On" or "Off",
  /// false otherwise.
  bool isSramEccOnOrOff() const {
    return getSramEccSetting() == TargetIDSetting::On ||
           getSramEccSetting() == TargetIDSetting::Off;
  }

  /// \returns The current sramecc TargetIDSetting; possible options are
  /// "Unsupported", "Any", "Off", and "On".
  TargetIDSetting getSramEccSetting() const {
    return SramEccSetting;
  }

  /// Sets sramecc setting to \p NewSramEccSetting.
  void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
    SramEccSetting = NewSramEccSetting;
  }

  void setTargetIDFromFeaturesString(StringRef FS);
  void setTargetIDFromTargetIDStream(StringRef TargetID);

  /// \returns String representation of this object.
  std::string toString() const;
};
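
// A minimal usage sketch (assuming STI is a valid MCSubtargetInfo and the
// feature string is illustrative): settings are derived from a feature
// string, then queried.
//
//   AMDGPUTargetID ID(STI);
//   ID.setTargetIDFromFeaturesString("+xnack,-sramecc");
//   if (ID.isXnackOnOrAny())
//     ...; // e.g. account for the extra SGPRs XNACK requires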

/// \returns Wavefront size for given subtarget \p STI.
unsigned getWavefrontSize(const MCSubtargetInfo *STI);

/// \returns Local memory size in bytes for given subtarget \p STI.
unsigned getLocalMemorySize(const MCSubtargetInfo *STI);

/// \returns Number of execution units per compute unit for given subtarget \p
/// STI.
unsigned getEUsPerCU(const MCSubtargetInfo *STI);

/// \returns Maximum number of work groups per compute unit for given subtarget
/// \p STI and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize);

/// \returns Minimum number of waves per execution unit for given subtarget \p
/// STI.
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Maximum number of waves per execution unit for given subtarget \p
/// STI without any kind of limitation.
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);

/// \returns Number of waves per execution unit required to support the given
/// \p FlatWorkGroupSize.
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize);

/// \returns Minimum flat work group size for given subtarget \p STI.
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Maximum flat work group size for given subtarget \p STI.
unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);

/// \returns Number of waves per work group for given subtarget \p STI and
/// \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize);
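
// Worked example (assuming the result rounds FlatWorkGroupSize up to whole
// waves): a flat work group size of 256 on a wave64 subtarget requires
// ceil(256 / 64) = 4 waves per work group.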

/// \returns SGPR allocation granularity for given subtarget \p STI.
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);

/// \returns SGPR encoding granularity for given subtarget \p STI.
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);

/// \returns Total number of SGPRs for given subtarget \p STI.
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of SGPRs for given subtarget \p STI.
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed);

/// \returns Number of extra SGPRs implicitly required by given subtarget \p
/// STI when the given special registers are used. XNACK is inferred from
/// \p STI.
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed);

/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);

/// \returns VGPR allocation granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32 = None);

/// \returns VGPR encoding granularity for given subtarget \p STI.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match
/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32 = None);

/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);

/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement for given subtarget \p STI.
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);

/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
///
/// For subtargets which support it, \p EnableWavefrontSize32 should match the
/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32 = None);

} // end namespace IsaInfo

LLVM_READONLY
int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);

LLVM_READONLY
int getSOPPWithRelaxation(uint16_t Opcode);

struct MIMGBaseOpcodeInfo {
  MIMGBaseOpcode BaseOpcode;
  bool Store;
  bool Atomic;
  bool AtomicX2;
  bool Sampler;
  bool Gather4;

  uint8_t NumExtraArgs;
  bool Gradients;
  bool G16;
  bool Coordinates;
  bool LodOrClampOrMip;
  bool HasD16;
  bool MSAA;
};

LLVM_READONLY
const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);

struct MIMGDimInfo {
  MIMGDim Dim;
  uint8_t NumCoords;
  uint8_t NumGradients;
  bool MSAA;
  bool DA;
  uint8_t Encoding;
  const char *AsmSuffix;
};

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);

LLVM_READONLY
const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);

struct MIMGLZMappingInfo {
  MIMGBaseOpcode L;
  MIMGBaseOpcode LZ;
};

struct MIMGMIPMappingInfo {
  MIMGBaseOpcode MIP;
  MIMGBaseOpcode NONMIP;
};

struct MIMGG16MappingInfo {
  MIMGBaseOpcode G;
  MIMGBaseOpcode G16;
};

LLVM_READONLY
const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);

LLVM_READONLY
const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);

LLVM_READONLY
const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);

LLVM_READONLY
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords);

LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);

LLVM_READONLY
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
                           const MIMGDimInfo *Dim, bool IsA16,
                           bool IsG16Supported);

struct MIMGInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t MIMGEncoding;
  uint8_t VDataDwords;
  uint8_t VAddrDwords;
};

LLVM_READONLY
const MIMGInfo *getMIMGInfo(unsigned Opc);

LLVM_READONLY
int getMTBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMTBUFElements(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMTBUFHasSoffset(unsigned Opc);

LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);

LLVM_READONLY
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);

LLVM_READONLY
int getMUBUFElements(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasVAddr(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSrsrc(unsigned Opc);

LLVM_READONLY
bool getMUBUFHasSoffset(unsigned Opc);

LLVM_READONLY
bool getMUBUFIsBufferInv(unsigned Opc);

LLVM_READONLY
bool getSMEMIsBuffer(unsigned Opc);

LLVM_READONLY
bool getVOP1IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP2IsSingle(unsigned Opc);

LLVM_READONLY
bool getVOP3IsSingle(unsigned Opc);

LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI);
LLVM_READONLY
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI);

LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI);

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI);

bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);

/// \returns True if constants should be emitted to .text section for given
/// target triple \p TT, false otherwise.
bool shouldEmitConstantsToTextSection(const Triple &TT);

/// \returns Integer value requested using \p F's \p Name attribute.
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if requested value cannot be converted
/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);

/// \returns A pair of integer values requested using \p F's \p Name attribute
/// in "first[,second]" format ("second" is optional if \p OnlyFirstRequired is
/// true, and required otherwise).
///
/// \returns \p Default if attribute is not present.
///
/// \returns \p Default and emits error if one of the requested values cannot
/// be converted to integer, or \p OnlyFirstRequired is false and the "second"
/// value is not present.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired = false);
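
// For example, the "amdgpu-flat-work-group-size" function attribute uses this
// "first,second" form, so
//   getIntegerPairAttribute(F, "amdgpu-flat-work-group-size", {1, 1024})
// returns the parsed {min, max} pair, or {1, 1024} if the attribute is absent.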

/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
struct Waitcnt {
  unsigned VmCnt = ~0u;
  unsigned ExpCnt = ~0u;
  unsigned LgkmCnt = ~0u;
  unsigned VsCnt = ~0u;

  Waitcnt() {}
  Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
      : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}

  static Waitcnt allZero(bool HasVscnt) {
    return Waitcnt(0, 0, 0, HasVscnt ? 0 : ~0u);
  }
  static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }

  bool hasWait() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
  }

  bool hasWaitExceptVsCnt() const {
    return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u;
  }

  bool hasWaitVsCnt() const {
    return VsCnt != ~0u;
  }

  bool dominates(const Waitcnt &Other) const {
    return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
           LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
  }

  Waitcnt combined(const Waitcnt &Other) const {
    return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
                   std::min(LgkmCnt, Other.LgkmCnt),
                   std::min(VsCnt, Other.VsCnt));
  }
};
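
// Illustrative semantics: each counter is an upper bound to wait for, so
// combining two waits takes the per-counter minimum. Combining
// {VmCnt=2, rest don't-care} with {LgkmCnt=0, rest don't-care} yields
// {VmCnt=2, ExpCnt=~0u, LgkmCnt=0, VsCnt=~0u}, which dominates() both inputs.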

/// \returns Vmcnt bit mask for given isa \p Version.
unsigned getVmcntBitMask(const IsaVersion &Version);

/// \returns Expcnt bit mask for given isa \p Version.
unsigned getExpcntBitMask(const IsaVersion &Version);

/// \returns Lgkmcnt bit mask for given isa \p Version.
unsigned getLgkmcntBitMask(const IsaVersion &Version);

/// \returns Waitcnt bit mask for given isa \p Version.
unsigned getWaitcntBitMask(const IsaVersion &Version);

/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);

/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);

/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
///     \p Vmcnt = \p Waitcnt[3:0]                      (pre-gfx9 only)
///     \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14]  (gfx9+ only)
///     \p Expcnt = \p Waitcnt[6:4]
///     \p Lgkmcnt = \p Waitcnt[11:8]                   (pre-gfx10 only)
///     \p Lgkmcnt = \p Waitcnt[13:8]                   (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);

/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt);

/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt);

/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt);

/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
///     Waitcnt[3:0]   = \p Vmcnt       (pre-gfx9 only)
///     Waitcnt[3:0]   = \p Vmcnt[3:0]  (gfx9+ only)
///     Waitcnt[6:4]   = \p Expcnt
///     Waitcnt[11:8]  = \p Lgkmcnt     (pre-gfx10 only)
///     Waitcnt[13:8]  = \p Lgkmcnt     (gfx10+ only)
///     Waitcnt[15:14] = \p Vmcnt[5:4]  (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
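
// Worked example for the pre-gfx9 layout above: Vmcnt=3, Expcnt=2 and
// Lgkmcnt=1 pack into (1 << 8) | (2 << 4) | 3 == 0x123, and decodeWaitcnt()
// recovers the same three values from 0x123.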

namespace Hwreg {

LLVM_READONLY
int64_t getHwregId(const StringRef Name);

LLVM_READNONE
bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI);

LLVM_READNONE
bool isValidHwreg(int64_t Id);

LLVM_READNONE
bool isValidHwregOffset(int64_t Offset);

LLVM_READNONE
bool isValidHwregWidth(int64_t Width);

LLVM_READNONE
uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width);

LLVM_READNONE
StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI);

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width);
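
// As an illustration, assuming the s_getreg_b32/s_setreg_b32 SIMM16 layout
// (id in bits [5:0], offset in [10:6], width-1 in [15:11]; the exact shifts
// are not defined in this header), encodeHwreg(Id, Offset, Width) would
// produce Id | (Offset << 6) | ((Width - 1) << 11).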

} // namespace Hwreg

namespace Exp {

bool getTgtName(unsigned Id, StringRef &Name, int &Index);

LLVM_READONLY
unsigned getTgtId(const StringRef Name);

LLVM_READNONE
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);

} // namespace Exp

namespace MTBUFFormat {

LLVM_READNONE
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);

int64_t getDfmt(const StringRef Name);

StringRef getDfmtName(unsigned Id);

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);

bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);

bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);

int64_t getUnifiedFormat(const StringRef Name);

StringRef getUnifiedFormatName(unsigned Id);

bool isValidUnifiedFormat(unsigned Val);

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt);

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);

} // namespace MTBUFFormat

namespace SendMsg {

LLVM_READONLY
int64_t getMsgId(const StringRef Name);

LLVM_READONLY
int64_t getMsgOpId(int64_t MsgId, const StringRef Name);

LLVM_READNONE
StringRef getMsgName(int64_t MsgId);

LLVM_READNONE
StringRef getMsgOpName(int64_t MsgId, int64_t OpId);

LLVM_READNONE
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict = true);

LLVM_READNONE
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict = true);

LLVM_READNONE
bool msgRequiresOp(int64_t MsgId);

LLVM_READNONE
bool msgSupportsStream(int64_t MsgId, int64_t OpId);

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId);

LLVM_READNONE
uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId);

} // namespace SendMsg

unsigned getInitialPSInputAddr(const Function &F);

LLVM_READNONE
bool isShader(CallingConv::ID CC);

LLVM_READNONE
bool isGraphics(CallingConv::ID CC);

LLVM_READNONE
bool isCompute(CallingConv::ID CC);

LLVM_READNONE
bool isEntryFunctionCC(CallingConv::ID CC);

// These functions are considered entrypoints into the current module, i.e.
// they are allowed to be called from outside the current module. This is
// different from isEntryFunctionCC, which is only true for functions that are
// entered by the hardware. Module entry points include all entry functions but
// also include functions that can be called from other functions inside or
// outside the current module. Module entry functions are allowed to allocate
// LDS.
LLVM_READNONE
bool isModuleEntryFunctionCC(CallingConv::ID CC);

// FIXME: Remove this when calling conventions are cleaned up.
LLVM_READNONE
inline bool isKernel(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI);
bool hasSRAMECC(const MCSubtargetInfo &STI);
bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasGFX10A16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);

bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
bool isGFX9Plus(const MCSubtargetInfo &STI);
bool isGFX10(const MCSubtargetInfo &STI);
bool isGFX10Plus(const MCSubtargetInfo &STI);
bool isGCN3Encoding(const MCSubtargetInfo &STI);
bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);

/// Is \p Reg a scalar register?
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);

/// Do registers \p Reg0 and \p Reg1 intersect?
bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI);

/// If \p Reg is a pseudo register, return the correct hardware register given
/// \p STI, otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);

/// Convert hardware register \p Reg to a pseudo register.
LLVM_READNONE
unsigned mc2PseudoReg(unsigned Reg);

/// Can this operand also contain immediate values?
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Is this a floating-point operand?
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Does this operand support only inlinable literals?
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);

/// Get the size in bits of a register from the register class \p RCID.
unsigned getRegBitWidth(unsigned RCID);

/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);

/// Get the size of a register operand.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo);

LLVM_READNONE
inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
  switch (OpInfo.OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    return 4;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return 8;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return 2;

  default:
    llvm_unreachable("unhandled operand type");
  }
}

LLVM_READNONE
inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
  return getOperandSize(Desc.OpInfo[OpNo]);
}

/// Is this literal inlinable, and not one of the values intended for
/// floating-point values?
LLVM_READNONE
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}
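
// For example, isInlinableIntLiteral(64) and isInlinableIntLiteral(-16) are
// true, while 65 and -17 would have to be materialized as literal constants.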

/// Is this literal inlinable?
LLVM_READNONE
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi);

LLVM_READNONE
bool isInlinableIntLiteralV216(int32_t Literal);

LLVM_READNONE
bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);

bool isArgPassedInSGPR(const Argument *Arg);

LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset);

LLVM_READONLY
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer);

/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
/// offsets.
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset);
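
// For example, on a subtarget with dword SMRD immediate offsets, a ByteOffset
// of 16 converts to 4; on byte-offset subtargets the value is returned
// unchanged.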

/// \returns The encoding that will be used for \p ByteOffset in the
/// SMRD offset field, or None if it won't fit. On GFX9 and GFX10
/// S_LOAD instructions have a signed offset; on other subtargets it is
/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer);

/// \returns The encoding that can be used for a 32-bit literal offset in an
/// SMRD instruction. This is only useful on CI.
Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset);

/// For FLAT segment the offset must be positive;
/// MSB is ignored and forced to zero.
///
/// \returns The number of bits available for the offset field in flat
/// instructions.
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed);

/// \returns True if this offset is small enough to fit in the SMRD
/// offset field. \p ByteOffset should be the offset in bytes and
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);

bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget,
                      Align Alignment = Align(4));

LLVM_READNONE
inline bool isLegal64BitDPPControl(unsigned DC) {
  return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
}

/// \returns True if the intrinsic is divergent.
bool isIntrinsicSourceOfDivergence(unsigned IntrID);

// Track defaults for fields in the MODE register.
struct SIModeRegisterDefaults {
  /// Floating point opcodes that support exception flag gathering quiet and
  /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
  /// quieting.
  bool IEEE : 1;

  /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
  /// clamp NaN to zero; otherwise, pass NaN through.
  bool DX10Clamp : 1;

  /// If this is set, neither input nor output denormals are flushed for most
  /// f32 instructions.
  bool FP32InputDenormals : 1;
  bool FP32OutputDenormals : 1;

  /// If this is set, neither input nor output denormals are flushed for both
  /// f64 and f16/v2f16 instructions.
  bool FP64FP16InputDenormals : 1;
  bool FP64FP16OutputDenormals : 1;

  SIModeRegisterDefaults() :
    IEEE(true),
    DX10Clamp(true),
    FP32InputDenormals(true),
    FP32OutputDenormals(true),
    FP64FP16InputDenormals(true),
    FP64FP16OutputDenormals(true) {}

  SIModeRegisterDefaults(const Function &F);

  static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
    SIModeRegisterDefaults Mode;
    Mode.IEEE = !AMDGPU::isShader(CC);
    return Mode;
  }

  bool operator==(const SIModeRegisterDefaults Other) const {
    return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
           FP32InputDenormals == Other.FP32InputDenormals &&
           FP32OutputDenormals == Other.FP32OutputDenormals &&
           FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
           FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
  }

  bool allFP32Denormals() const {
    return FP32InputDenormals && FP32OutputDenormals;
  }

  bool allFP64FP16Denormals() const {
    return FP64FP16InputDenormals && FP64FP16OutputDenormals;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP32 denormal mode.
  uint32_t fpDenormModeSPValue() const {
    if (FP32InputDenormals && FP32OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP32InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP32OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Get the encoding value for the FP_DENORM bits of the mode register for
  /// the FP64/FP16 denormal mode.
  uint32_t fpDenormModeDPValue() const {
    if (FP64FP16InputDenormals && FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_NONE;
    if (FP64FP16InputDenormals)
      return FP_DENORM_FLUSH_OUT;
    if (FP64FP16OutputDenormals)
      return FP_DENORM_FLUSH_IN;
    return FP_DENORM_FLUSH_IN_FLUSH_OUT;
  }

  /// Returns true if the callee's flag is compatible with the caller's: the
  /// modes match, or the flag is enabled in the callee but disabled in the
  /// caller.
  static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
    return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
  }

  // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode
  // should be able to override.
  bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
    if (DX10Clamp != CalleeMode.DX10Clamp)
      return false;
    if (IEEE != CalleeMode.IEEE)
      return false;

    // Allow inlining denormals enabled into denormals flushed functions.
    return oneWayCompatible(FP64FP16InputDenormals,
                            CalleeMode.FP64FP16InputDenormals) &&
           oneWayCompatible(FP64FP16OutputDenormals,
                            CalleeMode.FP64FP16OutputDenormals) &&
           oneWayCompatible(FP32InputDenormals,
                            CalleeMode.FP32InputDenormals) &&
           oneWayCompatible(FP32OutputDenormals,
                            CalleeMode.FP32OutputDenormals);
  }
};
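
// A minimal sketch of intended use (assuming the callee's mode has already
// been computed): derive the caller's defaults from its calling convention,
// then gate inlining on mode compatibility.
//
//   SIModeRegisterDefaults CallerMode =
//       SIModeRegisterDefaults::getDefaultForCallingConv(CallerCC);
//   if (!CallerMode.isInlineCompatible(CalleeMode))
//     return false; // reject the inlining candidate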

} // end namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H