1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16
17 #include "AMDGPUMIRFormatter.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24
25 #define GET_INSTRINFO_HEADER
26 #include "AMDGPUGenInstrInfo.inc"
27
28 namespace llvm {
29
30 class APInt;
31 class GCNSubtarget;
32 class LiveVariables;
33 class MachineDominatorTree;
34 class MachineRegisterInfo;
35 class RegScavenger;
36 class TargetRegisterClass;
37 class ScheduleHazardRecognizer;
38
39 /// Mark the MMO of a uniform load if there are no potentially clobbering stores
40 /// on any path from the start of an entry function to this load.
41 static const MachineMemOperand::Flags MONoClobber =
42 MachineMemOperand::MOTargetFlag1;
43
44 /// Mark the MMO of a load as the last use.
45 static const MachineMemOperand::Flags MOLastUse =
46 MachineMemOperand::MOTargetFlag2;
47
48 /// Utility to store machine instructions worklist.
49 struct SIInstrWorklist {
50 SIInstrWorklist() = default;
51
52 void insert(MachineInstr *MI);
53
topSIInstrWorklist54 MachineInstr *top() const {
55 auto iter = InstrList.begin();
56 return *iter;
57 }
58
erase_topSIInstrWorklist59 void erase_top() {
60 auto iter = InstrList.begin();
61 InstrList.erase(iter);
62 }
63
emptySIInstrWorklist64 bool empty() const { return InstrList.empty(); }
65
clearSIInstrWorklist66 void clear() {
67 InstrList.clear();
68 DeferredList.clear();
69 }
70
71 bool isDeferred(MachineInstr *MI);
72
getDeferredListSIInstrWorklist73 SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
74
75 private:
76 /// InstrList contains the MachineInstrs.
77 SetVector<MachineInstr *> InstrList;
78 /// Deferred instructions are specific MachineInstr
79 /// that will be added by insert method.
80 SetVector<MachineInstr *> DeferredList;
81 };
82
83 class SIInstrInfo final : public AMDGPUGenInstrInfo {
84 private:
85 const SIRegisterInfo RI;
86 const GCNSubtarget &ST;
87 TargetSchedModel SchedModel;
88 mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
89
// Branch predicates. Each predicate and its inverse are encoded as values of
// equal magnitude and opposite sign; zero is reserved for the invalid case.
enum BranchPredicate {
  INVALID_BR = 0,

  // SCC-based pair.
  SCC_TRUE = 1,
  SCC_FALSE = -1,

  // VCC-based pair.
  VCCNZ = 2,
  VCCZ = -2,

  // EXEC-based pair.
  EXECZ = 3,
  EXECNZ = -3
};
100
101 using SetVectorType = SmallSetVector<MachineInstr *, 32>;
102
103 static unsigned getBranchOpcode(BranchPredicate Cond);
104 static BranchPredicate getBranchPredicate(unsigned Opcode);
105
106 public:
107 unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
108 MachineRegisterInfo &MRI,
109 const MachineOperand &SuperReg,
110 const TargetRegisterClass *SuperRC,
111 unsigned SubIdx,
112 const TargetRegisterClass *SubRC) const;
113 MachineOperand buildExtractSubRegOrImm(
114 MachineBasicBlock::iterator MI, MachineRegisterInfo &MRI,
115 const MachineOperand &SuperReg, const TargetRegisterClass *SuperRC,
116 unsigned SubIdx, const TargetRegisterClass *SubRC) const;
117
118 private:
119 void swapOperands(MachineInstr &Inst) const;
120
121 std::pair<bool, MachineBasicBlock *>
122 moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
123 MachineDominatorTree *MDT = nullptr) const;
124
125 void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
126 MachineDominatorTree *MDT = nullptr) const;
127
128 void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
129
130 void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
131
132 void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
133 unsigned Opcode) const;
134
135 void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
136 unsigned Opcode) const;
137
138 void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
139 unsigned Opcode, bool Swap = false) const;
140
141 void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
142 unsigned Opcode,
143 MachineDominatorTree *MDT = nullptr) const;
144
145 void splitScalarSMulU64(SIInstrWorklist &Worklist, MachineInstr &Inst,
146 MachineDominatorTree *MDT) const;
147
148 void splitScalarSMulPseudo(SIInstrWorklist &Worklist, MachineInstr &Inst,
149 MachineDominatorTree *MDT) const;
150
151 void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
152 MachineDominatorTree *MDT = nullptr) const;
153
154 void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
155 MachineInstr &Inst) const;
156 void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
157 void splitScalar64BitCountOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
158 unsigned Opcode,
159 MachineDominatorTree *MDT = nullptr) const;
160 void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
161 MachineInstr &Inst) const;
162
163 void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
164 SIInstrWorklist &Worklist) const;
165
166 void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
167 MachineInstr &SCCDefInst,
168 SIInstrWorklist &Worklist,
169 Register NewCond = Register()) const;
170 void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
171 SIInstrWorklist &Worklist) const;
172
173 const TargetRegisterClass *
174 getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
175
176 bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
177 const MachineInstr &MIb) const;
178
179 Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
180
181 protected:
/// If the specific machine instruction is an instruction that moves/copies a
/// value from one register to another register, return the destination and
/// source registers as machine operands.
185 std::optional<DestSourcePair>
186 isCopyInstrImpl(const MachineInstr &MI) const override;
187
188 bool swapSourceModifiers(MachineInstr &MI,
189 MachineOperand &Src0, unsigned Src0OpName,
190 MachineOperand &Src1, unsigned Src1OpName) const;
191
192 MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
193 unsigned OpIdx0,
194 unsigned OpIdx1) const override;
195
196 public:
enum TargetOperandFlags {
  // Every flag value defined below fits within this mask.
  MO_MASK = 0xf,

  MO_NONE = 0x0,

  // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
  MO_GOTPCREL = 0x1,

  // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
  // MO_GOTPCREL32 is an alias with the same value.
  MO_GOTPCREL32 = 0x2,
  MO_GOTPCREL32_LO = MO_GOTPCREL32,

  // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
  MO_GOTPCREL32_HI = 0x3,

  // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
  // MO_REL32 is an alias with the same value.
  MO_REL32 = 0x4,
  MO_REL32_LO = MO_REL32,

  // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
  MO_REL32_HI = 0x5,

  MO_FAR_BRANCH_OFFSET = 0x6,

  // Note: the value 7 is not assigned.
  MO_ABS32_LO = 0x8,
  MO_ABS32_HI = 0x9,
};
219
220 explicit SIInstrInfo(const GCNSubtarget &ST);
221
getRegisterInfo()222 const SIRegisterInfo &getRegisterInfo() const {
223 return RI;
224 }
225
getSubtarget()226 const GCNSubtarget &getSubtarget() const {
227 return ST;
228 }
229
230 bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
231
232 bool isIgnorableUse(const MachineOperand &MO) const override;
233
234 bool isSafeToSink(MachineInstr &MI, MachineBasicBlock *SuccToSinkTo,
235 MachineCycleInfo *CI) const override;
236
237 bool areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1, int64_t &Offset0,
238 int64_t &Offset1) const override;
239
240 bool getMemOperandsWithOffsetWidth(
241 const MachineInstr &LdSt,
242 SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
243 bool &OffsetIsScalable, unsigned &Width,
244 const TargetRegisterInfo *TRI) const final;
245
246 bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
247 int64_t Offset1, bool OffsetIsScalable1,
248 ArrayRef<const MachineOperand *> BaseOps2,
249 int64_t Offset2, bool OffsetIsScalable2,
250 unsigned ClusterSize,
251 unsigned NumBytes) const override;
252
253 bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
254 int64_t Offset1, unsigned NumLoads) const override;
255
256 void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
257 const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
258 bool KillSrc) const override;
259
260 void materializeImmediate(MachineBasicBlock &MBB,
261 MachineBasicBlock::iterator MI, const DebugLoc &DL,
262 Register DestReg, int64_t Value) const;
263
264 const TargetRegisterClass *getPreferredSelectRegClass(
265 unsigned Size) const;
266
267 Register insertNE(MachineBasicBlock *MBB,
268 MachineBasicBlock::iterator I, const DebugLoc &DL,
269 Register SrcReg, int Value) const;
270
271 Register insertEQ(MachineBasicBlock *MBB,
272 MachineBasicBlock::iterator I, const DebugLoc &DL,
273 Register SrcReg, int Value) const;
274
275 void storeRegToStackSlot(MachineBasicBlock &MBB,
276 MachineBasicBlock::iterator MI, Register SrcReg,
277 bool isKill, int FrameIndex,
278 const TargetRegisterClass *RC,
279 const TargetRegisterInfo *TRI,
280 Register VReg) const override;
281
282 void loadRegFromStackSlot(MachineBasicBlock &MBB,
283 MachineBasicBlock::iterator MI, Register DestReg,
284 int FrameIndex, const TargetRegisterClass *RC,
285 const TargetRegisterInfo *TRI,
286 Register VReg) const override;
287
288 bool expandPostRAPseudo(MachineInstr &MI) const override;
289
290 void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
291 Register DestReg, unsigned SubIdx,
292 const MachineInstr &Orig,
293 const TargetRegisterInfo &TRI) const override;
294
// Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
// instructions. Returns a pair of generated instructions.
// Can split either post-RA with physical registers or pre-RA with
// virtual registers. In the latter case the IR needs to be in SSA form
// and a REG_SEQUENCE is produced to define the original register.
300 std::pair<MachineInstr*, MachineInstr*>
301 expandMovDPP64(MachineInstr &MI) const;
302
303 // Returns an opcode that can be used to move a value to a \p DstRC
304 // register. If there is no hardware instruction that can store to \p
305 // DstRC, then AMDGPU::COPY is returned.
306 unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
307
308 const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
309 unsigned EltSize,
310 bool IsSGPR) const;
311
312 const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
313 bool IsIndirectSrc) const;
314 LLVM_READONLY
315 int commuteOpcode(unsigned Opc) const;
316
317 LLVM_READONLY
commuteOpcode(const MachineInstr & MI)318 inline int commuteOpcode(const MachineInstr &MI) const {
319 return commuteOpcode(MI.getOpcode());
320 }
321
322 bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0,
323 unsigned &SrcOpIdx1) const override;
324
325 bool findCommutedOpIndices(const MCInstrDesc &Desc, unsigned &SrcOpIdx0,
326 unsigned &SrcOpIdx1) const;
327
328 bool isBranchOffsetInRange(unsigned BranchOpc,
329 int64_t BrOffset) const override;
330
331 MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
332
/// Return whether the block terminates with a divergent branch.
/// Note this only works before lowering the pseudo control flow instructions.
335 bool hasDivergentBranch(const MachineBasicBlock *MBB) const;
336
337 void insertIndirectBranch(MachineBasicBlock &MBB,
338 MachineBasicBlock &NewDestBB,
339 MachineBasicBlock &RestoreBB, const DebugLoc &DL,
340 int64_t BrOffset, RegScavenger *RS) const override;
341
342 bool analyzeBranchImpl(MachineBasicBlock &MBB,
343 MachineBasicBlock::iterator I,
344 MachineBasicBlock *&TBB,
345 MachineBasicBlock *&FBB,
346 SmallVectorImpl<MachineOperand> &Cond,
347 bool AllowModify) const;
348
349 bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
350 MachineBasicBlock *&FBB,
351 SmallVectorImpl<MachineOperand> &Cond,
352 bool AllowModify = false) const override;
353
354 unsigned removeBranch(MachineBasicBlock &MBB,
355 int *BytesRemoved = nullptr) const override;
356
357 unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
358 MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
359 const DebugLoc &DL,
360 int *BytesAdded = nullptr) const override;
361
362 bool reverseBranchCondition(
363 SmallVectorImpl<MachineOperand> &Cond) const override;
364
365 bool canInsertSelect(const MachineBasicBlock &MBB,
366 ArrayRef<MachineOperand> Cond, Register DstReg,
367 Register TrueReg, Register FalseReg, int &CondCycles,
368 int &TrueCycles, int &FalseCycles) const override;
369
370 void insertSelect(MachineBasicBlock &MBB,
371 MachineBasicBlock::iterator I, const DebugLoc &DL,
372 Register DstReg, ArrayRef<MachineOperand> Cond,
373 Register TrueReg, Register FalseReg) const override;
374
375 void insertVectorSelect(MachineBasicBlock &MBB,
376 MachineBasicBlock::iterator I, const DebugLoc &DL,
377 Register DstReg, ArrayRef<MachineOperand> Cond,
378 Register TrueReg, Register FalseReg) const;
379
380 bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
381 Register &SrcReg2, int64_t &CmpMask,
382 int64_t &CmpValue) const override;
383
384 bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
385 Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
386 const MachineRegisterInfo *MRI) const override;
387
388 bool
389 areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
390 const MachineInstr &MIb) const override;
391
392 static bool isFoldableCopy(const MachineInstr &MI);
393
394 void removeModOperands(MachineInstr &MI) const;
395
396 bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
397 MachineRegisterInfo *MRI) const final;
398
getMachineCSELookAheadLimit()399 unsigned getMachineCSELookAheadLimit() const override { return 500; }
400
401 MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
402 LiveIntervals *LIS) const override;
403
404 bool isSchedulingBoundary(const MachineInstr &MI,
405 const MachineBasicBlock *MBB,
406 const MachineFunction &MF) const override;
407
isSALU(const MachineInstr & MI)408 static bool isSALU(const MachineInstr &MI) {
409 return MI.getDesc().TSFlags & SIInstrFlags::SALU;
410 }
411
isSALU(uint16_t Opcode)412 bool isSALU(uint16_t Opcode) const {
413 return get(Opcode).TSFlags & SIInstrFlags::SALU;
414 }
415
isVALU(const MachineInstr & MI)416 static bool isVALU(const MachineInstr &MI) {
417 return MI.getDesc().TSFlags & SIInstrFlags::VALU;
418 }
419
isVALU(uint16_t Opcode)420 bool isVALU(uint16_t Opcode) const {
421 return get(Opcode).TSFlags & SIInstrFlags::VALU;
422 }
423
isImage(const MachineInstr & MI)424 static bool isImage(const MachineInstr &MI) {
425 return isMIMG(MI) || isVSAMPLE(MI) || isVIMAGE(MI);
426 }
427
isImage(uint16_t Opcode)428 bool isImage(uint16_t Opcode) const {
429 return isMIMG(Opcode) || isVSAMPLE(Opcode) || isVIMAGE(Opcode);
430 }
431
isVMEM(const MachineInstr & MI)432 static bool isVMEM(const MachineInstr &MI) {
433 return isMUBUF(MI) || isMTBUF(MI) || isImage(MI);
434 }
435
isVMEM(uint16_t Opcode)436 bool isVMEM(uint16_t Opcode) const {
437 return isMUBUF(Opcode) || isMTBUF(Opcode) || isImage(Opcode);
438 }
439
isSOP1(const MachineInstr & MI)440 static bool isSOP1(const MachineInstr &MI) {
441 return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
442 }
443
isSOP1(uint16_t Opcode)444 bool isSOP1(uint16_t Opcode) const {
445 return get(Opcode).TSFlags & SIInstrFlags::SOP1;
446 }
447
isSOP2(const MachineInstr & MI)448 static bool isSOP2(const MachineInstr &MI) {
449 return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
450 }
451
isSOP2(uint16_t Opcode)452 bool isSOP2(uint16_t Opcode) const {
453 return get(Opcode).TSFlags & SIInstrFlags::SOP2;
454 }
455
isSOPC(const MachineInstr & MI)456 static bool isSOPC(const MachineInstr &MI) {
457 return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
458 }
459
isSOPC(uint16_t Opcode)460 bool isSOPC(uint16_t Opcode) const {
461 return get(Opcode).TSFlags & SIInstrFlags::SOPC;
462 }
463
isSOPK(const MachineInstr & MI)464 static bool isSOPK(const MachineInstr &MI) {
465 return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
466 }
467
isSOPK(uint16_t Opcode)468 bool isSOPK(uint16_t Opcode) const {
469 return get(Opcode).TSFlags & SIInstrFlags::SOPK;
470 }
471
isSOPP(const MachineInstr & MI)472 static bool isSOPP(const MachineInstr &MI) {
473 return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
474 }
475
isSOPP(uint16_t Opcode)476 bool isSOPP(uint16_t Opcode) const {
477 return get(Opcode).TSFlags & SIInstrFlags::SOPP;
478 }
479
isPacked(const MachineInstr & MI)480 static bool isPacked(const MachineInstr &MI) {
481 return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
482 }
483
isPacked(uint16_t Opcode)484 bool isPacked(uint16_t Opcode) const {
485 return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
486 }
487
isVOP1(const MachineInstr & MI)488 static bool isVOP1(const MachineInstr &MI) {
489 return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
490 }
491
isVOP1(uint16_t Opcode)492 bool isVOP1(uint16_t Opcode) const {
493 return get(Opcode).TSFlags & SIInstrFlags::VOP1;
494 }
495
isVOP2(const MachineInstr & MI)496 static bool isVOP2(const MachineInstr &MI) {
497 return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
498 }
499
isVOP2(uint16_t Opcode)500 bool isVOP2(uint16_t Opcode) const {
501 return get(Opcode).TSFlags & SIInstrFlags::VOP2;
502 }
503
isVOP3(const MachineInstr & MI)504 static bool isVOP3(const MachineInstr &MI) {
505 return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
506 }
507
isVOP3(uint16_t Opcode)508 bool isVOP3(uint16_t Opcode) const {
509 return get(Opcode).TSFlags & SIInstrFlags::VOP3;
510 }
511
isSDWA(const MachineInstr & MI)512 static bool isSDWA(const MachineInstr &MI) {
513 return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
514 }
515
isSDWA(uint16_t Opcode)516 bool isSDWA(uint16_t Opcode) const {
517 return get(Opcode).TSFlags & SIInstrFlags::SDWA;
518 }
519
isVOPC(const MachineInstr & MI)520 static bool isVOPC(const MachineInstr &MI) {
521 return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
522 }
523
isVOPC(uint16_t Opcode)524 bool isVOPC(uint16_t Opcode) const {
525 return get(Opcode).TSFlags & SIInstrFlags::VOPC;
526 }
527
isMUBUF(const MachineInstr & MI)528 static bool isMUBUF(const MachineInstr &MI) {
529 return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
530 }
531
isMUBUF(uint16_t Opcode)532 bool isMUBUF(uint16_t Opcode) const {
533 return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
534 }
535
isMTBUF(const MachineInstr & MI)536 static bool isMTBUF(const MachineInstr &MI) {
537 return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
538 }
539
isMTBUF(uint16_t Opcode)540 bool isMTBUF(uint16_t Opcode) const {
541 return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
542 }
543
isSMRD(const MachineInstr & MI)544 static bool isSMRD(const MachineInstr &MI) {
545 return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
546 }
547
isSMRD(uint16_t Opcode)548 bool isSMRD(uint16_t Opcode) const {
549 return get(Opcode).TSFlags & SIInstrFlags::SMRD;
550 }
551
552 bool isBufferSMRD(const MachineInstr &MI) const;
553
isDS(const MachineInstr & MI)554 static bool isDS(const MachineInstr &MI) {
555 return MI.getDesc().TSFlags & SIInstrFlags::DS;
556 }
557
isDS(uint16_t Opcode)558 bool isDS(uint16_t Opcode) const {
559 return get(Opcode).TSFlags & SIInstrFlags::DS;
560 }
561
isLDSDMA(const MachineInstr & MI)562 static bool isLDSDMA(const MachineInstr &MI) {
563 return isVALU(MI) && (isMUBUF(MI) || isFLAT(MI));
564 }
565
isLDSDMA(uint16_t Opcode)566 bool isLDSDMA(uint16_t Opcode) {
567 return isVALU(Opcode) && (isMUBUF(Opcode) || isFLAT(Opcode));
568 }
569
isGWS(const MachineInstr & MI)570 static bool isGWS(const MachineInstr &MI) {
571 return MI.getDesc().TSFlags & SIInstrFlags::GWS;
572 }
573
isGWS(uint16_t Opcode)574 bool isGWS(uint16_t Opcode) const {
575 return get(Opcode).TSFlags & SIInstrFlags::GWS;
576 }
577
578 bool isAlwaysGDS(uint16_t Opcode) const;
579
isMIMG(const MachineInstr & MI)580 static bool isMIMG(const MachineInstr &MI) {
581 return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
582 }
583
isMIMG(uint16_t Opcode)584 bool isMIMG(uint16_t Opcode) const {
585 return get(Opcode).TSFlags & SIInstrFlags::MIMG;
586 }
587
isVIMAGE(const MachineInstr & MI)588 static bool isVIMAGE(const MachineInstr &MI) {
589 return MI.getDesc().TSFlags & SIInstrFlags::VIMAGE;
590 }
591
isVIMAGE(uint16_t Opcode)592 bool isVIMAGE(uint16_t Opcode) const {
593 return get(Opcode).TSFlags & SIInstrFlags::VIMAGE;
594 }
595
isVSAMPLE(const MachineInstr & MI)596 static bool isVSAMPLE(const MachineInstr &MI) {
597 return MI.getDesc().TSFlags & SIInstrFlags::VSAMPLE;
598 }
599
isVSAMPLE(uint16_t Opcode)600 bool isVSAMPLE(uint16_t Opcode) const {
601 return get(Opcode).TSFlags & SIInstrFlags::VSAMPLE;
602 }
603
isGather4(const MachineInstr & MI)604 static bool isGather4(const MachineInstr &MI) {
605 return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
606 }
607
isGather4(uint16_t Opcode)608 bool isGather4(uint16_t Opcode) const {
609 return get(Opcode).TSFlags & SIInstrFlags::Gather4;
610 }
611
isFLAT(const MachineInstr & MI)612 static bool isFLAT(const MachineInstr &MI) {
613 return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
614 }
615
616 // Is a FLAT encoded instruction which accesses a specific segment,
617 // i.e. global_* or scratch_*.
isSegmentSpecificFLAT(const MachineInstr & MI)618 static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
619 auto Flags = MI.getDesc().TSFlags;
620 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
621 }
622
isSegmentSpecificFLAT(uint16_t Opcode)623 bool isSegmentSpecificFLAT(uint16_t Opcode) const {
624 auto Flags = get(Opcode).TSFlags;
625 return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
626 }
627
isFLATGlobal(const MachineInstr & MI)628 static bool isFLATGlobal(const MachineInstr &MI) {
629 return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
630 }
631
isFLATGlobal(uint16_t Opcode)632 bool isFLATGlobal(uint16_t Opcode) const {
633 return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
634 }
635
isFLATScratch(const MachineInstr & MI)636 static bool isFLATScratch(const MachineInstr &MI) {
637 return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
638 }
639
isFLATScratch(uint16_t Opcode)640 bool isFLATScratch(uint16_t Opcode) const {
641 return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
642 }
643
644 // Any FLAT encoded instruction, including global_* and scratch_*.
isFLAT(uint16_t Opcode)645 bool isFLAT(uint16_t Opcode) const {
646 return get(Opcode).TSFlags & SIInstrFlags::FLAT;
647 }
648
isEXP(const MachineInstr & MI)649 static bool isEXP(const MachineInstr &MI) {
650 return MI.getDesc().TSFlags & SIInstrFlags::EXP;
651 }
652
isDualSourceBlendEXP(const MachineInstr & MI)653 static bool isDualSourceBlendEXP(const MachineInstr &MI) {
654 if (!isEXP(MI))
655 return false;
656 unsigned Target = MI.getOperand(0).getImm();
657 return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
658 Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
659 }
660
isEXP(uint16_t Opcode)661 bool isEXP(uint16_t Opcode) const {
662 return get(Opcode).TSFlags & SIInstrFlags::EXP;
663 }
664
isAtomicNoRet(const MachineInstr & MI)665 static bool isAtomicNoRet(const MachineInstr &MI) {
666 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
667 }
668
isAtomicNoRet(uint16_t Opcode)669 bool isAtomicNoRet(uint16_t Opcode) const {
670 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
671 }
672
isAtomicRet(const MachineInstr & MI)673 static bool isAtomicRet(const MachineInstr &MI) {
674 return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
675 }
676
isAtomicRet(uint16_t Opcode)677 bool isAtomicRet(uint16_t Opcode) const {
678 return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
679 }
680
isAtomic(const MachineInstr & MI)681 static bool isAtomic(const MachineInstr &MI) {
682 return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
683 SIInstrFlags::IsAtomicNoRet);
684 }
685
isAtomic(uint16_t Opcode)686 bool isAtomic(uint16_t Opcode) const {
687 return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
688 SIInstrFlags::IsAtomicNoRet);
689 }
690
mayWriteLDSThroughDMA(const MachineInstr & MI)691 static bool mayWriteLDSThroughDMA(const MachineInstr &MI) {
692 return isLDSDMA(MI) && MI.getOpcode() != AMDGPU::BUFFER_STORE_LDS_DWORD;
693 }
694
isWQM(const MachineInstr & MI)695 static bool isWQM(const MachineInstr &MI) {
696 return MI.getDesc().TSFlags & SIInstrFlags::WQM;
697 }
698
isWQM(uint16_t Opcode)699 bool isWQM(uint16_t Opcode) const {
700 return get(Opcode).TSFlags & SIInstrFlags::WQM;
701 }
702
isDisableWQM(const MachineInstr & MI)703 static bool isDisableWQM(const MachineInstr &MI) {
704 return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
705 }
706
isDisableWQM(uint16_t Opcode)707 bool isDisableWQM(uint16_t Opcode) const {
708 return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
709 }
710
isVGPRSpill(const MachineInstr & MI)711 static bool isVGPRSpill(const MachineInstr &MI) {
712 return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
713 }
714
isVGPRSpill(uint16_t Opcode)715 bool isVGPRSpill(uint16_t Opcode) const {
716 return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
717 }
718
isSGPRSpill(const MachineInstr & MI)719 static bool isSGPRSpill(const MachineInstr &MI) {
720 return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
721 }
722
isSGPRSpill(uint16_t Opcode)723 bool isSGPRSpill(uint16_t Opcode) const {
724 return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
725 }
726
isSpillOpcode(uint16_t Opcode)727 bool isSpillOpcode(uint16_t Opcode) const {
728 return get(Opcode).TSFlags &
729 (SIInstrFlags::SGPRSpill | SIInstrFlags::VGPRSpill);
730 }
731
isWWMRegSpillOpcode(uint16_t Opcode)732 static bool isWWMRegSpillOpcode(uint16_t Opcode) {
733 return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
734 Opcode == AMDGPU::SI_SPILL_WWM_AV32_SAVE ||
735 Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE ||
736 Opcode == AMDGPU::SI_SPILL_WWM_AV32_RESTORE;
737 }
738
isChainCallOpcode(uint64_t Opcode)739 static bool isChainCallOpcode(uint64_t Opcode) {
740 return Opcode == AMDGPU::SI_CS_CHAIN_TC_W32 ||
741 Opcode == AMDGPU::SI_CS_CHAIN_TC_W64;
742 }
743
isDPP(const MachineInstr & MI)744 static bool isDPP(const MachineInstr &MI) {
745 return MI.getDesc().TSFlags & SIInstrFlags::DPP;
746 }
747
isDPP(uint16_t Opcode)748 bool isDPP(uint16_t Opcode) const {
749 return get(Opcode).TSFlags & SIInstrFlags::DPP;
750 }
751
isTRANS(const MachineInstr & MI)752 static bool isTRANS(const MachineInstr &MI) {
753 return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
754 }
755
isTRANS(uint16_t Opcode)756 bool isTRANS(uint16_t Opcode) const {
757 return get(Opcode).TSFlags & SIInstrFlags::TRANS;
758 }
759
isVOP3P(const MachineInstr & MI)760 static bool isVOP3P(const MachineInstr &MI) {
761 return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
762 }
763
isVOP3P(uint16_t Opcode)764 bool isVOP3P(uint16_t Opcode) const {
765 return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
766 }
767
isVINTRP(const MachineInstr & MI)768 static bool isVINTRP(const MachineInstr &MI) {
769 return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
770 }
771
isVINTRP(uint16_t Opcode)772 bool isVINTRP(uint16_t Opcode) const {
773 return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
774 }
775
isMAI(const MachineInstr & MI)776 static bool isMAI(const MachineInstr &MI) {
777 return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
778 }
779
isMAI(uint16_t Opcode)780 bool isMAI(uint16_t Opcode) const {
781 return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
782 }
783
isMFMA(const MachineInstr & MI)784 static bool isMFMA(const MachineInstr &MI) {
785 return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
786 MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
787 }
788
isDOT(const MachineInstr & MI)789 static bool isDOT(const MachineInstr &MI) {
790 return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
791 }
792
isWMMA(const MachineInstr & MI)793 static bool isWMMA(const MachineInstr &MI) {
794 return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
795 }
796
isWMMA(uint16_t Opcode)797 bool isWMMA(uint16_t Opcode) const {
798 return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
799 }
800
isMFMAorWMMA(const MachineInstr & MI)801 static bool isMFMAorWMMA(const MachineInstr &MI) {
802 return isMFMA(MI) || isWMMA(MI);
803 }
804
isSWMMAC(const MachineInstr & MI)805 static bool isSWMMAC(const MachineInstr &MI) {
806 return MI.getDesc().TSFlags & SIInstrFlags::IsSWMMAC;
807 }
808
isSWMMAC(uint16_t Opcode)809 bool isSWMMAC(uint16_t Opcode) const {
810 return get(Opcode).TSFlags & SIInstrFlags::IsSWMMAC;
811 }
812
isDOT(uint16_t Opcode)813 bool isDOT(uint16_t Opcode) const {
814 return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
815 }
816
isLDSDIR(const MachineInstr & MI)817 static bool isLDSDIR(const MachineInstr &MI) {
818 return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
819 }
820
isLDSDIR(uint16_t Opcode)821 bool isLDSDIR(uint16_t Opcode) const {
822 return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
823 }
824
isVINTERP(const MachineInstr & MI)825 static bool isVINTERP(const MachineInstr &MI) {
826 return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
827 }
828
isVINTERP(uint16_t Opcode)829 bool isVINTERP(uint16_t Opcode) const {
830 return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
831 }
832
isScalarUnit(const MachineInstr & MI)833 static bool isScalarUnit(const MachineInstr &MI) {
834 return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
835 }
836
usesVM_CNT(const MachineInstr & MI)837 static bool usesVM_CNT(const MachineInstr &MI) {
838 return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
839 }
840
usesLGKM_CNT(const MachineInstr & MI)841 static bool usesLGKM_CNT(const MachineInstr &MI) {
842 return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
843 }
844
sopkIsZext(const MachineInstr & MI)845 static bool sopkIsZext(const MachineInstr &MI) {
846 return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
847 }
848
sopkIsZext(uint16_t Opcode)849 bool sopkIsZext(uint16_t Opcode) const {
850 return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
851 }
852
853 /// \returns true if this is an s_store_dword* instruction. This is more
854 /// specific than isSMEM && mayStore.
isScalarStore(const MachineInstr & MI)855 static bool isScalarStore(const MachineInstr &MI) {
856 return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
857 }
858
isScalarStore(uint16_t Opcode)859 bool isScalarStore(uint16_t Opcode) const {
860 return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
861 }
862
isFixedSize(const MachineInstr & MI)863 static bool isFixedSize(const MachineInstr &MI) {
864 return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
865 }
866
isFixedSize(uint16_t Opcode)867 bool isFixedSize(uint16_t Opcode) const {
868 return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
869 }
870
hasFPClamp(const MachineInstr & MI)871 static bool hasFPClamp(const MachineInstr &MI) {
872 return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
873 }
874
hasFPClamp(uint16_t Opcode)875 bool hasFPClamp(uint16_t Opcode) const {
876 return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
877 }
878
hasIntClamp(const MachineInstr & MI)879 static bool hasIntClamp(const MachineInstr &MI) {
880 return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
881 }
882
getClampMask(const MachineInstr & MI)883 uint64_t getClampMask(const MachineInstr &MI) const {
884 const uint64_t ClampFlags = SIInstrFlags::FPClamp |
885 SIInstrFlags::IntClamp |
886 SIInstrFlags::ClampLo |
887 SIInstrFlags::ClampHi;
888 return MI.getDesc().TSFlags & ClampFlags;
889 }
890
usesFPDPRounding(const MachineInstr & MI)891 static bool usesFPDPRounding(const MachineInstr &MI) {
892 return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
893 }
894
usesFPDPRounding(uint16_t Opcode)895 bool usesFPDPRounding(uint16_t Opcode) const {
896 return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
897 }
898
isFPAtomic(const MachineInstr & MI)899 static bool isFPAtomic(const MachineInstr &MI) {
900 return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
901 }
902
isFPAtomic(uint16_t Opcode)903 bool isFPAtomic(uint16_t Opcode) const {
904 return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
905 }
906
isNeverUniform(const MachineInstr & MI)907 static bool isNeverUniform(const MachineInstr &MI) {
908 return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
909 }
910
911 // Check to see if opcode is for a barrier start. Pre gfx12 this is just the
912 // S_BARRIER, but after support for S_BARRIER_SIGNAL* / S_BARRIER_WAIT we want
913 // to check for the barrier start (S_BARRIER_SIGNAL*)
isBarrierStart(unsigned Opcode)914 bool isBarrierStart(unsigned Opcode) const {
915 return Opcode == AMDGPU::S_BARRIER ||
916 Opcode == AMDGPU::S_BARRIER_SIGNAL_M0 ||
917 Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0 ||
918 Opcode == AMDGPU::S_BARRIER_SIGNAL_IMM ||
919 Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
920 }
921
doesNotReadTiedSource(const MachineInstr & MI)922 static bool doesNotReadTiedSource(const MachineInstr &MI) {
923 return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
924 }
925
doesNotReadTiedSource(uint16_t Opcode)926 bool doesNotReadTiedSource(uint16_t Opcode) const {
927 return get(Opcode).TSFlags & SIInstrFlags::TiedSourceNotRead;
928 }
929
getNonSoftWaitcntOpcode(unsigned Opcode)930 static unsigned getNonSoftWaitcntOpcode(unsigned Opcode) {
931 switch (Opcode) {
932 case AMDGPU::S_WAITCNT_soft:
933 return AMDGPU::S_WAITCNT;
934 case AMDGPU::S_WAITCNT_VSCNT_soft:
935 return AMDGPU::S_WAITCNT_VSCNT;
936 case AMDGPU::S_WAIT_LOADCNT_soft:
937 return AMDGPU::S_WAIT_LOADCNT;
938 case AMDGPU::S_WAIT_STORECNT_soft:
939 return AMDGPU::S_WAIT_STORECNT;
940 case AMDGPU::S_WAIT_SAMPLECNT_soft:
941 return AMDGPU::S_WAIT_SAMPLECNT;
942 case AMDGPU::S_WAIT_BVHCNT_soft:
943 return AMDGPU::S_WAIT_BVHCNT;
944 case AMDGPU::S_WAIT_DSCNT_soft:
945 return AMDGPU::S_WAIT_DSCNT;
946 default:
947 return Opcode;
948 }
949 }
950
isVGPRCopy(const MachineInstr & MI)951 bool isVGPRCopy(const MachineInstr &MI) const {
952 assert(isCopyInstr(MI));
953 Register Dest = MI.getOperand(0).getReg();
954 const MachineFunction &MF = *MI.getParent()->getParent();
955 const MachineRegisterInfo &MRI = MF.getRegInfo();
956 return !RI.isSGPRReg(MRI, Dest);
957 }
958
hasVGPRUses(const MachineInstr & MI)959 bool hasVGPRUses(const MachineInstr &MI) const {
960 const MachineFunction &MF = *MI.getParent()->getParent();
961 const MachineRegisterInfo &MRI = MF.getRegInfo();
962 return llvm::any_of(MI.explicit_uses(),
963 [&MRI, this](const MachineOperand &MO) {
964 return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
965 }
966
967 /// Return true if the instruction modifies the mode register.
968 static bool modifiesModeRegister(const MachineInstr &MI);
969
970 /// Whether we must prevent this instruction from executing with EXEC = 0.
971 bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
972
973 /// Returns true if the instruction could potentially depend on the value of
974 /// exec. If false, exec dependencies may safely be ignored.
975 bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
976
/// NOTE(review): presumably returns true if the raw bit pattern \p Imm can be
/// encoded as an inline constant instead of a literal; the definition is not
/// in this header, so confirm there. The APFloat overload forwards here.
977 bool isInlineConstant(const APInt &Imm) const;
978
isInlineConstant(const APFloat & Imm)979 bool isInlineConstant(const APFloat &Imm) const {
980 return isInlineConstant(Imm.bitcastToAPInt());
981 }
982
983 // Returns true if this non-register operand definitely does not need to be
984 // encoded as a 32-bit literal. Note that this function handles all kinds of
985 // operands, not just immediates.
986 //
987 // Some operands like FrameIndexes could resolve to an inline immediate value
988 // that will not require an additional 4 bytes; this function assumes that
989 // they will require them (i.e. it answers conservatively for such operands).
// \p OperandType is the operand-type code the operand is checked against; the
// other overloads obtain it from the instruction descriptor.
990 bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
991
isInlineConstant(const MachineOperand & MO,const MCOperandInfo & OpInfo)992 bool isInlineConstant(const MachineOperand &MO,
993 const MCOperandInfo &OpInfo) const {
994 return isInlineConstant(MO, OpInfo.OperandType);
995 }
996
997 /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
998 /// be an inline immediate.
isInlineConstant(const MachineInstr & MI,const MachineOperand & UseMO,const MachineOperand & DefMO)999 bool isInlineConstant(const MachineInstr &MI,
1000 const MachineOperand &UseMO,
1001 const MachineOperand &DefMO) const {
1002 assert(UseMO.getParent() == &MI);
1003 int OpIdx = UseMO.getOperandNo();
1004 if (OpIdx >= MI.getDesc().NumOperands)
1005 return false;
1006
1007 return isInlineConstant(DefMO, MI.getDesc().operands()[OpIdx]);
1008 }
1009
1010 /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
1011 /// immediate.
isInlineConstant(const MachineInstr & MI,unsigned OpIdx)1012 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
1013 const MachineOperand &MO = MI.getOperand(OpIdx);
1014 return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
1015 }
1016
isInlineConstant(const MachineInstr & MI,unsigned OpIdx,const MachineOperand & MO)1017 bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
1018 const MachineOperand &MO) const {
1019 if (OpIdx >= MI.getDesc().NumOperands)
1020 return false;
1021
1022 if (isCopyInstr(MI)) {
1023 unsigned Size = getOpSize(MI, OpIdx);
1024 assert(Size == 8 || Size == 4);
1025
1026 uint8_t OpType = (Size == 8) ?
1027 AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
1028 return isInlineConstant(MO, OpType);
1029 }
1030
1031 return isInlineConstant(MO, MI.getDesc().operands()[OpIdx].OperandType);
1032 }
1033
isInlineConstant(const MachineOperand & MO)1034 bool isInlineConstant(const MachineOperand &MO) const {
1035 return isInlineConstant(*MO.getParent(), MO.getOperandNo());
1036 }
1037
/// NOTE(review): presumably checks whether the immediate \p MO may be used as
/// operand \p OpNo of \p MI; the definition is not in this header, so confirm
/// there.
1038 bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
1039 const MachineOperand &MO) const;
1040
1041 /// Return true if this 64-bit VALU instruction has a 32-bit encoding.
1042 /// This function will return false if you pass it a 32-bit instruction.
1043 bool hasVALU32BitEncoding(unsigned Opcode) const;
1044
1045 /// Returns true if this operand uses the constant bus.
1046 bool usesConstantBus(const MachineRegisterInfo &MRI,
1047 const MachineOperand &MO,
1048 const MCOperandInfo &OpInfo) const;
1049
1050 /// Return true if this instruction has any modifiers.
1051 /// e.g. src[012]_mod, omod, clamp.
1052 bool hasModifiers(unsigned Opcode) const;
1053
/// NOTE(review): presumably the per-instruction counterparts of hasModifiers:
/// whether the modifier operand named \p OpName (or, for
/// hasAnyModifiersSet, any modifier operand) of \p MI has a non-default
/// value. Confirm against the definitions.
1054 bool hasModifiersSet(const MachineInstr &MI,
1055 unsigned OpName) const;
1056 bool hasAnyModifiersSet(const MachineInstr &MI) const;
1057
1058 bool canShrink(const MachineInstr &MI,
1059 const MachineRegisterInfo &MRI) const;
1060
1061 MachineInstr *buildShrunkInst(MachineInstr &MI,
1062 unsigned NewOpcode) const;
1063
1064 bool verifyInstruction(const MachineInstr &MI,
1065 StringRef &ErrInfo) const override;
1066
1067 unsigned getVALUOp(const MachineInstr &MI) const;
1068
1069 void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
1070 MachineBasicBlock::iterator MBBI,
1071 const DebugLoc &DL, Register Reg, bool IsSCCLive,
1072 SlotIndexes *Indexes = nullptr) const;
1073
1074 void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
1075 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
1076 Register Reg, SlotIndexes *Indexes = nullptr) const;
1077
1078 /// Return the correct register class for \p OpNo. For target-specific
1079 /// instructions, this will return the register class that has been defined
1080 /// in tablegen. For generic instructions, like REG_SEQUENCE it will return
1081 /// the register class of its machine operand.
1082 /// NOTE(review): the comment originally trailed off here with the fragment
/// "to infer the correct register class base on the other operands"; the
/// intended sentence is unclear, so confirm against the definition.
1083 const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
1084 unsigned OpNo) const;
1085
1086 /// Return the size in bytes of the operand OpNo on the given
1087 // instruction opcode.
getOpSize(uint16_t Opcode,unsigned OpNo)1088 unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
1089 const MCOperandInfo &OpInfo = get(Opcode).operands()[OpNo];
1090
1091 if (OpInfo.RegClass == -1) {
1092 // If this is an immediate operand, this must be a 32-bit literal.
1093 assert(OpInfo.OperandType == MCOI::OPERAND_IMMEDIATE);
1094 return 4;
1095 }
1096
1097 return RI.getRegSizeInBits(*RI.getRegClass(OpInfo.RegClass)) / 8;
1098 }
1099
1100 /// This form should usually be preferred since it handles operands
1101 /// with unknown register classes.
getOpSize(const MachineInstr & MI,unsigned OpNo)1102 unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
1103 const MachineOperand &MO = MI.getOperand(OpNo);
1104 if (MO.isReg()) {
1105 if (unsigned SubReg = MO.getSubReg()) {
1106 return RI.getSubRegIdxSize(SubReg) / 8;
1107 }
1108 }
1109 return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
1110 }
1111
1112 /// Legalize the \p OpIdx operand of this instruction by inserting
1113 /// a MOV. For example:
1114 /// ADD_I32_e32 VGPR0, 15
1115 /// to
1116 /// MOV VGPR1, 15
1117 /// ADD_I32_e32 VGPR0, VGPR1
1118 ///
1119 /// If the operand being legalized is a register, then a COPY will be used
1120 /// instead of MOV.
1121 void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
1122
1123 /// Check if \p MO is a legal operand if it was the \p OpIdx Operand
1124 /// for \p MI.
/// NOTE(review): \p MO defaults to nullptr; presumably the operand already at
/// \p OpIdx is checked in that case. Confirm against the definition.
1125 bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
1126 const MachineOperand *MO = nullptr) const;
1127
1128 /// Check if \p MO would be a valid operand for the given operand
1129 /// definition \p OpInfo. Note this does not attempt to validate constant bus
1130 /// restrictions (e.g. literal constant usage).
1131 bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
1132 const MCOperandInfo &OpInfo,
1133 const MachineOperand &MO) const;
1134
1135 /// Check if \p MO (a register operand) is a legal register for the
1136 /// given operand description.
1137 bool isLegalRegOperand(const MachineRegisterInfo &MRI,
1138 const MCOperandInfo &OpInfo,
1139 const MachineOperand &MO) const;
1140
1141 /// Legalize operands in \p MI by either commuting it or inserting a
1142 /// copy of src1.
1143 void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1144
1145 /// Fix operands in \p MI to satisfy constant bus requirements.
1146 void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1147
1148 /// Copy a value from a VGPR (\p SrcReg) to SGPR. This function can only
1149 /// be used when it is known that the value in \p SrcReg is the same across
1150 /// all threads in the wave.
1151 /// \returns The SGPR register that \p SrcReg was copied to.
1152 Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
1153 MachineRegisterInfo &MRI) const;
1154
1155 void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1156 void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
1157
1158 void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
1159 MachineBasicBlock::iterator I,
1160 const TargetRegisterClass *DstRC,
1161 MachineOperand &Op, MachineRegisterInfo &MRI,
1162 const DebugLoc &DL) const;
1163
1164 /// Legalize all operands in this instruction. This function may create new
1165 /// instructions and control-flow around \p MI. If present, \p MDT is
1166 /// updated.
1167 /// \returns A new basic block that contains \p MI if new blocks were created.
1168 MachineBasicBlock *
1169 legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
1170
1171 /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
1172 /// was moved to VGPR. \returns true if succeeded.
1173 bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
1174
1175 /// Replace the instruction's opcode with the equivalent VALU
1176 /// opcode. This function will also move the users of MachineInstructions
1177 /// in the \p Worklist to the VALU if necessary. If present, \p MDT is
1178 /// updated.
1179 void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;
1180
1181 void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
1182 MachineInstr &Inst) const;
1183
1184 void insertNoop(MachineBasicBlock &MBB,
1185 MachineBasicBlock::iterator MI) const override;
1186
1187 void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1188 unsigned Quantity) const override;
1189
1190 void insertReturn(MachineBasicBlock &MBB) const;
1191 /// Return the number of wait states that result from executing this
1192 /// instruction.
1193 static unsigned getNumWaitStates(const MachineInstr &MI);
1194
1195 /// Returns the operand named \p OperandName. If \p MI does not have an
1196 /// operand named \c OperandName, this function returns nullptr.
1197 LLVM_READONLY
1198 MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
1199
1200 LLVM_READONLY
getNamedOperand(const MachineInstr & MI,unsigned OpName)1201 const MachineOperand *getNamedOperand(const MachineInstr &MI,
1202 unsigned OpName) const {
1203 return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
1204 }
1205
1206 /// Get required immediate operand
getNamedImmOperand(const MachineInstr & MI,unsigned OpName)1207 int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
1208 int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
1209 return MI.getOperand(Idx).getImm();
1210 }
1211
// NOTE(review): the two getters below presumably produce 64-bit resource
// descriptor values (compare the AMDGPU::RSRC_* constants declared later in
// this header); their definitions are not in this header, so confirm there.
1212 uint64_t getDefaultRsrcDataFormat() const;
1213 uint64_t getScratchRsrcWords23() const;
1214
1215 bool isLowLatencyInstruction(const MachineInstr &MI) const;
1216 bool isHighLatencyDef(int Opc) const override;
1217
1218 /// Return the descriptor of the target-specific machine instruction
1219 /// that corresponds to the specified pseudo or native opcode.
getMCOpcodeFromPseudo(unsigned Opcode)1220 const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
1221 return get(pseudoToMCOpcode(Opcode));
1222 }
1223
// NOTE(review): the four stack-access queries below share one shape: an
// unsigned result plus a \p FrameIndex out-parameter. Presumably the result
// is the accessed register (0 for "no match"), following the
// TargetInstrInfo::isLoadFromStackSlot convention. Confirm against the
// definitions.
1224 unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1225 unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1226
1227 unsigned isLoadFromStackSlot(const MachineInstr &MI,
1228 int &FrameIndex) const override;
1229 unsigned isStoreToStackSlot(const MachineInstr &MI,
1230 int &FrameIndex) const override;
1231
1232 unsigned getInstBundleSize(const MachineInstr &MI) const;
1233 unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
1234
1235 bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
1236
1237 bool isNonUniformBranchInstr(MachineInstr &Instr) const;
1238
1239 void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
1240 MachineBasicBlock *IfEnd) const;
1241
1242 void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
1243 MachineBasicBlock *LoopEnd) const;
1244
1245 std::pair<unsigned, unsigned>
1246 decomposeMachineOperandsTargetFlags(unsigned TF) const override;
1247
1248 ArrayRef<std::pair<int, const char *>>
1249 getSerializableTargetIndices() const override;
1250
1251 ArrayRef<std::pair<unsigned, const char *>>
1252 getSerializableDirectMachineOperandTargetFlags() const override;
1253
1254 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
1255 getSerializableMachineMemOperandTargetFlags() const override;
1256
1257 ScheduleHazardRecognizer *
1258 CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
1259 const ScheduleDAG *DAG) const override;
1260
1261 ScheduleHazardRecognizer *
1262 CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
1263
1264 ScheduleHazardRecognizer *
1265 CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
1266 const ScheduleDAGMI *DAG) const override;
1267
1268 unsigned getLiveRangeSplitOpcode(Register Reg,
1269 const MachineFunction &MF) const override;
1270
1271 bool isBasicBlockPrologue(const MachineInstr &MI,
1272 Register Reg = Register()) const override;
1273
1274 MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
1275 MachineBasicBlock::iterator InsPt,
1276 const DebugLoc &DL, Register Src,
1277 Register Dst) const override;
1278
1279 MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
1280 MachineBasicBlock::iterator InsPt,
1281 const DebugLoc &DL, Register Src,
1282 unsigned SrcSubReg,
1283 Register Dst) const override;
1284
1285 bool isWave32() const;
1286
1287 /// Return a partially built integer add instruction without carry.
1288 /// Caller must add source operands.
1289 /// For pre-GFX9 it will generate unused carry destination operand.
1290 /// TODO: After GFX9 it should return a no-carry operation.
1291 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1292 MachineBasicBlock::iterator I,
1293 const DebugLoc &DL,
1294 Register DestReg) const;
1295
/// Same as the overload above, with an additional register scavenger \p RS.
/// NOTE(review): \p RS is presumably used to obtain the unused carry
/// destination register; confirm against the definition.
1296 MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1297 MachineBasicBlock::iterator I,
1298 const DebugLoc &DL,
1299 Register DestReg,
1300 RegScavenger &RS) const;
1301
1302 static bool isKillTerminator(unsigned Opcode);
1303 const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
1304
1305 bool isLegalMUBUFImmOffset(unsigned Imm) const;
1306
1307 static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST);
1308
1309 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
1310 Align Alignment = Align(4)) const;
1311
1312 /// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
1313 /// encoded instruction of variant \p FlatVariant in address space
1314 /// \p AddrSpace.
/// NOTE(review): this comment used to refer to a \p Signed parameter that is
/// not in the signature; signedness is presumably derived from
/// \p FlatVariant (see allowNegativeFlatOffset). Confirm against the
/// definition.
1315 bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
1316 uint64_t FlatVariant) const;
1317
1318 /// Split \p COffsetVal into {immediate offset field, remainder offset}
1319 /// values.
1320 std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
1321 unsigned AddrSpace,
1322 uint64_t FlatVariant) const;
1323
1324 /// Returns true if negative offsets are allowed for the given \p FlatVariant.
1325 bool allowNegativeFlatOffset(uint64_t FlatVariant) const;
1326
1327 /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
1328 /// Return -1 if the target-specific opcode for the pseudo instruction does
1329 /// not exist. If Opcode is not a pseudo instruction, this is identity.
1330 int pseudoToMCOpcode(int Opcode) const;
1331
1332 /// \brief Check if this instruction should only be used by assembler.
1333 /// Return true if this opcode should not be used by codegen.
1334 bool isAsmOnlyOpcode(int MCOp) const;
1335
1336 const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
1337 const TargetRegisterInfo *TRI,
1338 const MachineFunction &MF)
1339 const override;
1340
1341 void fixImplicitOperands(MachineInstr &MI) const;
1342
1343 MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
1344 ArrayRef<unsigned> Ops,
1345 MachineBasicBlock::iterator InsertPt,
1346 int FrameIndex,
1347 LiveIntervals *LIS = nullptr,
1348 VirtRegMap *VRM = nullptr) const override;
1349
1350 unsigned getInstrLatency(const InstrItineraryData *ItinData,
1351 const MachineInstr &MI,
1352 unsigned *PredCost = nullptr) const override;
1353
1354 InstructionUniformity
1355 getInstructionUniformity(const MachineInstr &MI) const override final;
1356
1357 InstructionUniformity
1358 getGenericInstructionUniformity(const MachineInstr &MI) const;
1359
getMIRFormatter()1360 const MIRFormatter *getMIRFormatter() const override {
1361 if (!Formatter.get())
1362 Formatter = std::make_unique<AMDGPUMIRFormatter>();
1363 return Formatter.get();
1364 }
1365
/// NOTE(review): presumably maps the function \p MF (e.g. via its calling
/// convention) to a shader-type value used by DS instructions; the definition
/// is not in this header, so confirm there.
1366 static unsigned getDSShaderTypeValue(const MachineFunction &MF);
1367
getSchedModel()1368 const TargetSchedModel &getSchedModel() const { return SchedModel; }
1369
1370 // Enforce even alignment of the operand named \p OpName if required by the
1371 // target. This is used if an operand is a 32 bit register but needs to be
1372 // aligned regardless.
1373 void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
1374 };
1375
1376 /// \brief Returns true if a reg:subreg pair P has a TRC class
isOfRegClass(const TargetInstrInfo::RegSubRegPair & P,const TargetRegisterClass & TRC,MachineRegisterInfo & MRI)1377 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1378 const TargetRegisterClass &TRC,
1379 MachineRegisterInfo &MRI) {
1380 auto *RC = MRI.getRegClass(P.Reg);
1381 if (!P.SubReg)
1382 return RC == &TRC;
1383 auto *TRI = MRI.getTargetRegisterInfo();
1384 return RC == TRI->getMatchingSuperRegClass(RC, &TRC, P.SubReg);
1385 }
1386
1387 /// \brief Create RegSubRegPair from a register MachineOperand
1388 inline
getRegSubRegPair(const MachineOperand & O)1389 TargetInstrInfo::RegSubRegPair getRegSubRegPair(const MachineOperand &O) {
1390 assert(O.isReg());
1391 return TargetInstrInfo::RegSubRegPair(O.getReg(), O.getSubReg());
1392 }
1393
1394 /// \brief Return the SubReg component from REG_SEQUENCE
/// NOTE(review): \p MI is presumably required to be a REG_SEQUENCE and
/// \p SubReg the sub-register index to look up among its operands; confirm
/// against the definition.
1395 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
1396 unsigned SubReg);
1397
1398 /// \brief Return the defining instruction for a given reg:subreg pair
1399 /// skipping copy like instructions and subreg-manipulation pseudos.
1400 /// Following another subreg of a reg:subreg isn't supported.
1401 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1402 MachineRegisterInfo &MRI);
1403
1404 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1405 /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
1406 /// attempt to track between blocks.
1407 bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
1408 Register VReg,
1409 const MachineInstr &DefMI,
1410 const MachineInstr &UseMI);
1411
1412 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1413 /// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
1414 /// track between blocks.
1415 bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
1416 Register VReg,
1417 const MachineInstr &DefMI);
1418
1419 namespace AMDGPU {
1420
// Opcode-to-opcode mapping helpers. Only the declarations are in this header.
// NOTE(review): the undocumented ones presumably follow the same convention
// as getIfAddr64Inst below (the mapped opcode, or -1 when no mapping
// exists); confirm against the definitions.
1421 LLVM_READONLY
1422 int getVOPe64(uint16_t Opcode);
1423
1424 LLVM_READONLY
1425 int getVOPe32(uint16_t Opcode);
1426
1427 LLVM_READONLY
1428 int getSDWAOp(uint16_t Opcode);
1429
1430 LLVM_READONLY
1431 int getDPPOp32(uint16_t Opcode);
1432
1433 LLVM_READONLY
1434 int getDPPOp64(uint16_t Opcode);
1435
1436 LLVM_READONLY
1437 int getBasicFromSDWAOp(uint16_t Opcode);
1438
1439 LLVM_READONLY
1440 int getCommuteRev(uint16_t Opcode);
1441
1442 LLVM_READONLY
1443 int getCommuteOrig(uint16_t Opcode);
1444
1445 LLVM_READONLY
1446 int getAddr64Inst(uint16_t Opcode);
1447
1448 /// Check if \p Opcode is an Addr64 opcode.
1449 ///
1450 /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
1451 LLVM_READONLY
1452 int getIfAddr64Inst(uint16_t Opcode);
1453
1454 LLVM_READONLY
1455 int getAtomicNoRetOp(uint16_t Opcode);
1456
1457 LLVM_READONLY
1458 int getSOPKOp(uint16_t Opcode);
1459
1460 /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
1461 /// of a VADDR form.
1462 LLVM_READONLY
1463 int getGlobalSaddrOp(uint16_t Opcode);
1464
1465 /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
1466 /// of a SADDR form.
1467 LLVM_READONLY
1468 int getGlobalVaddrOp(uint16_t Opcode);
1469
1470 LLVM_READONLY
1471 int getVCMPXNoSDstOp(uint16_t Opcode);
1472
1473 /// \returns ST form with only immediate offset of a FLAT Scratch instruction
1474 /// given an \p Opcode of an SS (SADDR) form.
1475 LLVM_READONLY
1476 int getFlatScratchInstSTfromSS(uint16_t Opcode);
1477
1478 /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1479 /// of an SVS (SADDR + VADDR) form.
1480 LLVM_READONLY
1481 int getFlatScratchInstSVfromSVS(uint16_t Opcode);
1482
1483 /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
1484 /// of an SV (VADDR) form.
1485 LLVM_READONLY
1486 int getFlatScratchInstSSfromSV(uint16_t Opcode);
1487
1488 /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1489 /// of an SS (SADDR) form.
1490 LLVM_READONLY
1491 int getFlatScratchInstSVfromSS(uint16_t Opcode);
1492
1493 /// \returns earlyclobber version of a MAC MFMA if it exists.
1494 LLVM_READONLY
1495 int getMFMAEarlyClobberOp(uint16_t Opcode);
1496
1497 /// \returns v_cmpx version of a v_cmp instruction.
1498 LLVM_READONLY
1499 int getVCMPXOpFromVCMP(uint16_t Opcode);
1500
// NOTE(review): presumably field masks and shift amounts for a 64-bit
// resource descriptor value; the "32 +" terms place the fields in the upper
// 32 bits. Confirm against the users (e.g. getDefaultRsrcDataFormat).
1501 const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
1502 const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
1503 const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
1504 const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
1505
1506 } // end namespace AMDGPU
1507
1508 namespace AMDGPU {
// Target-specific asm-comment flags. The first value is taken from
// MachineInstr::TAsmComments.
1509 enum AsmComments {
1510 // For sgpr to vgpr spill instructions
1511 SGPR_SPILL = MachineInstr::TAsmComments
1512 };
1513 } // namespace AMDGPU
1514
namespace SI {
namespace KernelInputOffsets {

/// Byte offsets of the kernel input fields, measured from the start of the
/// input buffer. Consecutive entries are 4 bytes apart.
enum Offsets {
  // NGROUPS_{X,Y,Z}
  NGROUPS_X = 0,
  NGROUPS_Y = 4,
  NGROUPS_Z = 8,

  // GLOBAL_SIZE_{X,Y,Z}
  GLOBAL_SIZE_X = 12,
  GLOBAL_SIZE_Y = 16,
  GLOBAL_SIZE_Z = 20,

  // LOCAL_SIZE_{X,Y,Z}
  LOCAL_SIZE_X = 24,
  LOCAL_SIZE_Y = 28,
  LOCAL_SIZE_Z = 32,
};

} // end namespace KernelInputOffsets
} // end namespace SI
1533
1534 } // end namespace llvm
1535
1536 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1537