1 //===- SIInstrInfo.h - SI Instruction Info Interface ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Interface definition for SIInstrInfo.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
15 #define LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
16 
17 #include "AMDGPUMIRFormatter.h"
18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19 #include "SIRegisterInfo.h"
20 #include "Utils/AMDGPUBaseInfo.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetSchedule.h"
24 
25 #define GET_INSTRINFO_HEADER
26 #include "AMDGPUGenInstrInfo.inc"
27 
28 namespace llvm {
29 
30 class APInt;
31 class GCNSubtarget;
32 class LiveVariables;
33 class MachineDominatorTree;
34 class MachineRegisterInfo;
35 class RegScavenger;
36 class TargetRegisterClass;
37 class ScheduleHazardRecognizer;
38 
/// Mark the MMO of a uniform load if there are no potentially clobbering stores
/// on any path from the start of an entry function to this load.
///
/// The flag is an alias for MachineMemOperand::MOTargetFlag1.
static const MachineMemOperand::Flags MONoClobber =
    MachineMemOperand::MOTargetFlag1;
43 
44 class SIInstrInfo final : public AMDGPUGenInstrInfo {
45 private:
  // Register info object stored by value; returned by getRegisterInfo().
  const SIRegisterInfo RI;
  // Subtarget reference; returned by getSubtarget().
  const GCNSubtarget &ST;
  TargetSchedModel SchedModel;
  // Declared mutable, so const member functions are allowed to (re)assign it.
  mutable std::unique_ptr<AMDGPUMIRFormatter> Formatter;
50 
  // The inverse predicate should have the negative value.
  // The enumerators therefore come in +N / -N pairs: SCC_TRUE / SCC_FALSE
  // (1 / -1), VCCNZ / VCCZ (2 / -2) and EXECZ / EXECNZ (3 / -3).
  // INVALID_BR is 0.
  enum BranchPredicate {
    INVALID_BR = 0,
    SCC_TRUE = 1,
    SCC_FALSE = -1,
    VCCNZ = 2,
    VCCZ = -2,
    EXECNZ = -3,
    EXECZ = 3
  };
61 
  // Worklist container type taken by the private lowering/splitting helpers
  // of this class.
  using SetVectorType = SmallSetVector<MachineInstr *, 32>;

  // Mapping between a BranchPredicate and a branch opcode, in both directions.
  // Only declared here.
  static unsigned getBranchOpcode(BranchPredicate Cond);
  static BranchPredicate getBranchPredicate(unsigned Opcode);
66 
67 public:
  // Sub-register extraction helpers. Both take the same argument list; the
  // first returns an unsigned register value, the second a MachineOperand.
  // NOTE(review): judging by its name, the OrImm variant presumably also
  // accepts an immediate SuperReg operand -- confirm against the definition.
  unsigned buildExtractSubReg(MachineBasicBlock::iterator MI,
                              MachineRegisterInfo &MRI,
                              MachineOperand &SuperReg,
                              const TargetRegisterClass *SuperRC,
                              unsigned SubIdx,
                              const TargetRegisterClass *SubRC) const;
  MachineOperand buildExtractSubRegOrImm(MachineBasicBlock::iterator MI,
                                         MachineRegisterInfo &MRI,
                                         MachineOperand &SuperReg,
                                         const TargetRegisterClass *SuperRC,
                                         unsigned SubIdx,
                                         const TargetRegisterClass *SubRC) const;
80 private:
  // Private helper operating on a single instruction (declaration only).
  void swapOperands(MachineInstr &Inst) const;

  // Worklist-based helper; returns a (bool, MachineBasicBlock *) pair.
  // \p MDT is optional and defaults to nullptr.
  std::pair<bool, MachineBasicBlock *>
  moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;

  // Worklist-based helper; \p MDT is optional and defaults to nullptr.
  void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
                   MachineDominatorTree *MDT = nullptr) const;
89 
  // Private worklist-based helpers (declarations only). Each takes the
  // worklist and the instruction to process; the split* variants additionally
  // take an opcode.
  void lowerScalarAbs(SetVectorType &Worklist,
                      MachineInstr &Inst) const;

  void lowerScalarXnor(SetVectorType &Worklist,
                       MachineInstr &Inst) const;

  void splitScalarNotBinop(SetVectorType &Worklist,
                           MachineInstr &Inst,
                           unsigned Opcode) const;

  void splitScalarBinOpN2(SetVectorType &Worklist,
                          MachineInstr &Inst,
                          unsigned Opcode) const;
103 
  // Private helpers for 64-bit scalar instructions (declarations only).
  // \p Swap defaults to false.
  void splitScalar64BitUnaryOp(SetVectorType &Worklist,
                               MachineInstr &Inst, unsigned Opcode,
                               bool Swap = false) const;

  // \p MDT is optional and defaults to nullptr in the three helpers below.
  void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
                              MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
                                unsigned Opcode,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
                                MachineDominatorTree *MDT = nullptr) const;

  void splitScalar64BitBCNT(SetVectorType &Worklist,
                            MachineInstr &Inst) const;
  void splitScalar64BitBFE(SetVectorType &Worklist,
                           MachineInstr &Inst) const;
  // Unlike the helpers above, this one also receives the register info.
  void movePackToVALU(SetVectorType &Worklist,
                      MachineRegisterInfo &MRI,
                      MachineInstr &Inst) const;
125 
  // Worklist population helpers (declarations only); each receives the
  // worklist by reference as an in/out argument.
  void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
                                    SetVectorType &Worklist) const;

  // \p NewCond is optional and defaults to a default-constructed Register.
  void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
                                    MachineInstr &SCCDefInst,
                                    SetVectorType &Worklist,
                                    Register NewCond = Register()) const;
  void addSCCDefsToVALUWorklist(MachineOperand &Op,
                                SetVectorType &Worklist) const;
135 
  // Private query helpers (declarations only).
  const TargetRegisterClass *
  getDestEquivalentVGPRClass(const MachineInstr &Inst) const;

  bool checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                    const MachineInstr &MIb) const;

  // \p OpIndices is declared as an array of three operand indices (the
  // parameter decays to int *).
  Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
143 
144 protected:
  // Takes two source operands together with their operand names
  // (declaration only).
  bool swapSourceModifiers(MachineInstr &MI,
                           MachineOperand &Src0, unsigned Src0OpName,
                           MachineOperand &Src1, unsigned Src1OpName) const;

  // Override of the base-class commute hook.
  MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                       unsigned OpIdx0,
                                       unsigned OpIdx1) const override;
152 
153 public:
  // Target flags for operands. Where a flag maps to a relocation, the mapping
  // is spelled out next to the enumerator.
  enum TargetOperandFlags {
    // Mask covering the low four bits.
    MO_MASK = 0xf,

    MO_NONE = 0,
    // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
    MO_GOTPCREL = 1,
    // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
    // MO_GOTPCREL32 is an alias with the same value.
    MO_GOTPCREL32 = 2,
    MO_GOTPCREL32_LO = 2,
    // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
    MO_GOTPCREL32_HI = 3,
    // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
    // MO_REL32 is an alias with the same value.
    MO_REL32 = 4,
    MO_REL32_LO = 4,
    // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
    MO_REL32_HI = 5,

    MO_FAR_BRANCH_OFFSET = 6,

    // Note: the value 7 is not assigned.
    MO_ABS32_LO = 8,
    MO_ABS32_HI = 9,
  };
176 
  // Constructs the instruction info for the given subtarget. Marked explicit,
  // so a GCNSubtarget never converts to an SIInstrInfo implicitly.
  explicit SIInstrInfo(const GCNSubtarget &ST);
178 
179   const SIRegisterInfo &getRegisterInfo() const {
180     return RI;
181   }
182 
183   const GCNSubtarget &getSubtarget() const {
184     return ST;
185   }
186 
  // Overrides of base-class hooks (declarations only).
  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;

  bool isIgnorableUse(const MachineOperand &MO) const override;

  // \p Offset1 and \p Offset2 are passed by non-const reference.
  bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                               int64_t &Offset1,
                               int64_t &Offset2) const override;

  // Declared final. \p BaseOps, \p Offset, \p OffsetIsScalable and \p Width
  // are passed by non-const reference.
  bool getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt,
      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
      bool &OffsetIsScalable, unsigned &Width,
      const TargetRegisterInfo *TRI) const final;

  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                           ArrayRef<const MachineOperand *> BaseOps2,
                           unsigned NumLoads, unsigned NumBytes) const override;

  bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                               int64_t Offset1, unsigned NumLoads) const override;
207 
  // Override of the base-class physical register copy hook.
  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                   const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
                   bool KillSrc) const override;

  // Takes an insertion point (\p MBB, \p MI), a destination register and a
  // 64-bit signed value (declaration only).
  void materializeImmediate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI,
                            const DebugLoc &DL,
                            unsigned DestReg,
                            int64_t Value) const;

  const TargetRegisterClass *getPreferredSelectRegClass(
                               unsigned Size) const;

  // insertNE / insertEQ share one signature: an insertion point, a source
  // register and an int value; each returns a Register.
  // NOTE(review): presumably these emit a compare of SrcReg against Value --
  // confirm against the definitions.
  Register insertNE(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value) const;

  Register insertEQ(MachineBasicBlock *MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register SrcReg, int Value)  const;
228 
  // Overrides of the base-class stack-slot store/load hooks; both identify
  // the slot by \p FrameIndex.
  void storeRegToStackSlot(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, Register SrcReg,
                           bool isKill, int FrameIndex,
                           const TargetRegisterClass *RC,
                           const TargetRegisterInfo *TRI) const override;

  void loadRegFromStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MI, Register DestReg,
                            int FrameIndex, const TargetRegisterClass *RC,
                            const TargetRegisterInfo *TRI) const override;

  // Override of the base-class post-RA pseudo expansion hook.
  bool expandPostRAPseudo(MachineInstr &MI) const override;
241 
  // Splits a V_MOV_B64_DPP_PSEUDO opcode into a pair of v_mov_b32_dpp
  // instructions. Returns a pair of generated instructions.
  // Can split either post-RA with physical registers or pre-RA with
  // virtual registers. In the latter case the IR needs to be in SSA form and
  // a REG_SEQUENCE is produced to define the original register.
  std::pair<MachineInstr*, MachineInstr*>
  expandMovDPP64(MachineInstr &MI) const;
249 
  // Returns an opcode that can be used to move a value to a \p DstRC
  // register.  If there is no hardware instruction that can store to \p
  // DstRC, then AMDGPU::COPY is returned.
  unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;

  // Looks up an instruction descriptor from a vector size, an element size
  // and an SGPR flag (declaration only).
  const MCInstrDesc &getIndirectRegWriteMovRelPseudo(unsigned VecSize,
                                                     unsigned EltSize,
                                                     bool IsSGPR) const;

  // Looks up an instruction descriptor from a vector size and an
  // indirect-source flag (declaration only).
  const MCInstrDesc &getIndirectGPRIDXPseudo(unsigned VecSize,
                                             bool IsIndirectSrc) const;
261   LLVM_READONLY
262   int commuteOpcode(unsigned Opc) const;
263 
264   LLVM_READONLY
265   inline int commuteOpcode(const MachineInstr &MI) const {
266     return commuteOpcode(MI.getOpcode());
267   }
268 
  // Override of the base-class hook; the two operand indices are passed by
  // non-const reference.
  bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx1,
                             unsigned &SrcOpIdx2) const override;

  // Non-virtual overload working on an instruction descriptor (taken by
  // value) instead of a MachineInstr.
  bool findCommutedOpIndices(MCInstrDesc Desc, unsigned & SrcOpIdx0,
   unsigned & SrcOpIdx1) const;
274 
  // Branch-related hooks. Everything marked override replaces a base-class
  // virtual; analyzeBranchImpl is specific to this class.
  bool isBranchOffsetInRange(unsigned BranchOpc,
                             int64_t BrOffset) const override;

  MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;

  void insertIndirectBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock &NewDestBB,
                            MachineBasicBlock &RestoreBB, const DebugLoc &DL,
                            int64_t BrOffset, RegScavenger *RS) const override;

  // Same output parameters as analyzeBranch (\p TBB, \p FBB, \p Cond), plus
  // an explicit iterator \p I; \p AllowModify has no default here.
  bool analyzeBranchImpl(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator I,
                         MachineBasicBlock *&TBB,
                         MachineBasicBlock *&FBB,
                         SmallVectorImpl<MachineOperand> &Cond,
                         bool AllowModify) const;

  // \p AllowModify defaults to false.
  bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                     MachineBasicBlock *&FBB,
                     SmallVectorImpl<MachineOperand> &Cond,
                     bool AllowModify = false) const override;

  // \p BytesRemoved is optional and defaults to nullptr.
  unsigned removeBranch(MachineBasicBlock &MBB,
                        int *BytesRemoved = nullptr) const override;

  // \p BytesAdded is optional and defaults to nullptr.
  unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                        MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                        const DebugLoc &DL,
                        int *BytesAdded = nullptr) const override;

  // \p Cond is passed by non-const reference.
  bool reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const override;
307 
  // Override; the three cycle counts are passed by non-const reference.
  bool canInsertSelect(const MachineBasicBlock &MBB,
                       ArrayRef<MachineOperand> Cond, Register DstReg,
                       Register TrueReg, Register FalseReg, int &CondCycles,
                       int &TrueCycles, int &FalseCycles) const override;

  // Override of the base-class select insertion hook.
  void insertSelect(MachineBasicBlock &MBB,
                    MachineBasicBlock::iterator I, const DebugLoc &DL,
                    Register DstReg, ArrayRef<MachineOperand> Cond,
                    Register TrueReg, Register FalseReg) const override;

  // Non-virtual function with the same parameter list as insertSelect.
  void insertVectorSelect(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator I, const DebugLoc &DL,
                          Register DstReg, ArrayRef<MachineOperand> Cond,
                          Register TrueReg, Register FalseReg) const;
322 
  // Override; \p SrcReg, \p SrcReg2, \p CmpMask and \p CmpValue are passed by
  // non-const reference.
  bool analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                      Register &SrcReg2, int64_t &CmpMask,
                      int64_t &CmpValue) const override;

  // Override; takes the same four quantities by value.
  bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
                            Register SrcReg2, int64_t CmpMask, int64_t CmpValue,
                            const MachineRegisterInfo *MRI) const override;

  // Override of the base-class memory disjointness hook.
  bool
  areMemAccessesTriviallyDisjoint(const MachineInstr &MIa,
                                  const MachineInstr &MIb) const override;

  // Static predicate on a single instruction (declaration only).
  static bool isFoldableCopy(const MachineInstr &MI);

  void removeModOperands(MachineInstr &MI) const;

  // Declared final.
  bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg,
                     MachineRegisterInfo *MRI) const final;
341 
342   unsigned getMachineCSELookAheadLimit() const override { return 500; }
343 
  // Override of the base-class three-address conversion hook.
  MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                      LiveIntervals *LIS) const override;

  // Override of the base-class scheduling boundary hook.
  bool isSchedulingBoundary(const MachineInstr &MI,
                            const MachineBasicBlock *MBB,
                            const MachineFunction &MF) const override;
350 
351   static bool isSALU(const MachineInstr &MI) {
352     return MI.getDesc().TSFlags & SIInstrFlags::SALU;
353   }
354 
355   bool isSALU(uint16_t Opcode) const {
356     return get(Opcode).TSFlags & SIInstrFlags::SALU;
357   }
358 
359   static bool isVALU(const MachineInstr &MI) {
360     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
361   }
362 
363   bool isVALU(uint16_t Opcode) const {
364     return get(Opcode).TSFlags & SIInstrFlags::VALU;
365   }
366 
367   static bool isVMEM(const MachineInstr &MI) {
368     return isMUBUF(MI) || isMTBUF(MI) || isMIMG(MI);
369   }
370 
371   bool isVMEM(uint16_t Opcode) const {
372     return isMUBUF(Opcode) || isMTBUF(Opcode) || isMIMG(Opcode);
373   }
374 
375   static bool isSOP1(const MachineInstr &MI) {
376     return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
377   }
378 
379   bool isSOP1(uint16_t Opcode) const {
380     return get(Opcode).TSFlags & SIInstrFlags::SOP1;
381   }
382 
383   static bool isSOP2(const MachineInstr &MI) {
384     return MI.getDesc().TSFlags & SIInstrFlags::SOP2;
385   }
386 
387   bool isSOP2(uint16_t Opcode) const {
388     return get(Opcode).TSFlags & SIInstrFlags::SOP2;
389   }
390 
391   static bool isSOPC(const MachineInstr &MI) {
392     return MI.getDesc().TSFlags & SIInstrFlags::SOPC;
393   }
394 
395   bool isSOPC(uint16_t Opcode) const {
396     return get(Opcode).TSFlags & SIInstrFlags::SOPC;
397   }
398 
399   static bool isSOPK(const MachineInstr &MI) {
400     return MI.getDesc().TSFlags & SIInstrFlags::SOPK;
401   }
402 
403   bool isSOPK(uint16_t Opcode) const {
404     return get(Opcode).TSFlags & SIInstrFlags::SOPK;
405   }
406 
407   static bool isSOPP(const MachineInstr &MI) {
408     return MI.getDesc().TSFlags & SIInstrFlags::SOPP;
409   }
410 
411   bool isSOPP(uint16_t Opcode) const {
412     return get(Opcode).TSFlags & SIInstrFlags::SOPP;
413   }
414 
415   static bool isPacked(const MachineInstr &MI) {
416     return MI.getDesc().TSFlags & SIInstrFlags::IsPacked;
417   }
418 
419   bool isPacked(uint16_t Opcode) const {
420     return get(Opcode).TSFlags & SIInstrFlags::IsPacked;
421   }
422 
423   static bool isVOP1(const MachineInstr &MI) {
424     return MI.getDesc().TSFlags & SIInstrFlags::VOP1;
425   }
426 
427   bool isVOP1(uint16_t Opcode) const {
428     return get(Opcode).TSFlags & SIInstrFlags::VOP1;
429   }
430 
431   static bool isVOP2(const MachineInstr &MI) {
432     return MI.getDesc().TSFlags & SIInstrFlags::VOP2;
433   }
434 
435   bool isVOP2(uint16_t Opcode) const {
436     return get(Opcode).TSFlags & SIInstrFlags::VOP2;
437   }
438 
439   static bool isVOP3(const MachineInstr &MI) {
440     return MI.getDesc().TSFlags & SIInstrFlags::VOP3;
441   }
442 
443   bool isVOP3(uint16_t Opcode) const {
444     return get(Opcode).TSFlags & SIInstrFlags::VOP3;
445   }
446 
447   static bool isSDWA(const MachineInstr &MI) {
448     return MI.getDesc().TSFlags & SIInstrFlags::SDWA;
449   }
450 
451   bool isSDWA(uint16_t Opcode) const {
452     return get(Opcode).TSFlags & SIInstrFlags::SDWA;
453   }
454 
455   static bool isVOPC(const MachineInstr &MI) {
456     return MI.getDesc().TSFlags & SIInstrFlags::VOPC;
457   }
458 
459   bool isVOPC(uint16_t Opcode) const {
460     return get(Opcode).TSFlags & SIInstrFlags::VOPC;
461   }
462 
463   static bool isMUBUF(const MachineInstr &MI) {
464     return MI.getDesc().TSFlags & SIInstrFlags::MUBUF;
465   }
466 
467   bool isMUBUF(uint16_t Opcode) const {
468     return get(Opcode).TSFlags & SIInstrFlags::MUBUF;
469   }
470 
471   static bool isMTBUF(const MachineInstr &MI) {
472     return MI.getDesc().TSFlags & SIInstrFlags::MTBUF;
473   }
474 
475   bool isMTBUF(uint16_t Opcode) const {
476     return get(Opcode).TSFlags & SIInstrFlags::MTBUF;
477   }
478 
479   static bool isSMRD(const MachineInstr &MI) {
480     return MI.getDesc().TSFlags & SIInstrFlags::SMRD;
481   }
482 
483   bool isSMRD(uint16_t Opcode) const {
484     return get(Opcode).TSFlags & SIInstrFlags::SMRD;
485   }
486 
487   bool isBufferSMRD(const MachineInstr &MI) const;
488 
489   static bool isDS(const MachineInstr &MI) {
490     return MI.getDesc().TSFlags & SIInstrFlags::DS;
491   }
492 
493   bool isDS(uint16_t Opcode) const {
494     return get(Opcode).TSFlags & SIInstrFlags::DS;
495   }
496 
497   bool isAlwaysGDS(uint16_t Opcode) const;
498 
499   static bool isMIMG(const MachineInstr &MI) {
500     return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
501   }
502 
503   bool isMIMG(uint16_t Opcode) const {
504     return get(Opcode).TSFlags & SIInstrFlags::MIMG;
505   }
506 
507   static bool isGather4(const MachineInstr &MI) {
508     return MI.getDesc().TSFlags & SIInstrFlags::Gather4;
509   }
510 
511   bool isGather4(uint16_t Opcode) const {
512     return get(Opcode).TSFlags & SIInstrFlags::Gather4;
513   }
514 
515   static bool isFLAT(const MachineInstr &MI) {
516     return MI.getDesc().TSFlags & SIInstrFlags::FLAT;
517   }
518 
519   // Is a FLAT encoded instruction which accesses a specific segment,
520   // i.e. global_* or scratch_*.
521   static bool isSegmentSpecificFLAT(const MachineInstr &MI) {
522     auto Flags = MI.getDesc().TSFlags;
523     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
524   }
525 
526   bool isSegmentSpecificFLAT(uint16_t Opcode) const {
527     auto Flags = get(Opcode).TSFlags;
528     return Flags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
529   }
530 
531   static bool isFLATGlobal(const MachineInstr &MI) {
532     return MI.getDesc().TSFlags & SIInstrFlags::FlatGlobal;
533   }
534 
535   bool isFLATGlobal(uint16_t Opcode) const {
536     return get(Opcode).TSFlags & SIInstrFlags::FlatGlobal;
537   }
538 
539   static bool isFLATScratch(const MachineInstr &MI) {
540     return MI.getDesc().TSFlags & SIInstrFlags::FlatScratch;
541   }
542 
543   bool isFLATScratch(uint16_t Opcode) const {
544     return get(Opcode).TSFlags & SIInstrFlags::FlatScratch;
545   }
546 
547   // Any FLAT encoded instruction, including global_* and scratch_*.
548   bool isFLAT(uint16_t Opcode) const {
549     return get(Opcode).TSFlags & SIInstrFlags::FLAT;
550   }
551 
552   static bool isEXP(const MachineInstr &MI) {
553     return MI.getDesc().TSFlags & SIInstrFlags::EXP;
554   }
555 
556   static bool isDualSourceBlendEXP(const MachineInstr &MI) {
557     if (!isEXP(MI))
558       return false;
559     unsigned Target = MI.getOperand(0).getImm();
560     return Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND0 ||
561            Target == AMDGPU::Exp::ET_DUAL_SRC_BLEND1;
562   }
563 
564   bool isEXP(uint16_t Opcode) const {
565     return get(Opcode).TSFlags & SIInstrFlags::EXP;
566   }
567 
568   static bool isAtomicNoRet(const MachineInstr &MI) {
569     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicNoRet;
570   }
571 
572   bool isAtomicNoRet(uint16_t Opcode) const {
573     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicNoRet;
574   }
575 
576   static bool isAtomicRet(const MachineInstr &MI) {
577     return MI.getDesc().TSFlags & SIInstrFlags::IsAtomicRet;
578   }
579 
580   bool isAtomicRet(uint16_t Opcode) const {
581     return get(Opcode).TSFlags & SIInstrFlags::IsAtomicRet;
582   }
583 
584   static bool isAtomic(const MachineInstr &MI) {
585     return MI.getDesc().TSFlags & (SIInstrFlags::IsAtomicRet |
586                                    SIInstrFlags::IsAtomicNoRet);
587   }
588 
589   bool isAtomic(uint16_t Opcode) const {
590     return get(Opcode).TSFlags & (SIInstrFlags::IsAtomicRet |
591                                   SIInstrFlags::IsAtomicNoRet);
592   }
593 
594   static bool isWQM(const MachineInstr &MI) {
595     return MI.getDesc().TSFlags & SIInstrFlags::WQM;
596   }
597 
598   bool isWQM(uint16_t Opcode) const {
599     return get(Opcode).TSFlags & SIInstrFlags::WQM;
600   }
601 
602   static bool isDisableWQM(const MachineInstr &MI) {
603     return MI.getDesc().TSFlags & SIInstrFlags::DisableWQM;
604   }
605 
606   bool isDisableWQM(uint16_t Opcode) const {
607     return get(Opcode).TSFlags & SIInstrFlags::DisableWQM;
608   }
609 
610   static bool isVGPRSpill(const MachineInstr &MI) {
611     return MI.getDesc().TSFlags & SIInstrFlags::VGPRSpill;
612   }
613 
614   bool isVGPRSpill(uint16_t Opcode) const {
615     return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
616   }
617 
618   static bool isSGPRSpill(const MachineInstr &MI) {
619     return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
620   }
621 
622   bool isSGPRSpill(uint16_t Opcode) const {
623     return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
624   }
625 
626   static bool isDPP(const MachineInstr &MI) {
627     return MI.getDesc().TSFlags & SIInstrFlags::DPP;
628   }
629 
630   bool isDPP(uint16_t Opcode) const {
631     return get(Opcode).TSFlags & SIInstrFlags::DPP;
632   }
633 
634   static bool isTRANS(const MachineInstr &MI) {
635     return MI.getDesc().TSFlags & SIInstrFlags::TRANS;
636   }
637 
638   bool isTRANS(uint16_t Opcode) const {
639     return get(Opcode).TSFlags & SIInstrFlags::TRANS;
640   }
641 
642   static bool isVOP3P(const MachineInstr &MI) {
643     return MI.getDesc().TSFlags & SIInstrFlags::VOP3P;
644   }
645 
646   bool isVOP3P(uint16_t Opcode) const {
647     return get(Opcode).TSFlags & SIInstrFlags::VOP3P;
648   }
649 
650   static bool isVINTRP(const MachineInstr &MI) {
651     return MI.getDesc().TSFlags & SIInstrFlags::VINTRP;
652   }
653 
654   bool isVINTRP(uint16_t Opcode) const {
655     return get(Opcode).TSFlags & SIInstrFlags::VINTRP;
656   }
657 
658   static bool isMAI(const MachineInstr &MI) {
659     return MI.getDesc().TSFlags & SIInstrFlags::IsMAI;
660   }
661 
662   bool isMAI(uint16_t Opcode) const {
663     return get(Opcode).TSFlags & SIInstrFlags::IsMAI;
664   }
665 
666   static bool isMFMA(const MachineInstr &MI) {
667     return isMAI(MI) && MI.getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 &&
668            MI.getOpcode() != AMDGPU::V_ACCVGPR_READ_B32_e64;
669   }
670 
671   static bool isDOT(const MachineInstr &MI) {
672     return MI.getDesc().TSFlags & SIInstrFlags::IsDOT;
673   }
674 
675   static bool isWMMA(const MachineInstr &MI) {
676     return MI.getDesc().TSFlags & SIInstrFlags::IsWMMA;
677   }
678 
679   bool isWMMA(uint16_t Opcode) const {
680     return get(Opcode).TSFlags & SIInstrFlags::IsWMMA;
681   }
682 
683   bool isDOT(uint16_t Opcode) const {
684     return get(Opcode).TSFlags & SIInstrFlags::IsDOT;
685   }
686 
687   static bool isLDSDIR(const MachineInstr &MI) {
688     return MI.getDesc().TSFlags & SIInstrFlags::LDSDIR;
689   }
690 
691   bool isLDSDIR(uint16_t Opcode) const {
692     return get(Opcode).TSFlags & SIInstrFlags::LDSDIR;
693   }
694 
695   static bool isVINTERP(const MachineInstr &MI) {
696     return MI.getDesc().TSFlags & SIInstrFlags::VINTERP;
697   }
698 
699   bool isVINTERP(uint16_t Opcode) const {
700     return get(Opcode).TSFlags & SIInstrFlags::VINTERP;
701   }
702 
703   static bool isScalarUnit(const MachineInstr &MI) {
704     return MI.getDesc().TSFlags & (SIInstrFlags::SALU | SIInstrFlags::SMRD);
705   }
706 
707   static bool usesVM_CNT(const MachineInstr &MI) {
708     return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
709   }
710 
711   static bool usesLGKM_CNT(const MachineInstr &MI) {
712     return MI.getDesc().TSFlags & SIInstrFlags::LGKM_CNT;
713   }
714 
715   static bool sopkIsZext(const MachineInstr &MI) {
716     return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
717   }
718 
719   bool sopkIsZext(uint16_t Opcode) const {
720     return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
721   }
722 
723   /// \returns true if this is an s_store_dword* instruction. This is more
724   /// specific than than isSMEM && mayStore.
725   static bool isScalarStore(const MachineInstr &MI) {
726     return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
727   }
728 
729   bool isScalarStore(uint16_t Opcode) const {
730     return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
731   }
732 
733   static bool isFixedSize(const MachineInstr &MI) {
734     return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
735   }
736 
737   bool isFixedSize(uint16_t Opcode) const {
738     return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
739   }
740 
741   static bool hasFPClamp(const MachineInstr &MI) {
742     return MI.getDesc().TSFlags & SIInstrFlags::FPClamp;
743   }
744 
745   bool hasFPClamp(uint16_t Opcode) const {
746     return get(Opcode).TSFlags & SIInstrFlags::FPClamp;
747   }
748 
749   static bool hasIntClamp(const MachineInstr &MI) {
750     return MI.getDesc().TSFlags & SIInstrFlags::IntClamp;
751   }
752 
753   uint64_t getClampMask(const MachineInstr &MI) const {
754     const uint64_t ClampFlags = SIInstrFlags::FPClamp |
755                                 SIInstrFlags::IntClamp |
756                                 SIInstrFlags::ClampLo |
757                                 SIInstrFlags::ClampHi;
758       return MI.getDesc().TSFlags & ClampFlags;
759   }
760 
761   static bool usesFPDPRounding(const MachineInstr &MI) {
762     return MI.getDesc().TSFlags & SIInstrFlags::FPDPRounding;
763   }
764 
765   bool usesFPDPRounding(uint16_t Opcode) const {
766     return get(Opcode).TSFlags & SIInstrFlags::FPDPRounding;
767   }
768 
769   static bool isFPAtomic(const MachineInstr &MI) {
770     return MI.getDesc().TSFlags & SIInstrFlags::FPAtomic;
771   }
772 
773   bool isFPAtomic(uint16_t Opcode) const {
774     return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
775   }
776 
777   bool isVGPRCopy(const MachineInstr &MI) const {
778     assert(MI.isCopy());
779     Register Dest = MI.getOperand(0).getReg();
780     const MachineFunction &MF = *MI.getParent()->getParent();
781     const MachineRegisterInfo &MRI = MF.getRegInfo();
782     return !RI.isSGPRReg(MRI, Dest);
783   }
784 
785   bool hasVGPRUses(const MachineInstr &MI) const {
786     const MachineFunction &MF = *MI.getParent()->getParent();
787     const MachineRegisterInfo &MRI = MF.getRegInfo();
788     return llvm::any_of(MI.explicit_uses(),
789                         [&MRI, this](const MachineOperand &MO) {
790       return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
791   }
792 
  /// Return true if the instruction modifies the mode register.
  static bool modifiesModeRegister(const MachineInstr &MI);

  /// Whether we must prevent this instruction from executing with EXEC = 0.
  bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

  /// Returns true if the instruction could potentially depend on the value of
  /// exec. If false, exec dependencies may safely be ignored.
  bool mayReadEXEC(const MachineRegisterInfo &MRI, const MachineInstr &MI) const;
802 
803   bool isInlineConstant(const APInt &Imm) const;
804 
805   bool isInlineConstant(const APFloat &Imm) const {
806     return isInlineConstant(Imm.bitcastToAPInt());
807   }
808 
809   bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
810 
811   bool isInlineConstant(const MachineOperand &MO,
812                         const MCOperandInfo &OpInfo) const {
813     return isInlineConstant(MO, OpInfo.OperandType);
814   }
815 
816   /// \p returns true if \p UseMO is substituted with \p DefMO in \p MI it would
817   /// be an inline immediate.
818   bool isInlineConstant(const MachineInstr &MI,
819                         const MachineOperand &UseMO,
820                         const MachineOperand &DefMO) const {
821     assert(UseMO.getParent() == &MI);
822     int OpIdx = MI.getOperandNo(&UseMO);
823     if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
824       return false;
825     }
826 
827     return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
828   }
829 
830   /// \p returns true if the operand \p OpIdx in \p MI is a valid inline
831   /// immediate.
832   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
833     const MachineOperand &MO = MI.getOperand(OpIdx);
834     return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
835   }
836 
837   /// \returns true if \p MO would be an inline immediate as operand \p OpIdx
838   /// of \p MI; false if \p OpIdx has no operand-info entry in MI's descriptor.
839   bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
840                         const MachineOperand &MO) const {
841     const MCInstrDesc &Desc = MI.getDesc();
842     if (!Desc.OpInfo || OpIdx >= Desc.NumOperands)
843       return false;
844     if (!MI.isCopy())
845       return isInlineConstant(MO, Desc.OpInfo[OpIdx].OperandType);
846     // For a copy, pick the integer immediate type from the operand byte size.
847     const unsigned Size = getOpSize(MI, OpIdx);
848     assert(Size == 8 || Size == 4);
849     const uint8_t OpType = (Size == 8) ? AMDGPU::OPERAND_REG_IMM_INT64
850                                        : AMDGPU::OPERAND_REG_IMM_INT32;
851     return isInlineConstant(MO, OpType);
852   }
853 
854   bool isInlineConstant(const MachineOperand &MO) const {
855     const MachineInstr &Owner = *MO.getParent(); // instruction holding MO
856     return isInlineConstant(Owner, Owner.getOperandNo(&MO));
857   }
858 
859   bool isLiteralConstant(const MachineOperand &MO,
860                          const MCOperandInfo &OpInfo) const {
861     return MO.isImm() && !isInlineConstant(MO, OpInfo); // non-inline imm
862   }
863 
864   bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
865     // Only immediates can be literals; inline constants are excluded.
866     return MI.getOperand(OpIdx).isImm() && !isInlineConstant(MI, OpIdx);
867   }
868 
869   /// Returns true if \p MO could potentially require a 32-bit literal, but
870   /// not necessarily. A FrameIndex, for example, could resolve to an inline
871   /// immediate value that will not require an additional 4 bytes; this
872   /// assumes that it will.
873   bool isLiteralConstantLike(const MachineOperand &MO,
874                              const MCOperandInfo &OpInfo) const;
875 
876   bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
877                          const MachineOperand &MO) const;
878 
879   /// Return true if the 64-bit VALU instruction \p Opcode has a 32-bit
880   /// encoding. Returns false if \p Opcode is itself a 32-bit instruction.
881   bool hasVALU32BitEncoding(unsigned Opcode) const;
882 
883   /// Returns true if operand \p MO uses the constant bus.
884   bool usesConstantBus(const MachineRegisterInfo &MRI,
885                        const MachineOperand &MO,
886                        const MCOperandInfo &OpInfo) const;
887 
888   /// Return true if the instruction \p Opcode has any modifiers,
889   /// e.g. src[012]_mod, omod, clamp.
890   bool hasModifiers(unsigned Opcode) const;
891 
892   bool hasModifiersSet(const MachineInstr &MI,
893                        unsigned OpName) const;
894   bool hasAnyModifiersSet(const MachineInstr &MI) const;
895 
896   bool canShrink(const MachineInstr &MI,
897                  const MachineRegisterInfo &MRI) const;
898 
899   MachineInstr *buildShrunkInst(MachineInstr &MI,
900                                 unsigned NewOpcode) const;
901 
902   bool verifyInstruction(const MachineInstr &MI,
903                          StringRef &ErrInfo) const override;
904 
905   unsigned getVALUOp(const MachineInstr &MI) const;
906 
907   /// Return the correct register class for \p OpNo. For target-specific
908   /// instructions, this will return the register class that has been defined
909   /// in tablegen. For generic instructions, like REG_SEQUENCE, it will return
910   /// the register class of its machine operand, presumably so that the correct
911   /// register class can be inferred from the other operands (TODO: confirm).
912   const TargetRegisterClass *getOpRegClass(const MachineInstr &MI,
913                                            unsigned OpNo) const;
914 
915   /// Return the size in bytes of operand \p OpNo of the instruction with
916   /// opcode \p Opcode. Operands without a register class are sized as 4 bytes.
917   unsigned getOpSize(uint16_t Opcode, unsigned OpNo) const {
918     const MCOperandInfo &Info = get(Opcode).OpInfo[OpNo];
919     if (Info.RegClass != -1) {
920       const TargetRegisterClass *RC = RI.getRegClass(Info.RegClass);
921       return RI.getRegSizeInBits(*RC) / 8;
922     }
923 
924     // If this is an immediate operand, this must be a 32-bit literal.
925     assert(Info.OperandType == MCOI::OPERAND_IMMEDIATE);
926     return 4;
927   }
928 
929   /// Return the size in bytes of operand \p OpNo of \p MI. This form should
930   /// usually be preferred since it handles operands with unknown register
931   /// classes. A subregister index on a register operand determines the size.
932   unsigned getOpSize(const MachineInstr &MI, unsigned OpNo) const {
933     const MachineOperand &Op = MI.getOperand(OpNo);
934     const unsigned SubIdx = Op.isReg() ? Op.getSubReg() : 0;
935     if (SubIdx != 0)
936       return RI.getSubRegIdxSize(SubIdx) / 8;
937 
938     return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
939   }
940 
941   /// Legalize the \p OpIdx operand of \p MI by inserting
942   /// a MOV.  For example:
943   /// ADD_I32_e32 VGPR0, 15
944   /// to
945   /// MOV VGPR1, 15
946   /// ADD_I32_e32 VGPR0, VGPR1
947   ///
948   /// If the operand being legalized is a register, then a COPY will be used
949   /// instead of MOV.
950   void legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const;
951 
952   /// Check if \p MO would be a legal operand if it were operand \p OpIdx
953   /// of \p MI.
954   bool isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
955                       const MachineOperand *MO = nullptr) const;
956 
957   /// Check if \p MO would be a valid operand for the given operand
958   /// definition \p OpInfo. Note this does not attempt to validate constant bus
959   /// restrictions (e.g. literal constant usage).
960   bool isLegalVSrcOperand(const MachineRegisterInfo &MRI,
961                           const MCOperandInfo &OpInfo,
962                           const MachineOperand &MO) const;
963 
964   /// Check if \p MO (a register operand) is a legal register for the
965   /// given operand description \p OpInfo.
966   bool isLegalRegOperand(const MachineRegisterInfo &MRI,
967                          const MCOperandInfo &OpInfo,
968                          const MachineOperand &MO) const;
969 
970   /// Legalize the operands of \p MI by either commuting it or inserting a
971   /// copy of src1.
972   void legalizeOperandsVOP2(MachineRegisterInfo &MRI, MachineInstr &MI) const;
973 
974   /// Fix the operands of \p MI to satisfy constant bus requirements.
975   void legalizeOperandsVOP3(MachineRegisterInfo &MRI, MachineInstr &MI) const;
976 
977   /// Copy a value from a VGPR (\p SrcReg) to SGPR.  This function can only
978   /// be used when it is known that the value in \p SrcReg is the same across
979   /// all threads in the wave.
980   /// \returns The SGPR register that \p SrcReg was copied to.
981   Register readlaneVGPRToSGPR(Register SrcReg, MachineInstr &UseMI,
982                               MachineRegisterInfo &MRI) const;
983 
984   void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
985   void legalizeOperandsFLAT(MachineRegisterInfo &MRI, MachineInstr &MI) const;
986 
987   void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
988                               MachineBasicBlock::iterator I,
989                               const TargetRegisterClass *DstRC,
990                               MachineOperand &Op, MachineRegisterInfo &MRI,
991                               const DebugLoc &DL) const;
992 
993   /// Legalize all operands in \p MI.  This function may create new
994   /// instructions and control-flow around \p MI.  If present, \p MDT is
995   /// updated.
996   /// \returns A new basic block that contains \p MI if new blocks were created.
997   MachineBasicBlock *
998   legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
999 
1000   /// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
1001   /// was moved to VGPR. \returns true if succeeded.
1002   bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
1003 
1004   /// Replace the opcode of \p MI with the equivalent VALU
1005   /// opcode.  This function will also move the users of \p MI to the
1006   /// VALU if necessary. If present, \p MDT is updated.
1007   MachineBasicBlock *moveToVALU(MachineInstr &MI,
1008                                 MachineDominatorTree *MDT = nullptr) const;
1009 
1010   void insertNoop(MachineBasicBlock &MBB,
1011                   MachineBasicBlock::iterator MI) const override;
1012 
1013   void insertNoops(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1014                    unsigned Quantity) const override;
1015 
1016   void insertReturn(MachineBasicBlock &MBB) const;
1017   /// Return the number of wait states that result from executing
1018   /// \p MI.
1019   static unsigned getNumWaitStates(const MachineInstr &MI);
1020 
1021   /// Returns the operand named \p OperandName.  If \p MI does not have an
1022   /// operand named \c OperandName, this function returns nullptr.
1023   LLVM_READONLY
1024   MachineOperand *getNamedOperand(MachineInstr &MI, unsigned OperandName) const;
1025 
1026   LLVM_READONLY
1027   const MachineOperand *getNamedOperand(const MachineInstr &MI,
1028                                         unsigned OpName) const {
1029     return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
1030   }
1031 
1032   /// Get the immediate value of the required operand \p OpName of \p MI.
1033   int64_t getNamedImmOperand(const MachineInstr &MI, unsigned OpName) const {
1034     const int OperandIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), OpName);
1035     return MI.getOperand(OperandIdx).getImm();
1036   }
1037 
1038   uint64_t getDefaultRsrcDataFormat() const;
1039   uint64_t getScratchRsrcWords23() const;
1040 
1041   bool isLowLatencyInstruction(const MachineInstr &MI) const;
1042   bool isHighLatencyDef(int Opc) const override;
1043 
1044   /// Return the descriptor of the MC opcode for pseudo or native \p Opcode.
1045   /// The pseudoToMCOpcode() result (possibly -1) is passed to get() unchecked.
1046   const MCInstrDesc &getMCOpcodeFromPseudo(unsigned Opcode) const {
1047     return get(pseudoToMCOpcode(Opcode));
1048   }
1049 
1050   unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1051   unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
1052 
1053   unsigned isLoadFromStackSlot(const MachineInstr &MI,
1054                                int &FrameIndex) const override;
1055   unsigned isStoreToStackSlot(const MachineInstr &MI,
1056                               int &FrameIndex) const override;
1057 
1058   unsigned getInstBundleSize(const MachineInstr &MI) const;
1059   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
1060 
1061   bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
1062 
1063   bool isNonUniformBranchInstr(MachineInstr &Instr) const;
1064 
1065   void convertNonUniformIfRegion(MachineBasicBlock *IfEntry,
1066                                  MachineBasicBlock *IfEnd) const;
1067 
1068   void convertNonUniformLoopRegion(MachineBasicBlock *LoopEntry,
1069                                    MachineBasicBlock *LoopEnd) const;
1070 
1071   std::pair<unsigned, unsigned>
1072   decomposeMachineOperandsTargetFlags(unsigned TF) const override;
1073 
1074   ArrayRef<std::pair<int, const char *>>
1075   getSerializableTargetIndices() const override;
1076 
1077   ArrayRef<std::pair<unsigned, const char *>>
1078   getSerializableDirectMachineOperandTargetFlags() const override;
1079 
1080   ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
1081   getSerializableMachineMemOperandTargetFlags() const override;
1082 
1083   ScheduleHazardRecognizer *
1084   CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
1085                                  const ScheduleDAG *DAG) const override;
1086 
1087   ScheduleHazardRecognizer *
1088   CreateTargetPostRAHazardRecognizer(const MachineFunction &MF) const override;
1089 
1090   ScheduleHazardRecognizer *
1091   CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
1092                                  const ScheduleDAGMI *DAG) const override;
1093 
1094   bool isBasicBlockPrologue(const MachineInstr &MI) const override;
1095 
1096   MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
1097                                          MachineBasicBlock::iterator InsPt,
1098                                          const DebugLoc &DL, Register Src,
1099                                          Register Dst) const override;
1100 
1101   MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
1102                                     MachineBasicBlock::iterator InsPt,
1103                                     const DebugLoc &DL, Register Src,
1104                                     unsigned SrcSubReg,
1105                                     Register Dst) const override;
1106 
1107   bool isWave32() const;
1108 
1109   /// Return a partially built integer add instruction without carry.
1110   /// The caller must add the source operands.
1111   /// For pre-GFX9 it will generate an unused carry destination operand.
1112   /// TODO: After GFX9 it should return a no-carry operation.
1113   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1114                                     MachineBasicBlock::iterator I,
1115                                     const DebugLoc &DL,
1116                                     Register DestReg) const;
1117 
1118   MachineInstrBuilder getAddNoCarry(MachineBasicBlock &MBB,
1119                                     MachineBasicBlock::iterator I,
1120                                     const DebugLoc &DL,
1121                                     Register DestReg,
1122                                     RegScavenger &RS) const;
1123 
1124   static bool isKillTerminator(unsigned Opcode);
1125   const MCInstrDesc &getKillTerminatorFromPseudo(unsigned Opcode) const;
1126 
1127   static bool isLegalMUBUFImmOffset(unsigned Imm) {
1128     return isUInt<12>(Imm); // true iff Imm fits in 12 unsigned bits
1129   }
1130 
1131   /// Returns true if \p Offset is legal for the subtarget as the offset to a
1132   /// FLAT encoded instruction. NOTE(review): there is no \p Signed parameter;
1133   /// presumably \p FlatVariant selects offset signedness (TODO: confirm).
1134   bool isLegalFLATOffset(int64_t Offset, unsigned AddrSpace,
1135                          uint64_t FlatVariant) const;
1136 
1137   /// Split \p COffsetVal into {immediate offset field, remainder offset}
1138   /// values.
1139   std::pair<int64_t, int64_t> splitFlatOffset(int64_t COffsetVal,
1140                                               unsigned AddrSpace,
1141                                               uint64_t FlatVariant) const;
1142 
1143   /// \brief Return the target-specific opcode for the pseudo \p Opcode, or -1
1144   /// if no target-specific opcode exists for it. If \p Opcode is not a pseudo
1145   /// instruction, it is returned unchanged.
1146   int pseudoToMCOpcode(int Opcode) const;
1147 
1148   /// \brief Check if \p MCOp should only be used by the assembler.
1149   /// Return true if \p MCOp should not be used by codegen.
1150   bool isAsmOnlyOpcode(int MCOp) const;
1151 
1152   const TargetRegisterClass *getRegClass(const MCInstrDesc &TID, unsigned OpNum,
1153                                          const TargetRegisterInfo *TRI,
1154                                          const MachineFunction &MF)
1155     const override;
1156 
1157   void fixImplicitOperands(MachineInstr &MI) const;
1158 
1159   MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
1160                                       ArrayRef<unsigned> Ops,
1161                                       MachineBasicBlock::iterator InsertPt,
1162                                       int FrameIndex,
1163                                       LiveIntervals *LIS = nullptr,
1164                                       VirtRegMap *VRM = nullptr) const override;
1165 
1166   unsigned getInstrLatency(const InstrItineraryData *ItinData,
1167                            const MachineInstr &MI,
1168                            unsigned *PredCost = nullptr) const override;
1169 
1170   const MIRFormatter *getMIRFormatter() const override {
1171     if (!Formatter) // created on first use; Formatter is a mutable member
1172       Formatter = std::make_unique<AMDGPUMIRFormatter>();
1173     return Formatter.get();
1174   }
1175 
1176   static unsigned getDSShaderTypeValue(const MachineFunction &MF);
1177 
1178   const TargetSchedModel &getSchedModel() const { return SchedModel; }
1179 
1180   /// Enforce even alignment of operand \p OpName if required by the target.
1181   /// This is used if an operand is a 32 bit register but needs to be aligned
1182   /// regardless.
1183   void enforceOperandRCAlignment(MachineInstr &MI, unsigned OpName) const;
1184 };
1185 
1186 /// \returns true if the reg:subreg pair \p P has the register class \p TRC.
1187 inline bool isOfRegClass(const TargetInstrInfo::RegSubRegPair &P,
1188                          const TargetRegisterClass &TRC,
1189                          MachineRegisterInfo &MRI) {
1190   const TargetRegisterClass *PairRC = MRI.getRegClass(P.Reg);
1191   if (P.SubReg == 0) // no subregister: compare the classes directly
1192     return PairRC == &TRC;
1193   const TargetRegisterInfo *RegInfo = MRI.getTargetRegisterInfo();
1194   return PairRC == RegInfo->getMatchingSuperRegClass(PairRC, &TRC, P.SubReg);
1195 }
1196 
1197 /// \brief Create a RegSubRegPair from the register MachineOperand \p O.
1198 inline TargetInstrInfo::RegSubRegPair
1199 getRegSubRegPair(const MachineOperand &O) {
1200   assert(O.isReg());
1201   return TargetInstrInfo::RegSubRegPair{O.getReg(), O.getSubReg()};
1202 }
1203 
1204 /// \brief Return the SubReg component from REG_SEQUENCE.
1205 TargetInstrInfo::RegSubRegPair getRegSequenceSubReg(MachineInstr &MI,
1206                                                     unsigned SubReg);
1207 
1208 /// \brief Return the defining instruction for the reg:subreg pair \p P,
1209 /// skipping copy-like instructions and subreg-manipulation pseudos.
1210 /// Following another subreg of a reg:subreg isn't supported.
1211 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
1212                                MachineRegisterInfo &MRI);
1213 
1214 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1215 /// DefMI and the use at \p UseMI. Should be run on SSA. Currently does not
1216 /// attempt to track between blocks.
1217 bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
1218                                 Register VReg,
1219                                 const MachineInstr &DefMI,
1220                                 const MachineInstr &UseMI);
1221 
1222 /// \brief Return false if EXEC is not changed between the def of \p VReg at \p
1223 /// DefMI and all its uses. Should be run on SSA. Currently does not attempt to
1224 /// track between blocks.
1225 bool execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
1226                                    Register VReg,
1227                                    const MachineInstr &DefMI);
1228 
1229 namespace AMDGPU {
1230 
1231   LLVM_READONLY
1232   int getVOPe64(uint16_t Opcode);
1233 
1234   LLVM_READONLY
1235   int getVOPe32(uint16_t Opcode);
1236 
1237   LLVM_READONLY
1238   int getSDWAOp(uint16_t Opcode);
1239 
1240   LLVM_READONLY
1241   int getDPPOp32(uint16_t Opcode);
1242 
1243   LLVM_READONLY
1244   int getDPPOp64(uint16_t Opcode);
1245 
1246   LLVM_READONLY
1247   int getBasicFromSDWAOp(uint16_t Opcode);
1248 
1249   LLVM_READONLY
1250   int getCommuteRev(uint16_t Opcode);
1251 
1252   LLVM_READONLY
1253   int getCommuteOrig(uint16_t Opcode);
1254 
1255   LLVM_READONLY
1256   int getAddr64Inst(uint16_t Opcode);
1257 
1258   /// Check if \p Opcode is an Addr64 opcode.
1259   ///
1260   /// \returns \p Opcode if it is an Addr64 opcode, otherwise -1.
1261   LLVM_READONLY
1262   int getIfAddr64Inst(uint16_t Opcode);
1263 
1264   LLVM_READONLY
1265   int getAtomicNoRetOp(uint16_t Opcode);
1266 
1267   LLVM_READONLY
1268   int getSOPKOp(uint16_t Opcode);
1269 
1270   /// \returns SADDR form of a FLAT Global instruction given an \p Opcode
1271   /// of a VADDR form.
1272   LLVM_READONLY
1273   int getGlobalSaddrOp(uint16_t Opcode);
1274 
1275   /// \returns VADDR form of a FLAT Global instruction given an \p Opcode
1276   /// of a SADDR form.
1277   LLVM_READONLY
1278   int getGlobalVaddrOp(uint16_t Opcode);
1279 
1280   LLVM_READONLY
1281   int getVCMPXNoSDstOp(uint16_t Opcode);
1282 
1283   /// \returns ST form with only immediate offset of a FLAT Scratch instruction
1284   /// given an \p Opcode of an SS (SADDR) form.
1285   LLVM_READONLY
1286   int getFlatScratchInstSTfromSS(uint16_t Opcode);
1287 
1288   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1289   /// of an SVS (SADDR + VADDR) form.
1290   LLVM_READONLY
1291   int getFlatScratchInstSVfromSVS(uint16_t Opcode);
1292 
1293   /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode
1294   /// of an SV (VADDR) form.
1295   LLVM_READONLY
1296   int getFlatScratchInstSSfromSV(uint16_t Opcode);
1297 
1298   /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode
1299   /// of an SS (SADDR) form.
1300   LLVM_READONLY
1301   int getFlatScratchInstSVfromSS(uint16_t Opcode);
1302 
1303   /// \returns earlyclobber version of a MAC MFMA if it exists.
1304   LLVM_READONLY
1305   int getMFMAEarlyClobberOp(uint16_t Opcode);
1306 
1307   /// \returns v_cmpx version of a v_cmp instruction.
1308   LLVM_READONLY
1309   int getVCMPXOpFromVCMP(uint16_t Opcode);
1310 
1311   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
1312   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
1313   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
1314   const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
1315 
1316 } // end namespace AMDGPU
1317 
1318 namespace SI {
1319 namespace KernelInputOffsets {
1320 
1321 /// Offsets in bytes from the start of the input buffer (4-byte stride).
1322 enum Offsets {
1323   NGROUPS_X = 0,
1324   NGROUPS_Y = 4,
1325   NGROUPS_Z = 8,
1326   GLOBAL_SIZE_X = 12,
1327   GLOBAL_SIZE_Y = 16,
1328   GLOBAL_SIZE_Z = 20,
1329   LOCAL_SIZE_X = 24,
1330   LOCAL_SIZE_Y = 28,
1331   LOCAL_SIZE_Z = 32
1332 };
1333 
1334 } // end namespace KernelInputOffsets
1335 } // end namespace SI
1336 
1337 } // end namespace llvm
1338 
1339 #endif // LLVM_LIB_TARGET_AMDGPU_SIINSTRINFO_H
1340