1 //===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // \file
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPUInstPrinter.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "SIDefines.h"
13 #include "Utils/AMDGPUAsmUtils.h"
14 #include "Utils/AMDGPUBaseInfo.h"
15 #include "llvm/MC/MCExpr.h"
16 #include "llvm/MC/MCInst.h"
17 #include "llvm/MC/MCInstrDesc.h"
18 #include "llvm/MC/MCInstrInfo.h"
19 #include "llvm/MC/MCRegisterInfo.h"
20 #include "llvm/MC/MCSubtargetInfo.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/TargetParser/TargetParser.h"
23 
24 using namespace llvm;
25 using namespace llvm::AMDGPU;
26 
27 void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
28   // FIXME: The current implementation of
29   // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
30   // as an integer or we provide a name which represents a physical register.
31   // For CFI instructions we really want to emit a name for the DWARF register
32   // instead, because there may be multiple DWARF registers corresponding to a
33   // single physical register. One case where this problem manifests is with
34   // wave32/wave64 where using the physical register name is ambiguous: if we
35   // write e.g. `.cfi_undefined v0` we lose information about the wavefront
36   // size which we need to encode the register in the final DWARF. Ideally we
37   // would extend MC to support parsing DWARF register names so we could do
38   // something like `.cfi_undefined dwarf_wave32_v0`. For now we just live with
39   // non-pretty DWARF register names in assembly text.
40   OS << Reg.id();
41 }
42 
43 void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
44                                   StringRef Annot, const MCSubtargetInfo &STI,
45                                   raw_ostream &OS) {
46   OS.flush();
47   printInstruction(MI, Address, STI, OS);
48   printAnnotation(OS, Annot);
49 }
50 
51 void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
52                                           const MCSubtargetInfo &STI,
53                                           raw_ostream &O) {
54   O << formatHex(MI->getOperand(OpNo).getImm() & 0xf);
55 }
56 
57 void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
58                                            const MCSubtargetInfo &STI,
59                                            raw_ostream &O) {
60   // It's possible to end up with a 32-bit literal used with a 16-bit operand
61   // with ignored high bits. Print as 32-bit anyway in that case.
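  // For example, a literal such as 0x1ffff fails both checks below and is
  // printed in full as 0x1ffff rather than being truncated to 16 bits.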
62   int64_t Imm = MI->getOperand(OpNo).getImm();
63   if (isInt<16>(Imm) || isUInt<16>(Imm))
64     O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
65   else
66     printU32ImmOperand(MI, OpNo, STI, O);
67 }
68 
69 void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo,
70                                              raw_ostream &O) {
71   O << formatDec(MI->getOperand(OpNo).getImm() & 0xf);
72 }
73 
74 void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
75                                              raw_ostream &O) {
76   O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
77 }
78 
79 void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
80                                               raw_ostream &O) {
81   O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
82 }
83 
84 void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
85                                            const MCSubtargetInfo &STI,
86                                            raw_ostream &O) {
87   O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
88 }
89 
90 void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
91                                       raw_ostream &O, StringRef BitName) {
92   if (MI->getOperand(OpNo).getImm()) {
93     O << ' ' << BitName;
94   }
95 }
96 
97 void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
98                                     const MCSubtargetInfo &STI,
99                                     raw_ostream &O) {
100   uint32_t Imm = MI->getOperand(OpNo).getImm();
101   if (Imm != 0) {
102     O << " offset:";
103 
104     // GFX12 uses a 24-bit signed offset for VBUFFER.
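    // For example, an encoded value of 0xffffff sign-extends to -1 and is
    // printed as "offset:-1".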
105     const MCInstrDesc &Desc = MII.get(MI->getOpcode());
106     bool IsVBuffer = Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF);
107     if (AMDGPU::isGFX12(STI) && IsVBuffer)
108       O << formatDec(SignExtend32<24>(Imm));
109     else
110       printU16ImmDecOperand(MI, OpNo, O);
111   }
112 }
113 
114 void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
115                                         const MCSubtargetInfo &STI,
116                                         raw_ostream &O) {
117   uint32_t Imm = MI->getOperand(OpNo).getImm();
118   if (Imm != 0) {
119     O << " offset:";
120 
121     const MCInstrDesc &Desc = MII.get(MI->getOpcode());
122     bool AllowNegative = (Desc.TSFlags & (SIInstrFlags::FlatGlobal |
123                                           SIInstrFlags::FlatScratch)) ||
124                          AMDGPU::isGFX12(STI);
125 
126     if (AllowNegative) // Signed offset
127       O << formatDec(SignExtend32(Imm, AMDGPU::getNumFlatOffsetBits(STI)));
128     else // Unsigned offset
129       printU16ImmDecOperand(MI, OpNo, O);
130   }
131 }
132 
133 void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
134                                      const MCSubtargetInfo &STI,
135                                      raw_ostream &O) {
136   if (MI->getOperand(OpNo).getImm()) {
137     O << " offset0:";
138     printU8ImmDecOperand(MI, OpNo, O);
139   }
140 }
141 
142 void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo,
143                                      const MCSubtargetInfo &STI,
144                                      raw_ostream &O) {
145   if (MI->getOperand(OpNo).getImm()) {
146     O << " offset1:";
147     printU8ImmDecOperand(MI, OpNo, O);
148   }
149 }
150 
151 void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
152                                         const MCSubtargetInfo &STI,
153                                         raw_ostream &O) {
154   printU32ImmOperand(MI, OpNo, STI, O);
155 }
156 
157 void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo,
158                                         const MCSubtargetInfo &STI,
159                                         raw_ostream &O) {
160   O << formatHex(MI->getOperand(OpNo).getImm());
161 }
162 
163 void AMDGPUInstPrinter::printSMEMOffsetMod(const MCInst *MI, unsigned OpNo,
164                                            const MCSubtargetInfo &STI,
165                                            raw_ostream &O) {
166   O << " offset:";
167   printSMEMOffset(MI, OpNo, STI, O);
168 }
169 
170 void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
171                                                const MCSubtargetInfo &STI,
172                                                raw_ostream &O) {
173   printU32ImmOperand(MI, OpNo, STI, O);
174 }
175 
176 void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
177                                   const MCSubtargetInfo &STI, raw_ostream &O) {
178   auto Imm = MI->getOperand(OpNo).getImm();
179 
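  // On GFX12+ the cache policy immediate is split into a temporal hint (TH)
  // field and a scope field, which are printed separately below.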
180   if (AMDGPU::isGFX12Plus(STI)) {
181     const int64_t TH = Imm & CPol::TH;
182     const int64_t Scope = Imm & CPol::SCOPE;
183 
184     printTH(MI, TH, Scope, O);
185     printScope(Scope, O);
186 
187     return;
188   }
189 
190   if (Imm & CPol::GLC)
191     O << ((AMDGPU::isGFX940(STI) &&
192            !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
193                                                                      : " glc");
194   if (Imm & CPol::SLC)
195     O << (AMDGPU::isGFX940(STI) ? " nt" : " slc");
196   if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
197     O << " dlc";
198   if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
199     O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
200   if (Imm & ~CPol::ALL)
201     O << " /* unexpected cache policy bit */";
202 }
203 
204 void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
205                                 raw_ostream &O) {
  // For th == 0, do not print this field.
207   if (TH == 0)
208     return;
209 
210   const unsigned Opcode = MI->getOpcode();
211   const MCInstrDesc &TID = MII.get(Opcode);
212   bool IsStore = TID.mayStore();
213   bool IsAtomic =
214       TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
215 
216   O << " th:";
217 
218   if (IsAtomic) {
219     O << "TH_ATOMIC_";
220     if (TH & AMDGPU::CPol::TH_ATOMIC_CASCADE) {
221       if (Scope >= AMDGPU::CPol::SCOPE_DEV)
222         O << "CASCADE" << (TH & AMDGPU::CPol::TH_ATOMIC_NT ? "_NT" : "_RT");
223       else
224         O << formatHex(TH);
225     } else if (TH & AMDGPU::CPol::TH_ATOMIC_NT)
226       O << "NT" << (TH & AMDGPU::CPol::TH_ATOMIC_RETURN ? "_RETURN" : "");
227     else if (TH & AMDGPU::CPol::TH_ATOMIC_RETURN)
228       O << "RETURN";
229     else
230       O << formatHex(TH);
231   } else {
232     if (!IsStore && TH == AMDGPU::CPol::TH_RESERVED)
233       O << formatHex(TH);
234     else {
      // This defaults to printing the load variants when neither the MayStore
      // nor the MayLoad flag is present, which is the case for instructions
      // like image_get_resinfo.
238       O << (IsStore ? "TH_STORE_" : "TH_LOAD_");
239       switch (TH) {
240       case AMDGPU::CPol::TH_NT:
241         O << "NT";
242         break;
243       case AMDGPU::CPol::TH_HT:
244         O << "HT";
245         break;
246       case AMDGPU::CPol::TH_BYPASS: // or LU or RT_WB
247         O << (Scope == AMDGPU::CPol::SCOPE_SYS ? "BYPASS"
248                                                : (IsStore ? "RT_WB" : "LU"));
249         break;
250       case AMDGPU::CPol::TH_NT_RT:
251         O << "NT_RT";
252         break;
253       case AMDGPU::CPol::TH_RT_NT:
254         O << "RT_NT";
255         break;
256       case AMDGPU::CPol::TH_NT_HT:
257         O << "NT_HT";
258         break;
259       case AMDGPU::CPol::TH_NT_WB:
260         O << "NT_WB";
261         break;
262       default:
263         llvm_unreachable("unexpected th value");
264       }
265     }
266   }
267 }
268 
269 void AMDGPUInstPrinter::printScope(int64_t Scope, raw_ostream &O) {
270   if (Scope == CPol::SCOPE_CU)
271     return;
272 
273   O << " scope:";
274 
275   if (Scope == CPol::SCOPE_SE)
276     O << "SCOPE_SE";
277   else if (Scope == CPol::SCOPE_DEV)
278     O << "SCOPE_DEV";
279   else if (Scope == CPol::SCOPE_SYS)
280     O << "SCOPE_SYS";
281   else
282     llvm_unreachable("unexpected scope policy value");
285 }
286 
287 void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
288                                    const MCSubtargetInfo &STI, raw_ostream &O) {
289   if (MI->getOperand(OpNo).getImm()) {
290     O << " dmask:";
291     printU16ImmOperand(MI, OpNo, STI, O);
292   }
293 }
294 
295 void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
296                                  const MCSubtargetInfo &STI, raw_ostream &O) {
297   unsigned Dim = MI->getOperand(OpNo).getImm();
298   O << " dim:SQ_RSRC_IMG_";
299 
300   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
301   if (DimInfo)
302     O << DimInfo->AsmSuffix;
303   else
304     O << Dim;
305 }
306 
307 void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
308                                   const MCSubtargetInfo &STI, raw_ostream &O) {
309   if (STI.hasFeature(AMDGPU::FeatureR128A16))
310     printNamedBit(MI, OpNo, O, "a16");
311   else
312     printNamedBit(MI, OpNo, O, "r128");
313 }
314 
315 void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
316                                     const MCSubtargetInfo &STI,
317                                     raw_ostream &O) {
318 }
319 
320 void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI,
321                                             const MCSubtargetInfo &STI,
322                                             raw_ostream &O) {
323   using namespace llvm::AMDGPU::MTBUFFormat;
324 
325   int OpNo =
326     AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::format);
327   assert(OpNo != -1);
328 
329   unsigned Val = MI->getOperand(OpNo).getImm();
330   if (AMDGPU::isGFX10Plus(STI)) {
331     if (Val == UFMT_DEFAULT)
332       return;
333     if (isValidUnifiedFormat(Val, STI)) {
334       O << " format:[" << getUnifiedFormatName(Val, STI) << ']';
335     } else {
336       O << " format:" << Val;
337     }
338   } else {
339     if (Val == DFMT_NFMT_DEFAULT)
340       return;
341     if (isValidDfmtNfmt(Val, STI)) {
342       unsigned Dfmt;
343       unsigned Nfmt;
344       decodeDfmtNfmt(Val, Dfmt, Nfmt);
345       O << " format:[";
346       if (Dfmt != DFMT_DEFAULT) {
347         O << getDfmtName(Dfmt);
348         if (Nfmt != NFMT_DEFAULT) {
349           O << ',';
350         }
351       }
352       if (Nfmt != NFMT_DEFAULT) {
353         O << getNfmtName(Nfmt, STI);
354       }
355       O << ']';
356     } else {
357       O << " format:" << Val;
358     }
359   }
360 }
361 
362 void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
363                                         const MCRegisterInfo &MRI) {
364 #if !defined(NDEBUG)
365   switch (RegNo) {
366   case AMDGPU::FP_REG:
367   case AMDGPU::SP_REG:
368   case AMDGPU::PRIVATE_RSRC_REG:
369     llvm_unreachable("pseudo-register should not ever be emitted");
370   case AMDGPU::SCC:
371     llvm_unreachable("pseudo scc should not ever be emitted");
372   default:
373     break;
374   }
375 #endif
376 
377   O << getRegisterName(RegNo);
378 }
379 
380 void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
381                                     const MCSubtargetInfo &STI, raw_ostream &O) {
382   auto Opcode = MI->getOpcode();
383   auto Flags = MII.get(Opcode).TSFlags;
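  // OpNo == 0 is the destination, which immediately follows the mnemonic, so
  // the encoding suffix (_e64, _e64_dpp, _dpp, _sdwa or _e32) is appended to
  // the mnemonic here before the register is printed. Opcodes reported as
  // "single" exist in only one encoding and take no suffix.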
384   if (OpNo == 0) {
385     if (Flags & SIInstrFlags::VOP3 && Flags & SIInstrFlags::DPP)
386       O << "_e64_dpp";
387     else if (Flags & SIInstrFlags::VOP3) {
388       if (!getVOP3IsSingle(Opcode))
389         O << "_e64";
390     } else if (Flags & SIInstrFlags::DPP)
391       O << "_dpp";
392     else if (Flags & SIInstrFlags::SDWA)
393       O << "_sdwa";
394     else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
395              ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode)))
396       O << "_e32";
397     O << " ";
398   }
399 
400   printRegularOperand(MI, OpNo, STI, O);
401 
402   // Print default vcc/vcc_lo operand.
403   switch (Opcode) {
404   default: break;
405 
406   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
407   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
408   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
409   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
410   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
411   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
412   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
413   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
414   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
415   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
416   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
417   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
418   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
419   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
420   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
421   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
422   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
423   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
424   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
425   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
426   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
427   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
428   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
429   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
430   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
431   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
432   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
433   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
434   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
435   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
436     printDefaultVccOperand(false, STI, O);
437     break;
438   }
439 }
440 
441 void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
442                                        const MCSubtargetInfo &STI, raw_ostream &O) {
443   if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI))
444     O << " ";
445   else
446     O << "_e32 ";
447 
448   printRegularOperand(MI, OpNo, STI, O);
449 }
450 
451 void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
452                                             const MCSubtargetInfo &STI,
453                                             raw_ostream &O) {
454   int16_t SImm = static_cast<int16_t>(Imm);
455   if (isInlinableIntLiteral(SImm)) {
456     O << SImm;
457   } else {
458     uint64_t Imm16 = static_cast<uint16_t>(Imm);
459     O << formatHex(Imm16);
460   }
461 }
462 
463 // This must accept a 32-bit immediate value to correctly handle packed 16-bit
464 // operations.
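// The comparisons below are against IEEE half-precision bit patterns, e.g.
// 0x3C00 is 1.0, 0x3800 is 0.5, 0x4000 is 2.0, 0x4400 is 4.0 (with the sign
// bit set for the negated values), and 0x3118 is the 1/(2*pi) approximation
// available as an inline constant on targets with FeatureInv2PiInlineImm.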
465 static bool printImmediateFloat16(uint32_t Imm, const MCSubtargetInfo &STI,
466                                   raw_ostream &O) {
467   if (Imm == 0x3C00)
468     O << "1.0";
469   else if (Imm == 0xBC00)
470     O << "-1.0";
471   else if (Imm == 0x3800)
472     O << "0.5";
473   else if (Imm == 0xB800)
474     O << "-0.5";
475   else if (Imm == 0x4000)
476     O << "2.0";
477   else if (Imm == 0xC000)
478     O << "-2.0";
479   else if (Imm == 0x4400)
480     O << "4.0";
481   else if (Imm == 0xC400)
482     O << "-4.0";
483   else if (Imm == 0x3118 && STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
484     O << "0.15915494";
485   else
486     return false;
487 
488   return true;
489 }
490 
491 void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
492                                          const MCSubtargetInfo &STI,
493                                          raw_ostream &O) {
494   int16_t SImm = static_cast<int16_t>(Imm);
495   if (isInlinableIntLiteral(SImm)) {
496     O << SImm;
497     return;
498   }
499 
500   uint16_t HImm = static_cast<uint16_t>(Imm);
501   if (printImmediateFloat16(HImm, STI, O))
502     return;
503 
504   uint64_t Imm16 = static_cast<uint16_t>(Imm);
505   O << formatHex(Imm16);
506 }
507 
508 void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm, uint8_t OpType,
509                                            const MCSubtargetInfo &STI,
510                                            raw_ostream &O) {
511   int32_t SImm = static_cast<int32_t>(Imm);
512   if (isInlinableIntLiteral(SImm)) {
513     O << SImm;
514     return;
515   }
516 
517   switch (OpType) {
518   case AMDGPU::OPERAND_REG_IMM_V2INT16:
519   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
520   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
521     if (printImmediateFloat32(Imm, STI, O))
522       return;
523     break;
524   case AMDGPU::OPERAND_REG_IMM_V2FP16:
525   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
526   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
527     if (isUInt<16>(Imm) &&
528         printImmediateFloat16(static_cast<uint16_t>(Imm), STI, O))
529       return;
530     break;
531   default:
532     llvm_unreachable("bad operand type");
533   }
534 
535   O << formatHex(static_cast<uint64_t>(Imm));
536 }
537 
538 bool AMDGPUInstPrinter::printImmediateFloat32(uint32_t Imm,
539                                               const MCSubtargetInfo &STI,
540                                               raw_ostream &O) {
541   if (Imm == llvm::bit_cast<uint32_t>(0.0f))
542     O << "0.0";
543   else if (Imm == llvm::bit_cast<uint32_t>(1.0f))
544     O << "1.0";
545   else if (Imm == llvm::bit_cast<uint32_t>(-1.0f))
546     O << "-1.0";
547   else if (Imm == llvm::bit_cast<uint32_t>(0.5f))
548     O << "0.5";
549   else if (Imm == llvm::bit_cast<uint32_t>(-0.5f))
550     O << "-0.5";
551   else if (Imm == llvm::bit_cast<uint32_t>(2.0f))
552     O << "2.0";
553   else if (Imm == llvm::bit_cast<uint32_t>(-2.0f))
554     O << "-2.0";
555   else if (Imm == llvm::bit_cast<uint32_t>(4.0f))
556     O << "4.0";
557   else if (Imm == llvm::bit_cast<uint32_t>(-4.0f))
558     O << "-4.0";
559   else if (Imm == 0x3e22f983 &&
560            STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
561     O << "0.15915494";
562   else
563     return false;
564 
565   return true;
566 }
567 
568 void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
569                                          const MCSubtargetInfo &STI,
570                                          raw_ostream &O) {
571   int32_t SImm = static_cast<int32_t>(Imm);
572   if (isInlinableIntLiteral(SImm)) {
573     O << SImm;
574     return;
575   }
576 
577   if (printImmediateFloat32(Imm, STI, O))
578     return;
579 
580   O << formatHex(static_cast<uint64_t>(Imm));
581 }
582 
583 void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
584                                          const MCSubtargetInfo &STI,
585                                          raw_ostream &O, bool IsFP) {
586   int64_t SImm = static_cast<int64_t>(Imm);
587   if (SImm >= -16 && SImm <= 64) {
588     O << SImm;
589     return;
590   }
591 
592   if (Imm == llvm::bit_cast<uint64_t>(0.0))
593     O << "0.0";
594   else if (Imm == llvm::bit_cast<uint64_t>(1.0))
595     O << "1.0";
596   else if (Imm == llvm::bit_cast<uint64_t>(-1.0))
597     O << "-1.0";
598   else if (Imm == llvm::bit_cast<uint64_t>(0.5))
599     O << "0.5";
600   else if (Imm == llvm::bit_cast<uint64_t>(-0.5))
601     O << "-0.5";
602   else if (Imm == llvm::bit_cast<uint64_t>(2.0))
603     O << "2.0";
604   else if (Imm == llvm::bit_cast<uint64_t>(-2.0))
605     O << "-2.0";
606   else if (Imm == llvm::bit_cast<uint64_t>(4.0))
607     O << "4.0";
608   else if (Imm == llvm::bit_cast<uint64_t>(-4.0))
609     O << "-4.0";
610   else if (Imm == 0x3fc45f306dc9c882 &&
611            STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
612     O << "0.15915494309189532";
613   else if (IsFP) {
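    // A 64-bit floating-point operand can only encode a 32-bit literal that
    // supplies the high half of the value; the low 32 bits must be zero
    // (which the assert below checks), so only the high word is printed.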
614     assert(AMDGPU::isValid32BitLiteral(Imm, true));
615     O << formatHex(static_cast<uint64_t>(Hi_32(Imm)));
616   } else {
617     assert(isUInt<32>(Imm) || isInt<32>(Imm));
618 
619     // In rare situations, we will have a 32-bit literal in a 64-bit
620     // operand. This is technically allowed for the encoding of s_mov_b64.
621     O << formatHex(static_cast<uint64_t>(Imm));
622   }
623 }
624 
625 void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
626                                   const MCSubtargetInfo &STI,
627                                   raw_ostream &O) {
628   unsigned Imm = MI->getOperand(OpNo).getImm();
629   if (!Imm)
630     return;
631 
632   if (AMDGPU::isGFX940(STI)) {
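    // On gfx940 the blgp field of these FP64 MFMA opcodes is repurposed as
    // three per-source negate bits, so it is printed as neg:[...] rather than
    // blgp:.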
633     switch (MI->getOpcode()) {
634     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
635     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
636     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
637     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
638       O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
639         << ((Imm >> 2) & 1) << ']';
640       return;
641     }
642   }
643 
644   O << " blgp:" << Imm;
645 }
646 
647 void AMDGPUInstPrinter::printCBSZ(const MCInst *MI, unsigned OpNo,
648                                   const MCSubtargetInfo &STI,
649                                   raw_ostream &O) {
650   unsigned Imm = MI->getOperand(OpNo).getImm();
651   if (!Imm)
652     return;
653 
654   O << " cbsz:" << Imm;
655 }
656 
657 void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo,
658                                   const MCSubtargetInfo &STI,
659                                   raw_ostream &O) {
660   unsigned Imm = MI->getOperand(OpNo).getImm();
661   if (!Imm)
662     return;
663 
664   O << " abid:" << Imm;
665 }
666 
667 void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
668                                                const MCSubtargetInfo &STI,
669                                                raw_ostream &O) {
670   if (!FirstOperand)
671     O << ", ";
672   printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
673                       ? AMDGPU::VCC
674                       : AMDGPU::VCC_LO,
675                   O, MRI);
676   if (FirstOperand)
677     O << ", ";
678 }
679 
680 void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
681                                       const MCSubtargetInfo &STI,
682                                       raw_ostream &O) {
683   O << " wait_vdst:";
684   printU4ImmDecOperand(MI, OpNo, O);
685 }
686 
687 void AMDGPUInstPrinter::printWaitVAVDst(const MCInst *MI, unsigned OpNo,
688                                         const MCSubtargetInfo &STI,
689                                         raw_ostream &O) {
690   O << " wait_va_vdst:";
691   printU4ImmDecOperand(MI, OpNo, O);
692 }
693 
694 void AMDGPUInstPrinter::printWaitVMVSrc(const MCInst *MI, unsigned OpNo,
695                                         const MCSubtargetInfo &STI,
696                                         raw_ostream &O) {
697   O << " wait_vm_vsrc:";
698   printU4ImmDecOperand(MI, OpNo, O);
699 }
700 
701 void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
702                                     const MCSubtargetInfo &STI,
703                                     raw_ostream &O) {
704   O << " wait_exp:";
705   printU4ImmDecOperand(MI, OpNo, O);
706 }
707 
708 bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc,
709                                         unsigned OpNo) const {
710   return OpNo == 0 && (Desc.TSFlags & SIInstrFlags::DPP) &&
711          (Desc.TSFlags & SIInstrFlags::VOPC) &&
712          (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
713           Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO));
714 }
715 
// Print an operand, prefixing it with the implicit default vcc/vcc_lo operand
// for VOPC encodings when needed.
717 void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
718                                      const MCSubtargetInfo &STI,
719                                      raw_ostream &O) {
720   unsigned Opc = MI->getOpcode();
721   const MCInstrDesc &Desc = MII.get(Opc);
722   int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  // Depending on the case, operand 0, 1 or 2 is the first printed operand.
  // If the source has printed modifiers, printOperandAndFPInputMods or
  // printOperandAndIntInputMods is called instead of this function.
726   if ((OpNo == 0 ||
727        (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) &&
728       (Desc.TSFlags & SIInstrFlags::VOPC) &&
729       (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
730        Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
731     printDefaultVccOperand(true, STI, O);
732 
733   printRegularOperand(MI, OpNo, STI, O);
734 }
735 
736 // Print operands after vcc or modifier handling.
737 void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
738                                             const MCSubtargetInfo &STI,
739                                             raw_ostream &O) {
740   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
741 
742   if (OpNo >= MI->getNumOperands()) {
743     O << "/*Missing OP" << OpNo << "*/";
744     return;
745   }
746 
747   const MCOperand &Op = MI->getOperand(OpNo);
748   if (Op.isReg()) {
749     printRegOperand(Op.getReg(), O, MRI);
750 
    // Check that the operand's register class contains the register used.
    // The intent is to print a disassembler message when invalid code is
    // decoded, for example an SGPR used in a VReg or VISrc (VReg or imm)
    // operand.
754     int RCID = Desc.operands()[OpNo].RegClass;
755     if (RCID != -1) {
756       const MCRegisterClass RC = MRI.getRegClass(RCID);
757       auto Reg = mc2PseudoReg(Op.getReg());
758       if (!RC.contains(Reg) && !isInlineValue(Reg)) {
759         O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
760           << "\' register class*/";
761       }
762     }
763   } else if (Op.isImm()) {
764     const uint8_t OpTy = Desc.operands()[OpNo].OperandType;
765     switch (OpTy) {
766     case AMDGPU::OPERAND_REG_IMM_INT32:
767     case AMDGPU::OPERAND_REG_IMM_FP32:
768     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
769     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
770     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
771     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
772     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
773     case AMDGPU::OPERAND_REG_IMM_V2INT32:
774     case AMDGPU::OPERAND_REG_IMM_V2FP32:
775     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
776     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
777     case MCOI::OPERAND_IMMEDIATE:
778     case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
779       printImmediate32(Op.getImm(), STI, O);
780       break;
781     case AMDGPU::OPERAND_REG_IMM_INT64:
782     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
783       printImmediate64(Op.getImm(), STI, O, false);
784       break;
785     case AMDGPU::OPERAND_REG_IMM_FP64:
786     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
787     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
788       printImmediate64(Op.getImm(), STI, O, true);
789       break;
790     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
791     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
792     case AMDGPU::OPERAND_REG_IMM_INT16:
793       printImmediateInt16(Op.getImm(), STI, O);
794       break;
795     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
796     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
797     case AMDGPU::OPERAND_REG_IMM_FP16:
798     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
799       printImmediate16(Op.getImm(), STI, O);
800       break;
801     case AMDGPU::OPERAND_REG_IMM_V2INT16:
802     case AMDGPU::OPERAND_REG_IMM_V2FP16:
803     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
804     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
805     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
806     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
807       printImmediateV216(Op.getImm(), OpTy, STI, O);
808       break;
809     case MCOI::OPERAND_UNKNOWN:
810     case MCOI::OPERAND_PCREL:
811       O << formatDec(Op.getImm());
812       break;
813     case MCOI::OPERAND_REGISTER:
      // The disassembler does not fail when an operand should not allow
      // immediates; it still decodes them as a 32-bit immediate operand.
816       printImmediate32(Op.getImm(), STI, O);
817       O << "/*Invalid immediate*/";
818       break;
819     default:
820       // We hit this for the immediate instruction bits that don't yet have a
821       // custom printer.
822       llvm_unreachable("unexpected immediate operand type");
823     }
824   } else if (Op.isDFPImm()) {
825     double Value = bit_cast<double>(Op.getDFPImm());
826     // We special case 0.0 because otherwise it will be printed as an integer.
827     if (Value == 0.0)
828       O << "0.0";
829     else {
830       const MCInstrDesc &Desc = MII.get(MI->getOpcode());
831       int RCID = Desc.operands()[OpNo].RegClass;
832       unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
833       if (RCBits == 32)
834         printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O);
835       else if (RCBits == 64)
836         printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O, true);
837       else
838         llvm_unreachable("Invalid register class size");
839     }
840   } else if (Op.isExpr()) {
841     const MCExpr *Exp = Op.getExpr();
842     Exp->print(O, &MAI);
843   } else {
844     O << "/*INV_OP*/";
845   }
846 
  // Print the implicit default vcc/vcc_lo source of v_cndmask_b32_e32 and the
  // VOP2b carry-in instructions.
848   switch (MI->getOpcode()) {
849   default: break;
850 
851   case AMDGPU::V_CNDMASK_B32_e32_gfx10:
852   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
853   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
854   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
855   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
856   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
857   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
858   case AMDGPU::V_CNDMASK_B32_dpp8_gfx10:
859   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
860   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
861   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
862   case AMDGPU::V_CNDMASK_B32_e32_gfx11:
863   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
864   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
865   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
866   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
867   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
868   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
869   case AMDGPU::V_CNDMASK_B32_dpp8_gfx11:
870   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
871   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
872   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
873   case AMDGPU::V_CNDMASK_B32_e32_gfx12:
874   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx12:
875   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx12:
876   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx12:
877   case AMDGPU::V_CNDMASK_B32_dpp_gfx12:
878   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx12:
879   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx12:
880   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx12:
881   case AMDGPU::V_CNDMASK_B32_dpp8_gfx12:
882   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx12:
883   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx12:
884   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx12:
885 
886   case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
887   case AMDGPU::V_CNDMASK_B32_e32_vi:
888     if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
889                                                 AMDGPU::OpName::src1))
890       printDefaultVccOperand(OpNo == 0, STI, O);
891     break;
892   }
893 
894   if (Desc.TSFlags & SIInstrFlags::MTBUF) {
895     int SOffsetIdx =
896       AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::soffset);
897     assert(SOffsetIdx != -1);
898     if ((int)OpNo == SOffsetIdx)
899       printSymbolicFormat(MI, STI, O);
900   }
901 }
902 
903 void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
904                                                    unsigned OpNo,
905                                                    const MCSubtargetInfo &STI,
906                                                    raw_ostream &O) {
907   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
908   if (needsImpliedVcc(Desc, OpNo))
909     printDefaultVccOperand(true, STI, O);
910 
911   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
912 
913   // Use 'neg(...)' instead of '-' to avoid ambiguity.
914   // This is important for integer literals because
915   // -1 is not the same value as neg(1).
916   bool NegMnemo = false;
917 
918   if (InputModifiers & SISrcMods::NEG) {
919     if (OpNo + 1 < MI->getNumOperands() &&
920         (InputModifiers & SISrcMods::ABS) == 0) {
921       const MCOperand &Op = MI->getOperand(OpNo + 1);
922       NegMnemo = Op.isImm() || Op.isDFPImm();
923     }
924     if (NegMnemo) {
925       O << "neg(";
926     } else {
927       O << '-';
928     }
929   }
930 
931   if (InputModifiers & SISrcMods::ABS)
932     O << '|';
933   printRegularOperand(MI, OpNo + 1, STI, O);
934   if (InputModifiers & SISrcMods::ABS)
935     O << '|';
936 
937   if (NegMnemo) {
938     O << ')';
939   }
940 
  // Print default vcc/vcc_lo operand of the v_cndmask_b32 variants.
942   switch (MI->getOpcode()) {
943   default:
944     break;
945 
946   case AMDGPU::V_CNDMASK_B32_sdwa_gfx10:
947   case AMDGPU::V_CNDMASK_B32_dpp_gfx10:
948   case AMDGPU::V_CNDMASK_B32_dpp_gfx11:
949     if ((int)OpNo + 1 ==
950         AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src1))
951       printDefaultVccOperand(OpNo == 0, STI, O);
952     break;
953   }
954 }
955 
956 void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
957                                                     unsigned OpNo,
958                                                     const MCSubtargetInfo &STI,
959                                                     raw_ostream &O) {
960   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
961   if (needsImpliedVcc(Desc, OpNo))
962     printDefaultVccOperand(true, STI, O);
963 
964   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
965   if (InputModifiers & SISrcMods::SEXT)
966     O << "sext(";
967   printRegularOperand(MI, OpNo + 1, STI, O);
968   if (InputModifiers & SISrcMods::SEXT)
969     O << ')';
970 
971   // Print default vcc/vcc_lo operand of VOP2b.
972   switch (MI->getOpcode()) {
973   default: break;
974 
975   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
976   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
977   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
978     if ((int)OpNo + 1 == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
979                                                     AMDGPU::OpName::src1))
980       printDefaultVccOperand(OpNo == 0, STI, O);
981     break;
982   }
983 }
984 
985 void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
986                                   const MCSubtargetInfo &STI,
987                                   raw_ostream &O) {
988   if (!AMDGPU::isGFX10Plus(STI))
989     llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
990 
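  // The dpp8 immediate packs eight 3-bit lane selectors: bits [3*i+2 : 3*i]
  // give the source lane for lane i, printed below as dpp8:[s0,s1,...,s7].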
991   unsigned Imm = MI->getOperand(OpNo).getImm();
992   O << "dpp8:[" << formatDec(Imm & 0x7);
993   for (size_t i = 1; i < 8; ++i) {
994     O << ',' << formatDec((Imm >> (3 * i)) & 0x7);
995   }
996   O << ']';
997 }
998 
999 void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
1000                                      const MCSubtargetInfo &STI,
1001                                      raw_ostream &O) {
1002   using namespace AMDGPU::DPP;
1003 
1004   unsigned Imm = MI->getOperand(OpNo).getImm();
1005   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
1006 
1007   if (!AMDGPU::isLegalDPALU_DPPControl(Imm) && AMDGPU::isDPALU_DPP(Desc)) {
1008     O << " /* DP ALU dpp only supports row_newbcast */";
1009     return;
1010   } else if (Imm <= DppCtrl::QUAD_PERM_LAST) {
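    // quad_perm packs four 2-bit lane selects; e.g. Imm = 0xE4 (0b11100100)
    // is the identity permutation and prints as quad_perm:[0,1,2,3].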
1011     O << "quad_perm:[";
1012     O << formatDec(Imm & 0x3)         << ',';
1013     O << formatDec((Imm & 0xc)  >> 2) << ',';
1014     O << formatDec((Imm & 0x30) >> 4) << ',';
1015     O << formatDec((Imm & 0xc0) >> 6) << ']';
1016   } else if ((Imm >= DppCtrl::ROW_SHL_FIRST) &&
1017              (Imm <= DppCtrl::ROW_SHL_LAST)) {
1018     O << "row_shl:";
1019     printU4ImmDecOperand(MI, OpNo, O);
1020   } else if ((Imm >= DppCtrl::ROW_SHR_FIRST) &&
1021              (Imm <= DppCtrl::ROW_SHR_LAST)) {
1022     O << "row_shr:";
1023     printU4ImmDecOperand(MI, OpNo, O);
1024   } else if ((Imm >= DppCtrl::ROW_ROR_FIRST) &&
1025              (Imm <= DppCtrl::ROW_ROR_LAST)) {
1026     O << "row_ror:";
1027     printU4ImmDecOperand(MI, OpNo, O);
1028   } else if (Imm == DppCtrl::WAVE_SHL1) {
1029     if (AMDGPU::isGFX10Plus(STI)) {
1030       O << "/* wave_shl is not supported starting from GFX10 */";
1031       return;
1032     }
1033     O << "wave_shl:1";
1034   } else if (Imm == DppCtrl::WAVE_ROL1) {
1035     if (AMDGPU::isGFX10Plus(STI)) {
1036       O << "/* wave_rol is not supported starting from GFX10 */";
1037       return;
1038     }
1039     O << "wave_rol:1";
1040   } else if (Imm == DppCtrl::WAVE_SHR1) {
1041     if (AMDGPU::isGFX10Plus(STI)) {
1042       O << "/* wave_shr is not supported starting from GFX10 */";
1043       return;
1044     }
1045     O << "wave_shr:1";
1046   } else if (Imm == DppCtrl::WAVE_ROR1) {
1047     if (AMDGPU::isGFX10Plus(STI)) {
1048       O << "/* wave_ror is not supported starting from GFX10 */";
1049       return;
1050     }
1051     O << "wave_ror:1";
1052   } else if (Imm == DppCtrl::ROW_MIRROR) {
1053     O << "row_mirror";
1054   } else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
1055     O << "row_half_mirror";
1056   } else if (Imm == DppCtrl::BCAST15) {
1057     if (AMDGPU::isGFX10Plus(STI)) {
1058       O << "/* row_bcast is not supported starting from GFX10 */";
1059       return;
1060     }
1061     O << "row_bcast:15";
1062   } else if (Imm == DppCtrl::BCAST31) {
1063     if (AMDGPU::isGFX10Plus(STI)) {
1064       O << "/* row_bcast is not supported starting from GFX10 */";
1065       return;
1066     }
1067     O << "row_bcast:31";
1068   } else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
1069              (Imm <= DppCtrl::ROW_SHARE_LAST)) {
1070     if (AMDGPU::isGFX90A(STI)) {
1071       O << "row_newbcast:";
1072     } else if (AMDGPU::isGFX10Plus(STI)) {
1073       O << "row_share:";
1074     } else {
1075       O << " /* row_newbcast/row_share is not supported on ASICs earlier "
1076            "than GFX90A/GFX10 */";
1077       return;
1078     }
1079     printU4ImmDecOperand(MI, OpNo, O);
1080   } else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
1081              (Imm <= DppCtrl::ROW_XMASK_LAST)) {
1082     if (!AMDGPU::isGFX10Plus(STI)) {
1083       O << "/* row_xmask is not supported on ASICs earlier than GFX10 */";
1084       return;
1085     }
1086     O << "row_xmask:";
1087     printU4ImmDecOperand(MI, OpNo, O);
1088   } else {
1089     O << "/* Invalid dpp_ctrl value */";
1090   }
1091 }
1092 
1093 void AMDGPUInstPrinter::printDppRowMask(const MCInst *MI, unsigned OpNo,
1094                                         const MCSubtargetInfo &STI,
1095                                         raw_ostream &O) {
1096   O << " row_mask:";
1097   printU4ImmOperand(MI, OpNo, STI, O);
1098 }
1099 
1100 void AMDGPUInstPrinter::printDppBankMask(const MCInst *MI, unsigned OpNo,
1101                                          const MCSubtargetInfo &STI,
1102                                          raw_ostream &O) {
1103   O << " bank_mask:";
1104   printU4ImmOperand(MI, OpNo, STI, O);
1105 }
1106 
1107 void AMDGPUInstPrinter::printDppBoundCtrl(const MCInst *MI, unsigned OpNo,
1108                                           const MCSubtargetInfo &STI,
1109                                           raw_ostream &O) {
1110   unsigned Imm = MI->getOperand(OpNo).getImm();
1111   if (Imm) {
1112     O << " bound_ctrl:1";
1113   }
1114 }
1115 
1116 void AMDGPUInstPrinter::printDppFI(const MCInst *MI, unsigned OpNo,
1117                                    const MCSubtargetInfo &STI, raw_ostream &O) {
1118   using namespace llvm::AMDGPU::DPP;
1119   unsigned Imm = MI->getOperand(OpNo).getImm();
1120   if (Imm == DPP_FI_1 || Imm == DPP8_FI_1) {
1121     O << " fi:1";
1122   }
1123 }
1124 
1125 void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
1126                                      raw_ostream &O) {
1127   using namespace llvm::AMDGPU::SDWA;
1128 
1129   unsigned Imm = MI->getOperand(OpNo).getImm();
1130   switch (Imm) {
1131   case SdwaSel::BYTE_0: O << "BYTE_0"; break;
1132   case SdwaSel::BYTE_1: O << "BYTE_1"; break;
1133   case SdwaSel::BYTE_2: O << "BYTE_2"; break;
1134   case SdwaSel::BYTE_3: O << "BYTE_3"; break;
1135   case SdwaSel::WORD_0: O << "WORD_0"; break;
1136   case SdwaSel::WORD_1: O << "WORD_1"; break;
1137   case SdwaSel::DWORD: O << "DWORD"; break;
1138   default: llvm_unreachable("Invalid SDWA data select operand");
1139   }
1140 }
1141 
1142 void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
1143                                         const MCSubtargetInfo &STI,
1144                                         raw_ostream &O) {
1145   O << "dst_sel:";
1146   printSDWASel(MI, OpNo, O);
1147 }
1148 
1149 void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
1150                                          const MCSubtargetInfo &STI,
1151                                          raw_ostream &O) {
1152   O << "src0_sel:";
1153   printSDWASel(MI, OpNo, O);
1154 }
1155 
1156 void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
1157                                          const MCSubtargetInfo &STI,
1158                                          raw_ostream &O) {
1159   O << "src1_sel:";
1160   printSDWASel(MI, OpNo, O);
1161 }
1162 
1163 void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
1164                                            const MCSubtargetInfo &STI,
1165                                            raw_ostream &O) {
1166   using namespace llvm::AMDGPU::SDWA;
1167 
1168   O << "dst_unused:";
1169   unsigned Imm = MI->getOperand(OpNo).getImm();
1170   switch (Imm) {
1171   case DstUnused::UNUSED_PAD: O << "UNUSED_PAD"; break;
1172   case DstUnused::UNUSED_SEXT: O << "UNUSED_SEXT"; break;
1173   case DstUnused::UNUSED_PRESERVE: O << "UNUSED_PRESERVE"; break;
1174   default: llvm_unreachable("Invalid SDWA dest_unused operand");
1175   }
1176 }
1177 
1178 void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
1179                                      const MCSubtargetInfo &STI, raw_ostream &O,
1180                                      unsigned N) {
1181   unsigned Opc = MI->getOpcode();
1182   int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
1183   unsigned En = MI->getOperand(EnIdx).getImm();
1184 
1185   int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
1186 
1187   // If compr is set, print as src0, src0, src1, src1
1188   if (MI->getOperand(ComprIdx).getImm())
1189     OpNo = OpNo - N + N / 2;
1190 
1191   if (En & (1 << N))
1192     printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
1193   else
1194     O << "off";
1195 }
1196 
1197 void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
1198                                      const MCSubtargetInfo &STI,
1199                                      raw_ostream &O) {
1200   printExpSrcN(MI, OpNo, STI, O, 0);
1201 }
1202 
1203 void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
1204                                      const MCSubtargetInfo &STI,
1205                                      raw_ostream &O) {
1206   printExpSrcN(MI, OpNo, STI, O, 1);
1207 }
1208 
1209 void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
1210                                      const MCSubtargetInfo &STI,
1211                                      raw_ostream &O) {
1212   printExpSrcN(MI, OpNo, STI, O, 2);
1213 }
1214 
1215 void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
1216                                      const MCSubtargetInfo &STI,
1217                                      raw_ostream &O) {
1218   printExpSrcN(MI, OpNo, STI, O, 3);
1219 }
1220 
1221 void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
1222                                     const MCSubtargetInfo &STI,
1223                                     raw_ostream &O) {
1224   using namespace llvm::AMDGPU::Exp;
1225 
  // This is really a 6-bit field.
1227   unsigned Id = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
1228 
1229   int Index;
1230   StringRef TgtName;
1231   if (getTgtName(Id, TgtName, Index) && isSupportedTgtId(Id, STI)) {
1232     O << ' ' << TgtName;
1233     if (Index >= 0)
1234       O << Index;
1235   } else {
1236     O << " invalid_target_" << Id;
1237   }
1238 }
1239 
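// Returns true if every source's Mod bit has its default value (1 for
// op_sel_hi on packed instructions, 0 otherwise) and, when the instruction has
// a dst op_sel bit, that bit is clear, i.e. the modifier list may be omitted.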
1240 static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
1241                                bool IsPacked, bool HasDstSel) {
1242   int DefaultValue = IsPacked && (Mod == SISrcMods::OP_SEL_1);
1243 
1244   for (int I = 0; I < NumOps; ++I) {
1245     if (!!(Ops[I] & Mod) != DefaultValue)
1246       return false;
1247   }
1248 
1249   if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
1250     return false;
1251 
1252   return true;
1253 }
1254 
1255 void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
1256                                             StringRef Name,
1257                                             unsigned Mod,
1258                                             raw_ostream &O) {
1259   unsigned Opc = MI->getOpcode();
1260   int NumOps = 0;
1261   int Ops[3];
1262 
1263   std::pair<int, int> MOps[] = {
1264       {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src0},
1265       {AMDGPU::OpName::src1_modifiers, AMDGPU::OpName::src1},
1266       {AMDGPU::OpName::src2_modifiers, AMDGPU::OpName::src2}};
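  // When querying OP_SEL_1 (op_sel_hi) the default for a missing
  // src_modifiers operand is 1; for all other modifiers it is 0.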
1267   int DefaultValue = (Mod == SISrcMods::OP_SEL_1);
1268 
1269   for (auto [SrcMod, Src] : MOps) {
1270     if (!AMDGPU::hasNamedOperand(Opc, Src))
1271       break;
1272 
1273     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, SrcMod);
1274     Ops[NumOps++] =
1275         (ModIdx != -1) ? MI->getOperand(ModIdx).getImm() : DefaultValue;
1276   }
1277 
  // For WMMA/SWMMAC instructions always print three neg/opsel values; when a
  // src_modifiers operand is absent, print the default value for it instead of
  // printing nothing.
1280   if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsSWMMAC ||
1281       MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsWMMA) {
1282     NumOps = 0;
1283     int DefaultValue = Mod == SISrcMods::OP_SEL_1;
1284     for (int OpName :
1285          {AMDGPU::OpName::src0_modifiers, AMDGPU::OpName::src1_modifiers,
1286           AMDGPU::OpName::src2_modifiers}) {
1287       int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1288       if (Idx != -1)
1289         Ops[NumOps++] = MI->getOperand(Idx).getImm();
1290       else
1291         Ops[NumOps++] = DefaultValue;
1292     }
1293   }
1294 
1295   const bool HasDstSel =
1296     NumOps > 0 &&
1297     Mod == SISrcMods::OP_SEL_0 &&
1298     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
1299 
1300   const bool IsPacked =
1301     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsPacked;
1302 
1303   if (allOpsDefaultValue(Ops, NumOps, Mod, IsPacked, HasDstSel))
1304     return;
1305 
1306   O << Name;
1307   for (int I = 0; I < NumOps; ++I) {
1308     if (I != 0)
1309       O << ',';
1310 
1311     O << !!(Ops[I] & Mod);
1312   }
1313 
1314   if (HasDstSel) {
1315     O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
1316   }
1317 
1318   O << ']';
1319 }
1320 
1321 void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
1322                                    const MCSubtargetInfo &STI,
1323                                    raw_ostream &O) {
1324   unsigned Opc = MI->getOpcode();
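  // A few opcodes repurpose the op_sel bits: the FP8/BF8-to-F32 conversions
  // use OP_SEL_0/OP_SEL_1 of src0_modifiers as a source byte select, and the
  // permlane16 variants carry the fi and bound_ctrl values in OP_SEL_0 of the
  // src0/src1 modifiers.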
1325   if (isCvt_F32_Fp8_Bf8_e64(Opc)) {
1326     auto SrcMod =
1327         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1328     unsigned Mod = MI->getOperand(SrcMod).getImm();
1329     unsigned Index0 = !!(Mod & SISrcMods::OP_SEL_0);
1330     unsigned Index1 = !!(Mod & SISrcMods::OP_SEL_1);
1331     if (Index0 || Index1)
1332       O << " op_sel:[" << Index0 << ',' << Index1 << ']';
1333     return;
1334   }
1335   if (isPermlane16(Opc)) {
1336     auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1337     auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
1338     unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
1339     unsigned BC = !!(MI->getOperand(BCN).getImm() & SISrcMods::OP_SEL_0);
1340     if (FI || BC)
1341       O << " op_sel:[" << FI << ',' << BC << ']';
1342     return;
1343   }
1344 
1345   printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
1346 }
1347 
1348 void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
1349                                      const MCSubtargetInfo &STI,
1350                                      raw_ostream &O) {
1351   printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
1352 }
1353 
1354 void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
1355                                    const MCSubtargetInfo &STI,
1356                                    raw_ostream &O) {
1357   printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
1358 }
1359 
1360 void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
1361                                    const MCSubtargetInfo &STI,
1362                                    raw_ostream &O) {
1363   printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
1364 }
1365 
1366 void AMDGPUInstPrinter::printIndexKey8bit(const MCInst *MI, unsigned OpNo,
1367                                           const MCSubtargetInfo &STI,
1368                                           raw_ostream &O) {
1369   auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
1370   if (Imm == 0)
1371     return;
1372 
1373   O << " index_key:" << Imm;
1374 }
1375 
1376 void AMDGPUInstPrinter::printIndexKey16bit(const MCInst *MI, unsigned OpNo,
1377                                            const MCSubtargetInfo &STI,
1378                                            raw_ostream &O) {
1379   auto Imm = MI->getOperand(OpNo).getImm() & 0x7;
1380   if (Imm == 0)
1381     return;
1382 
1383   O << " index_key:" << Imm;
1384 }
1385 
1386 void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
1387                                         const MCSubtargetInfo &STI,
1388                                         raw_ostream &O) {
1389   unsigned Imm = MI->getOperand(OpNum).getImm();
1390   switch (Imm) {
1391   case 0:
1392     O << "p10";
1393     break;
1394   case 1:
1395     O << "p20";
1396     break;
1397   case 2:
1398     O << "p0";
1399     break;
1400   default:
1401     O << "invalid_param_" << Imm;
1402   }
1403 }
1404 
1405 void AMDGPUInstPrinter::printInterpAttr(const MCInst *MI, unsigned OpNum,
1406                                         const MCSubtargetInfo &STI,
1407                                         raw_ostream &O) {
1408   unsigned Attr = MI->getOperand(OpNum).getImm();
1409   O << "attr" << Attr;
1410 }
1411 
1412 void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
1413                                         const MCSubtargetInfo &STI,
1414                                         raw_ostream &O) {
1415   unsigned Chan = MI->getOperand(OpNum).getImm();
1416   O << '.' << "xyzw"[Chan & 0x3];
1417 }
1418 
1419 void AMDGPUInstPrinter::printGPRIdxMode(const MCInst *MI, unsigned OpNo,
1420                                         const MCSubtargetInfo &STI,
1421                                         raw_ostream &O) {
1422   using namespace llvm::AMDGPU::VGPRIndexMode;
1423   unsigned Val = MI->getOperand(OpNo).getImm();
1424 
1425   if ((Val & ~ENABLE_MASK) != 0) {
1426     O << formatHex(static_cast<uint64_t>(Val));
1427   } else {
1428     O << "gpr_idx(";
1429     bool NeedComma = false;
1430     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
1431       if (Val & (1 << ModeId)) {
1432         if (NeedComma)
1433           O << ',';
1434         O << IdSymbolic[ModeId];
1435         NeedComma = true;
1436       }
1437     }
1438     O << ')';
1439   }
1440 }
1441 
1442 void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
1443                                         const MCSubtargetInfo &STI,
1444                                         raw_ostream &O) {
1445   printRegularOperand(MI, OpNo, STI, O);
1446   O  << ", ";
1447   printRegularOperand(MI, OpNo + 1, STI, O);
1448 }
1449 
1450 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1451                                    raw_ostream &O, StringRef Asm,
1452                                    StringRef Default) {
1453   const MCOperand &Op = MI->getOperand(OpNo);
1454   assert(Op.isImm());
1455   if (Op.getImm() == 1) {
1456     O << Asm;
1457   } else {
1458     O << Default;
1459   }
1460 }
1461 
1462 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1463                                    raw_ostream &O, char Asm) {
1464   const MCOperand &Op = MI->getOperand(OpNo);
1465   assert(Op.isImm());
1466   if (Op.getImm() == 1)
1467     O << Asm;
1468 }
1469 
1470 void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
1471                                     const MCSubtargetInfo &STI,
1472                                     raw_ostream &O) {
1473   int Imm = MI->getOperand(OpNo).getImm();
1474   if (Imm == SIOutMods::MUL2)
1475     O << " mul:2";
1476   else if (Imm == SIOutMods::MUL4)
1477     O << " mul:4";
1478   else if (Imm == SIOutMods::DIV2)
1479     O << " div:2";
1480 }
1481 
void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
                                     const MCSubtargetInfo &STI,
                                     raw_ostream &O) {
  using namespace llvm::AMDGPU::SendMsg;

  const unsigned Imm16 = MI->getOperand(OpNo).getImm();

  uint16_t MsgId;
  uint16_t OpId;
  uint16_t StreamId;
  decodeMsg(Imm16, MsgId, OpId, StreamId, STI);

  StringRef MsgName = getMsgName(MsgId, STI);

  if (!MsgName.empty() && isValidMsgOp(MsgId, OpId, STI) &&
      isValidMsgStream(MsgId, OpId, StreamId, STI)) {
    O << "sendmsg(" << MsgName;
    if (msgRequiresOp(MsgId, STI)) {
      O << ", " << getMsgOpName(MsgId, OpId, STI);
      if (msgSupportsStream(MsgId, OpId, STI)) {
        O << ", " << StreamId;
      }
    }
    O << ')';
  } else if (encodeMsg(MsgId, OpId, StreamId) == Imm16) {
    O << "sendmsg(" << MsgId << ", " << OpId << ", " << StreamId << ')';
  } else {
    O << Imm16; // Unknown imm16 code.
  }
}

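// Print the bitmask form of a swizzle as a quoted string. Each lane-id bit is
// classified by probing ((Id & AndMask) | OrMask) ^ XorMask with all-zero and
// all-one inputs: '0'/'1' for bits forced to a constant, 'p' for bits passed
// through, 'i' for bits inverted.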
static void printSwizzleBitmask(const uint16_t AndMask,
                                const uint16_t OrMask,
                                const uint16_t XorMask,
                                raw_ostream &O) {
  using namespace llvm::AMDGPU::Swizzle;

  uint16_t Probe0 = ((0            & AndMask) | OrMask) ^ XorMask;
  uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;

  O << "\"";

  for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) {
    uint16_t p0 = Probe0 & Mask;
    uint16_t p1 = Probe1 & Mask;

    if (p0 == p1)
      O << (p0 == 0 ? "0" : "1");
    else
      O << (p0 == 0 ? "p" : "i");
  }

  O << "\"";
}

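// Print a swizzle offset. Zero is omitted. Quad-permute encodings print the
// four lane selects; bitmask encodings are narrowed to the swap, reverse or
// broadcast forms when the masks match those patterns, and otherwise print
// the generic bitmask-permute form. Any other value falls back to a plain
// decimal immediate.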
void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
                                     const MCSubtargetInfo &STI,
                                     raw_ostream &O) {
  using namespace llvm::AMDGPU::Swizzle;

  uint16_t Imm = MI->getOperand(OpNo).getImm();
  if (Imm == 0)
    return;

  O << " offset:";

  if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
    O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      O << ',' << formatDec(Imm & LANE_MASK);
      Imm >>= LANE_SHIFT;
    }
    O << ')';
  } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) {
    uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
    uint16_t OrMask  = (Imm >> BITMASK_OR_SHIFT)  & BITMASK_MASK;
    uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;

    if (AndMask == BITMASK_MAX && OrMask == 0 && llvm::popcount(XorMask) == 1) {
      O << "swizzle(" << IdSymbolic[ID_SWAP] << ',' << formatDec(XorMask)
        << ')';
    } else if (AndMask == BITMASK_MAX && OrMask == 0 && XorMask > 0 &&
               isPowerOf2_64(XorMask + 1)) {
      O << "swizzle(" << IdSymbolic[ID_REVERSE] << ','
        << formatDec(XorMask + 1) << ')';
    } else {
      uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
      if (GroupSize > 1 && isPowerOf2_64(GroupSize) && OrMask < GroupSize &&
          XorMask == 0) {
        O << "swizzle(" << IdSymbolic[ID_BROADCAST] << ','
          << formatDec(GroupSize) << ',' << formatDec(OrMask) << ')';
      } else {
        O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM] << ',';
        printSwizzleBitmask(AndMask, OrMask, XorMask, O);
        O << ')';
      }
    }
  } else {
    printU16ImmDecOperand(MI, OpNo, O);
  }
}

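// Print an s_waitcnt immediate as its vmcnt/expcnt/lgkmcnt fields. Fields
// holding their default (maximum) value are omitted, unless all three are
// defaults, in which case every field is printed.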
void AMDGPUInstPrinter::printSWaitCnt(const MCInst *MI, unsigned OpNo,
                                      const MCSubtargetInfo &STI,
                                      raw_ostream &O) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());

  unsigned SImm16 = MI->getOperand(OpNo).getImm();
  unsigned Vmcnt, Expcnt, Lgkmcnt;
  decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);

  bool IsDefaultVmcnt = Vmcnt == getVmcntBitMask(ISA);
  bool IsDefaultExpcnt = Expcnt == getExpcntBitMask(ISA);
  bool IsDefaultLgkmcnt = Lgkmcnt == getLgkmcntBitMask(ISA);
  bool PrintAll = IsDefaultVmcnt && IsDefaultExpcnt && IsDefaultLgkmcnt;

  bool NeedSpace = false;

  if (!IsDefaultVmcnt || PrintAll) {
    O << "vmcnt(" << Vmcnt << ')';
    NeedSpace = true;
  }

  if (!IsDefaultExpcnt || PrintAll) {
    if (NeedSpace)
      O << ' ';
    O << "expcnt(" << Expcnt << ')';
    NeedSpace = true;
  }

  if (!IsDefaultLgkmcnt || PrintAll) {
    if (NeedSpace)
      O << ' ';
    O << "lgkmcnt(" << Lgkmcnt << ')';
  }
}

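// Print a dependency counter (depctr) immediate. When the encoding is
// symbolic for this subtarget, print each non-default field as name(value),
// or every field if all are at their defaults; otherwise print the raw value
// in hex.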
void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo,
                                    const MCSubtargetInfo &STI,
                                    raw_ostream &O) {
  using namespace llvm::AMDGPU::DepCtr;

  uint64_t Imm16 = MI->getOperand(OpNo).getImm() & 0xffff;

  bool HasNonDefaultVal = false;
  if (isSymbolicDepCtrEncoding(Imm16, HasNonDefaultVal, STI)) {
    int Id = 0;
    StringRef Name;
    unsigned Val;
    bool IsDefault;
    bool NeedSpace = false;
    while (decodeDepCtr(Imm16, Id, Name, Val, IsDefault, STI)) {
      if (!IsDefault || !HasNonDefaultVal) {
        if (NeedSpace)
          O << ' ';
        O << Name << '(' << Val << ')';
        NeedSpace = true;
      }
    }
  } else {
    O << formatHex(Imm16);
  }
}

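// Print an s_delay_alu immediate. Bits [3:0] select instid0, bits [6:4]
// instskip and bits [10:7] instid1; zero fields are omitted and printed
// fields are joined with " | ". An all-zero operand prints as "0".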
void AMDGPUInstPrinter::printSDelayALU(const MCInst *MI, unsigned OpNo,
                                       const MCSubtargetInfo &STI,
                                       raw_ostream &O) {
  const char *BadInstId = "/* invalid instid value */";
  static const std::array<const char *, 12> InstIds = {
      "NO_DEP",        "VALU_DEP_1",    "VALU_DEP_2",
      "VALU_DEP_3",    "VALU_DEP_4",    "TRANS32_DEP_1",
      "TRANS32_DEP_2", "TRANS32_DEP_3", "FMA_ACCUM_CYCLE_1",
      "SALU_CYCLE_1",  "SALU_CYCLE_2",  "SALU_CYCLE_3"};

  const char *BadInstSkip = "/* invalid instskip value */";
  static const std::array<const char *, 6> InstSkips = {
      "SAME", "NEXT", "SKIP_1", "SKIP_2", "SKIP_3", "SKIP_4"};

  unsigned SImm16 = MI->getOperand(OpNo).getImm();
  const char *Prefix = "";

  unsigned Value = SImm16 & 0xF;
  if (Value) {
    const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
    O << Prefix << "instid0(" << Name << ')';
    Prefix = " | ";
  }

  Value = (SImm16 >> 4) & 7;
  if (Value) {
    const char *Name =
        Value < InstSkips.size() ? InstSkips[Value] : BadInstSkip;
    O << Prefix << "instskip(" << Name << ')';
    Prefix = " | ";
  }

  Value = (SImm16 >> 7) & 0xF;
  if (Value) {
    const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
    O << Prefix << "instid1(" << Name << ')';
    Prefix = " | ";
  }

  if (!*Prefix)
    O << "0";
}

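// Print a hwreg(...) operand. Known register ids print by name and unknown
// ids numerically; the offset and width are appended only when either differs
// from its default.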
void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
                                   const MCSubtargetInfo &STI, raw_ostream &O) {
  unsigned Id;
  unsigned Offset;
  unsigned Width;

  using namespace llvm::AMDGPU::Hwreg;
  unsigned Val = MI->getOperand(OpNo).getImm();
  decodeHwreg(Val, Id, Offset, Width);
  StringRef HwRegName = getHwreg(Id, STI);

  O << "hwreg(";
  if (!HwRegName.empty()) {
    O << HwRegName;
  } else {
    O << Id;
  }
  if (Width != WIDTH_DEFAULT_ || Offset != OFFSET_DEFAULT_) {
    O << ", " << Offset << ", " << Width;
  }
  O << ')';
}

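// Print the optional endpgm immediate; zero (the default) is omitted.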
void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
                                    const MCSubtargetInfo &STI,
                                    raw_ostream &O) {
  uint16_t Imm = MI->getOperand(OpNo).getImm();
  if (Imm == 0)
    return;

  O << ' ' << formatDec(Imm);
}

#include "AMDGPUGenAsmWriter.inc"