1 //===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // \file
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPUInstPrinter.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "SIDefines.h"
13 #include "SIRegisterInfo.h"
14 #include "Utils/AMDGPUAsmUtils.h"
15 #include "Utils/AMDGPUBaseInfo.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstrDesc.h"
19 #include "llvm/MC/MCInstrInfo.h"
20 #include "llvm/MC/MCSubtargetInfo.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/TargetParser.h"
23 
24 using namespace llvm;
25 using namespace llvm::AMDGPU;
26 
27 static cl::opt<bool> Keep16BitSuffixes(
28   "amdgpu-keep-16-bit-reg-suffixes",
29   cl::desc("Keep .l and .h suffixes in asm for debugging purposes"),
30   cl::init(false),
31   cl::ReallyHidden);
32 
33 void AMDGPUInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
34   // FIXME: The current implementation of
35   // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
36   // as an integer or we provide a name which represents a physical register.
37   // For CFI instructions we really want to emit a name for the DWARF register
38   // instead, because there may be multiple DWARF registers corresponding to a
39   // single physical register. One case where this problem manifests is with
40   // wave32/wave64 where using the physical register name is ambiguous: if we
41   // write e.g. `.cfi_undefined v0` we lose information about the wavefront
42   // size which we need to encode the register in the final DWARF. Ideally we
43   // would extend MC to support parsing DWARF register names so we could do
44   // something like `.cfi_undefined dwarf_wave32_v0`. For now we just live with
45   // non-pretty DWARF register names in assembly text.
46   OS << Reg.id();
47 }
48 
49 void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
50                                   StringRef Annot, const MCSubtargetInfo &STI,
51                                   raw_ostream &OS) {
52   OS.flush();
53   printInstruction(MI, Address, STI, OS);
54   printAnnotation(OS, Annot);
55 }
56 
57 void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
58                                           const MCSubtargetInfo &STI,
59                                           raw_ostream &O) {
60   O << formatHex(MI->getOperand(OpNo).getImm() & 0xf);
61 }
62 
63 void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
64                                           raw_ostream &O) {
65   O << formatHex(MI->getOperand(OpNo).getImm() & 0xff);
66 }
67 
68 void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
69                                            const MCSubtargetInfo &STI,
70                                            raw_ostream &O) {
71   // It's possible to end up with a 32-bit literal used with a 16-bit operand
72   // with ignored high bits. Print as 32-bit anyway in that case.
73   int64_t Imm = MI->getOperand(OpNo).getImm();
74   if (isInt<16>(Imm) || isUInt<16>(Imm))
75     O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
76   else
77     printU32ImmOperand(MI, OpNo, STI, O);
78 }
79 
80 void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo,
81                                              raw_ostream &O) {
82   O << formatDec(MI->getOperand(OpNo).getImm() & 0xf);
83 }
84 
85 void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
86                                              raw_ostream &O) {
87   O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
88 }
89 
90 void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
91                                               raw_ostream &O) {
92   O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
93 }
94 
95 void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
96                                            const MCSubtargetInfo &STI,
97                                            raw_ostream &O) {
98   O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
99 }
100 
101 void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
102                                       raw_ostream &O, StringRef BitName) {
103   if (MI->getOperand(OpNo).getImm()) {
104     O << ' ' << BitName;
105   }
106 }
107 
108 void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
109                                    raw_ostream &O) {
110   printNamedBit(MI, OpNo, O, "offen");
111 }
112 
113 void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
114                                    raw_ostream &O) {
115   printNamedBit(MI, OpNo, O, "idxen");
116 }
117 
118 void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
119                                     raw_ostream &O) {
120   printNamedBit(MI, OpNo, O, "addr64");
121 }
122 
123 void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
124                                     const MCSubtargetInfo &STI,
125                                     raw_ostream &O) {
126   uint16_t Imm = MI->getOperand(OpNo).getImm();
127   if (Imm != 0) {
128     O << " offset:";
129     printU16ImmDecOperand(MI, OpNo, O);
130   }
131 }
132 
133 void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
134                                         const MCSubtargetInfo &STI,
135                                         raw_ostream &O) {
136   uint16_t Imm = MI->getOperand(OpNo).getImm();
137   if (Imm != 0) {
138     O << " offset:";
139 
140     const MCInstrDesc &Desc = MII.get(MI->getOpcode());
141     bool IsFlatSeg = !(Desc.TSFlags &
142                        (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch));
143 
144     if (IsFlatSeg) { // Unsigned offset
145       printU16ImmDecOperand(MI, OpNo, O);
146     } else {         // Signed offset
147       if (AMDGPU::isGFX10(STI)) {
148         O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
149       } else {
150         O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
151       }
152     }
153   }
154 }
155 
156 void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
157                                      const MCSubtargetInfo &STI,
158                                      raw_ostream &O) {
159   if (MI->getOperand(OpNo).getImm()) {
160     O << " offset0:";
161     printU8ImmDecOperand(MI, OpNo, O);
162   }
163 }
164 
165 void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo,
166                                      const MCSubtargetInfo &STI,
167                                      raw_ostream &O) {
168   if (MI->getOperand(OpNo).getImm()) {
169     O << " offset1:";
170     printU8ImmDecOperand(MI, OpNo, O);
171   }
172 }
173 
174 void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
175                                         const MCSubtargetInfo &STI,
176                                         raw_ostream &O) {
177   printU32ImmOperand(MI, OpNo, STI, O);
178 }
179 
180 void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo,
181                                         const MCSubtargetInfo &STI,
182                                         raw_ostream &O) {
183   O << formatHex(MI->getOperand(OpNo).getImm());
184 }
185 
186 void AMDGPUInstPrinter::printSMEMOffsetMod(const MCInst *MI, unsigned OpNo,
187                                            const MCSubtargetInfo &STI,
188                                            raw_ostream &O) {
189   O << " offset:";
190   printSMEMOffset(MI, OpNo, STI, O);
191 }
192 
193 void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
194                                                const MCSubtargetInfo &STI,
195                                                raw_ostream &O) {
196   printU32ImmOperand(MI, OpNo, STI, O);
197 }
198 
199 void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
200                                  const MCSubtargetInfo &STI, raw_ostream &O) {
201   printNamedBit(MI, OpNo, O, "gds");
202 }
203 
204 void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
205                                   const MCSubtargetInfo &STI, raw_ostream &O) {
206   auto Imm = MI->getOperand(OpNo).getImm();
207   if (Imm & CPol::GLC)
208     O << ((AMDGPU::isGFX940(STI) &&
209            !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
210                                                                      : " glc");
211   if (Imm & CPol::SLC)
212     O << (AMDGPU::isGFX940(STI) ? " nt" : " slc");
213   if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
214     O << " dlc";
215   if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
216     O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
217   if (Imm & ~CPol::ALL)
218     O << " /* unexpected cache policy bit */";
219 }
220 
221 void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,
222                                  const MCSubtargetInfo &STI, raw_ostream &O) {
223 }
224 
225 void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
226                                  const MCSubtargetInfo &STI, raw_ostream &O) {
227   printNamedBit(MI, OpNo, O, "tfe");
228 }
229 
230 void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
231                                    const MCSubtargetInfo &STI, raw_ostream &O) {
232   if (MI->getOperand(OpNo).getImm()) {
233     O << " dmask:";
234     printU16ImmOperand(MI, OpNo, STI, O);
235   }
236 }
237 
238 void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
239                                  const MCSubtargetInfo &STI, raw_ostream &O) {
240   unsigned Dim = MI->getOperand(OpNo).getImm();
241   O << " dim:SQ_RSRC_IMG_";
242 
243   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
244   if (DimInfo)
245     O << DimInfo->AsmSuffix;
246   else
247     O << Dim;
248 }
249 
250 void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
251                                    const MCSubtargetInfo &STI, raw_ostream &O) {
252   printNamedBit(MI, OpNo, O, "unorm");
253 }
254 
255 void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
256                                 const MCSubtargetInfo &STI, raw_ostream &O) {
257   printNamedBit(MI, OpNo, O, "da");
258 }
259 
260 void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
261                                   const MCSubtargetInfo &STI, raw_ostream &O) {
262   if (STI.hasFeature(AMDGPU::FeatureR128A16))
263     printNamedBit(MI, OpNo, O, "a16");
264   else
265     printNamedBit(MI, OpNo, O, "r128");
266 }
267 
268 void AMDGPUInstPrinter::printA16(const MCInst *MI, unsigned OpNo,
269                                  const MCSubtargetInfo &STI, raw_ostream &O) {
270   printNamedBit(MI, OpNo, O, "a16");
271 }
272 
273 void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
274                                  const MCSubtargetInfo &STI, raw_ostream &O) {
275   printNamedBit(MI, OpNo, O, "lwe");
276 }
277 
278 void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
279                                  const MCSubtargetInfo &STI, raw_ostream &O) {
280   printNamedBit(MI, OpNo, O, "d16");
281 }
282 
283 void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
284                                       const MCSubtargetInfo &STI,
285                                       raw_ostream &O) {
286   printNamedBit(MI, OpNo, O, "compr");
287 }
288 
289 void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
290                                    const MCSubtargetInfo &STI,
291                                    raw_ostream &O) {
292   printNamedBit(MI, OpNo, O, "vm");
293 }
294 
295 void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
296                                     const MCSubtargetInfo &STI,
297                                     raw_ostream &O) {
298 }
299 
300 void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI,
301                                             const MCSubtargetInfo &STI,
302                                             raw_ostream &O) {
303   using namespace llvm::AMDGPU::MTBUFFormat;
304 
305   int OpNo =
306     AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::format);
307   assert(OpNo != -1);
308 
309   unsigned Val = MI->getOperand(OpNo).getImm();
310   if (AMDGPU::isGFX10Plus(STI)) {
311     if (Val == UFMT_DEFAULT)
312       return;
313     if (isValidUnifiedFormat(Val, STI)) {
314       O << " format:[" << getUnifiedFormatName(Val, STI) << ']';
315     } else {
316       O << " format:" << Val;
317     }
318   } else {
319     if (Val == DFMT_NFMT_DEFAULT)
320       return;
321     if (isValidDfmtNfmt(Val, STI)) {
322       unsigned Dfmt;
323       unsigned Nfmt;
324       decodeDfmtNfmt(Val, Dfmt, Nfmt);
325       O << " format:[";
326       if (Dfmt != DFMT_DEFAULT) {
327         O << getDfmtName(Dfmt);
328         if (Nfmt != NFMT_DEFAULT) {
329           O << ',';
330         }
331       }
332       if (Nfmt != NFMT_DEFAULT) {
333         O << getNfmtName(Nfmt, STI);
334       }
335       O << ']';
336     } else {
337       O << " format:" << Val;
338     }
339   }
340 }
341 
342 void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
343                                         const MCRegisterInfo &MRI) {
344 #if !defined(NDEBUG)
345   switch (RegNo) {
346   case AMDGPU::FP_REG:
347   case AMDGPU::SP_REG:
348   case AMDGPU::PRIVATE_RSRC_REG:
349     llvm_unreachable("pseudo-register should not ever be emitted");
350   case AMDGPU::SCC:
351     llvm_unreachable("pseudo scc should not ever be emitted");
352   default:
353     break;
354   }
355 #endif
356 
357   StringRef RegName(getRegisterName(RegNo));
358   if (!Keep16BitSuffixes)
359     if (!RegName.consume_back(".l"))
360       RegName.consume_back(".h");
361 
362   O << RegName;
363 }
364 
365 void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
366                                     const MCSubtargetInfo &STI, raw_ostream &O) {
367   auto Opcode = MI->getOpcode();
368   auto Flags = MII.get(Opcode).TSFlags;
369   if (OpNo == 0) {
370     if (Flags & SIInstrFlags::VOP3 && Flags & SIInstrFlags::DPP)
371       O << "_e64_dpp";
372     else if (Flags & SIInstrFlags::VOP3) {
373       if (!getVOP3IsSingle(Opcode))
374         O << "_e64";
375     } else if (Flags & SIInstrFlags::DPP)
376       O << "_dpp";
377     else if (Flags & SIInstrFlags::SDWA)
378       O << "_sdwa";
379     else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
380              ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode)))
381       O << "_e32";
382     O << " ";
383   }
384 
385   printRegularOperand(MI, OpNo, STI, O);
386 
387   // Print default vcc/vcc_lo operand.
388   switch (Opcode) {
389   default: break;
390 
391   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
392   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
393   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
394   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
395   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
396   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
397   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
398   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
399   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
400   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
401   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
402   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
403   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
404   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
405   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
406   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
407   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
408   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
409   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
410   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
411   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
412     printDefaultVccOperand(false, STI, O);
413     break;
414   }
415 }
416 
417 void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
418                                        const MCSubtargetInfo &STI, raw_ostream &O) {
419   if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI))
420     O << " ";
421   else
422     O << "_e32 ";
423 
424   printRegularOperand(MI, OpNo, STI, O);
425 }
426 
427 void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
428                                             const MCSubtargetInfo &STI,
429                                             raw_ostream &O) {
430   int16_t SImm = static_cast<int16_t>(Imm);
431   if (isInlinableIntLiteral(SImm)) {
432     O << SImm;
433   } else {
434     uint64_t Imm16 = static_cast<uint16_t>(Imm);
435     O << formatHex(Imm16);
436   }
437 }
438 
439 void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
440                                          const MCSubtargetInfo &STI,
441                                          raw_ostream &O) {
442   int16_t SImm = static_cast<int16_t>(Imm);
443   if (isInlinableIntLiteral(SImm)) {
444     O << SImm;
445     return;
446   }
447 
448   if (Imm == 0x3C00)
449     O<< "1.0";
450   else if (Imm == 0xBC00)
451     O<< "-1.0";
452   else if (Imm == 0x3800)
453     O<< "0.5";
454   else if (Imm == 0xB800)
455     O<< "-0.5";
456   else if (Imm == 0x4000)
457     O<< "2.0";
458   else if (Imm == 0xC000)
459     O<< "-2.0";
460   else if (Imm == 0x4400)
461     O<< "4.0";
462   else if (Imm == 0xC400)
463     O<< "-4.0";
464   else if (Imm == 0x3118 &&
465            STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) {
466     O << "0.15915494";
467   } else {
468     uint64_t Imm16 = static_cast<uint16_t>(Imm);
469     O << formatHex(Imm16);
470   }
471 }
472 
473 void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
474                                            const MCSubtargetInfo &STI,
475                                            raw_ostream &O) {
476   uint16_t Lo16 = static_cast<uint16_t>(Imm);
477   printImmediate16(Lo16, STI, O);
478 }
479 
480 void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
481                                          const MCSubtargetInfo &STI,
482                                          raw_ostream &O) {
483   int32_t SImm = static_cast<int32_t>(Imm);
484   if (SImm >= -16 && SImm <= 64) {
485     O << SImm;
486     return;
487   }
488 
489   if (Imm == FloatToBits(0.0f))
490     O << "0.0";
491   else if (Imm == FloatToBits(1.0f))
492     O << "1.0";
493   else if (Imm == FloatToBits(-1.0f))
494     O << "-1.0";
495   else if (Imm == FloatToBits(0.5f))
496     O << "0.5";
497   else if (Imm == FloatToBits(-0.5f))
498     O << "-0.5";
499   else if (Imm == FloatToBits(2.0f))
500     O << "2.0";
501   else if (Imm == FloatToBits(-2.0f))
502     O << "-2.0";
503   else if (Imm == FloatToBits(4.0f))
504     O << "4.0";
505   else if (Imm == FloatToBits(-4.0f))
506     O << "-4.0";
507   else if (Imm == 0x3e22f983 &&
508            STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
509     O << "0.15915494";
510   else
511     O << formatHex(static_cast<uint64_t>(Imm));
512 }
513 
514 void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
515                                          const MCSubtargetInfo &STI,
516                                          raw_ostream &O) {
517   int64_t SImm = static_cast<int64_t>(Imm);
518   if (SImm >= -16 && SImm <= 64) {
519     O << SImm;
520     return;
521   }
522 
523   if (Imm == DoubleToBits(0.0))
524     O << "0.0";
525   else if (Imm == DoubleToBits(1.0))
526     O << "1.0";
527   else if (Imm == DoubleToBits(-1.0))
528     O << "-1.0";
529   else if (Imm == DoubleToBits(0.5))
530     O << "0.5";
531   else if (Imm == DoubleToBits(-0.5))
532     O << "-0.5";
533   else if (Imm == DoubleToBits(2.0))
534     O << "2.0";
535   else if (Imm == DoubleToBits(-2.0))
536     O << "-2.0";
537   else if (Imm == DoubleToBits(4.0))
538     O << "4.0";
539   else if (Imm == DoubleToBits(-4.0))
540     O << "-4.0";
541   else if (Imm == 0x3fc45f306dc9c882 &&
542            STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
543     O << "0.15915494309189532";
544   else {
545     assert(isUInt<32>(Imm) || isInt<32>(Imm));
546 
547     // In rare situations, we will have a 32-bit literal in a 64-bit
548     // operand. This is technically allowed for the encoding of s_mov_b64.
549     O << formatHex(static_cast<uint64_t>(Imm));
550   }
551 }
552 
553 void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
554                                   const MCSubtargetInfo &STI,
555                                   raw_ostream &O) {
556   unsigned Imm = MI->getOperand(OpNo).getImm();
557   if (!Imm)
558     return;
559 
560   if (AMDGPU::isGFX940(STI)) {
561     switch (MI->getOpcode()) {
562     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
563     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
564     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
565     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
566       O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
567         << ((Imm >> 2) & 1) << ']';
568       return;
569     }
570   }
571 
572   O << " blgp:" << Imm;
573 }
574 
575 void AMDGPUInstPrinter::printCBSZ(const MCInst *MI, unsigned OpNo,
576                                   const MCSubtargetInfo &STI,
577                                   raw_ostream &O) {
578   unsigned Imm = MI->getOperand(OpNo).getImm();
579   if (!Imm)
580     return;
581 
582   O << " cbsz:" << Imm;
583 }
584 
585 void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo,
586                                   const MCSubtargetInfo &STI,
587                                   raw_ostream &O) {
588   unsigned Imm = MI->getOperand(OpNo).getImm();
589   if (!Imm)
590     return;
591 
592   O << " abid:" << Imm;
593 }
594 
595 void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
596                                                const MCSubtargetInfo &STI,
597                                                raw_ostream &O) {
598   if (!FirstOperand)
599     O << ", ";
600   printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64]
601                       ? AMDGPU::VCC
602                       : AMDGPU::VCC_LO,
603                   O, MRI);
604   if (FirstOperand)
605     O << ", ";
606 }
607 
608 void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
609                                       const MCSubtargetInfo &STI,
610                                       raw_ostream &O) {
611   uint8_t Imm = MI->getOperand(OpNo).getImm();
612   if (Imm != 0) {
613     O << " wait_vdst:";
614     printU4ImmDecOperand(MI, OpNo, O);
615   }
616 }
617 
618 void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
619                                     const MCSubtargetInfo &STI,
620                                     raw_ostream &O) {
621   uint8_t Imm = MI->getOperand(OpNo).getImm();
622   if (Imm != 0) {
623     O << " wait_exp:";
624     printU4ImmDecOperand(MI, OpNo, O);
625   }
626 }
627 
628 bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc,
629                                         unsigned OpNo) const {
630   return OpNo == 0 && (Desc.TSFlags & SIInstrFlags::DPP) &&
631          (Desc.TSFlags & SIInstrFlags::VOPC) &&
632          (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
633           Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO));
634 }
635 
636 // Print default vcc/vcc_lo operand of VOPC.
637 void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
638                                      const MCSubtargetInfo &STI,
639                                      raw_ostream &O) {
640   unsigned Opc = MI->getOpcode();
641   const MCInstrDesc &Desc = MII.get(Opc);
642   int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
643   // 0, 1 and 2 are the first printed operands in different cases
644   // If there are printed modifiers, printOperandAndFPInputMods or
645   // printOperandAndIntInputMods will be called instead
646   if ((OpNo == 0 ||
647        (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) &&
648       (Desc.TSFlags & SIInstrFlags::VOPC) &&
649       (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
650        Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
651     printDefaultVccOperand(true, STI, O);
652 
653   printRegularOperand(MI, OpNo, STI, O);
654 }
655 
656 // Print operands after vcc or modifier handling.
657 void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
658                                             const MCSubtargetInfo &STI,
659                                             raw_ostream &O) {
660   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
661 
662   if (OpNo >= MI->getNumOperands()) {
663     O << "/*Missing OP" << OpNo << "*/";
664     return;
665   }
666 
667   const MCOperand &Op = MI->getOperand(OpNo);
668   if (Op.isReg()) {
669     printRegOperand(Op.getReg(), O, MRI);
670 
671     // Check if operand register class contains register used.
672     // Intention: print disassembler message when invalid code is decoded,
673     // for example sgpr register used in VReg or VISrc(VReg or imm) operand.
674     int RCID = Desc.operands()[OpNo].RegClass;
675     if (RCID != -1) {
676       const MCRegisterClass RC = MRI.getRegClass(RCID);
677       auto Reg = mc2PseudoReg(Op.getReg());
678       if (!RC.contains(Reg) && !isInlineValue(Reg)) {
679         O << "/*Invalid register, operand has \'" << MRI.getRegClassName(&RC)
680           << "\' register class*/";
681       }
682     }
683   } else if (Op.isImm()) {
684     const uint8_t OpTy = Desc.operands()[OpNo].OperandType;
685     switch (OpTy) {
686     case AMDGPU::OPERAND_REG_IMM_INT32:
687     case AMDGPU::OPERAND_REG_IMM_FP32:
688     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
689     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
690     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
691     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
692     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
693     case AMDGPU::OPERAND_REG_IMM_V2INT32:
694     case AMDGPU::OPERAND_REG_IMM_V2FP32:
695     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
696     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
697     case MCOI::OPERAND_IMMEDIATE:
698       printImmediate32(Op.getImm(), STI, O);
699       break;
700     case AMDGPU::OPERAND_REG_IMM_INT64:
701     case AMDGPU::OPERAND_REG_IMM_FP64:
702     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
703     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
704     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
705       printImmediate64(Op.getImm(), STI, O);
706       break;
707     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
708     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
709     case AMDGPU::OPERAND_REG_IMM_INT16:
710       printImmediateInt16(Op.getImm(), STI, O);
711       break;
712     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
713     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
714     case AMDGPU::OPERAND_REG_IMM_FP16:
715     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
716       printImmediate16(Op.getImm(), STI, O);
717       break;
718     case AMDGPU::OPERAND_REG_IMM_V2INT16:
719     case AMDGPU::OPERAND_REG_IMM_V2FP16:
720       if (!isUInt<16>(Op.getImm()) &&
721           STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
722         printImmediate32(Op.getImm(), STI, O);
723         break;
724       }
725 
726       //  Deal with 16-bit FP inline immediates not working.
727       if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) {
728         printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O);
729         break;
730       }
731       [[fallthrough]];
732     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
733     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
734       printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O);
735       break;
736     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
737     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
738       printImmediateV216(Op.getImm(), STI, O);
739       break;
740     case MCOI::OPERAND_UNKNOWN:
741     case MCOI::OPERAND_PCREL:
742       O << formatDec(Op.getImm());
743       break;
744     case MCOI::OPERAND_REGISTER:
745       // FIXME: This should be removed and handled somewhere else. Seems to come
746       // from a disassembler bug.
747       O << "/*invalid immediate*/";
748       break;
749     default:
750       // We hit this for the immediate instruction bits that don't yet have a
751       // custom printer.
752       llvm_unreachable("unexpected immediate operand type");
753     }
754   } else if (Op.isDFPImm()) {
755     double Value = bit_cast<double>(Op.getDFPImm());
756     // We special case 0.0 because otherwise it will be printed as an integer.
757     if (Value == 0.0)
758       O << "0.0";
759     else {
760       const MCInstrDesc &Desc = MII.get(MI->getOpcode());
761       int RCID = Desc.operands()[OpNo].RegClass;
762       unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
763       if (RCBits == 32)
764         printImmediate32(FloatToBits(Value), STI, O);
765       else if (RCBits == 64)
766         printImmediate64(DoubleToBits(Value), STI, O);
767       else
768         llvm_unreachable("Invalid register class size");
769     }
770   } else if (Op.isExpr()) {
771     const MCExpr *Exp = Op.getExpr();
772     Exp->print(O, &MAI);
773   } else {
774     O << "/*INV_OP*/";
775   }
776 
777   // Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
778   switch (MI->getOpcode()) {
779   default: break;
780 
781   case AMDGPU::V_CNDMASK_B32_e32_gfx10:
782   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
783   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
784   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
785   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
786   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
787   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
788   case AMDGPU::V_CNDMASK_B32_dpp8_gfx10:
789   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
790   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
791   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
792   case AMDGPU::V_CNDMASK_B32_e32_gfx11:
793   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
794   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
795   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
796   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
797   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
798   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
799   case AMDGPU::V_CNDMASK_B32_dpp8_gfx11:
800   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
801   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
802   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
803 
804   case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
805   case AMDGPU::V_CNDMASK_B32_e32_vi:
806     if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
807                                                 AMDGPU::OpName::src1))
808       printDefaultVccOperand(OpNo == 0, STI, O);
809     break;
810   }
811 
812   if (Desc.TSFlags & SIInstrFlags::MTBUF) {
813     int SOffsetIdx =
814       AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::soffset);
815     assert(SOffsetIdx != -1);
816     if ((int)OpNo == SOffsetIdx)
817       printSymbolicFormat(MI, STI, O);
818   }
819 }
820 
821 void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
822                                                    unsigned OpNo,
823                                                    const MCSubtargetInfo &STI,
824                                                    raw_ostream &O) {
825   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
826   if (needsImpliedVcc(Desc, OpNo))
827     printDefaultVccOperand(true, STI, O);
828 
829   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
830 
831   // Use 'neg(...)' instead of '-' to avoid ambiguity.
832   // This is important for integer literals because
833   // -1 is not the same value as neg(1).
834   bool NegMnemo = false;
835 
836   if (InputModifiers & SISrcMods::NEG) {
837     if (OpNo + 1 < MI->getNumOperands() &&
838         (InputModifiers & SISrcMods::ABS) == 0) {
839       const MCOperand &Op = MI->getOperand(OpNo + 1);
840       NegMnemo = Op.isImm() || Op.isDFPImm();
841     }
842     if (NegMnemo) {
843       O << "neg(";
844     } else {
845       O << '-';
846     }
847   }
848 
849   if (InputModifiers & SISrcMods::ABS)
850     O << '|';
851   printRegularOperand(MI, OpNo + 1, STI, O);
852   if (InputModifiers & SISrcMods::ABS)
853     O << '|';
854 
855   if (NegMnemo) {
856     O << ')';
857   }
858 
859   // Print default vcc/vcc_lo operand of VOP2b.
860   switch (MI->getOpcode()) {
861   default:
862     break;
863 
864   case AMDGPU::V_CNDMASK_B32_sdwa_gfx10:
865   case AMDGPU::V_CNDMASK_B32_dpp_gfx10:
866   case AMDGPU::V_CNDMASK_B32_dpp_gfx11:
867     if ((int)OpNo + 1 ==
868         AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::src1))
869       printDefaultVccOperand(OpNo == 0, STI, O);
870     break;
871   }
872 }
873 
874 void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
875                                                     unsigned OpNo,
876                                                     const MCSubtargetInfo &STI,
877                                                     raw_ostream &O) {
878   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
879   if (needsImpliedVcc(Desc, OpNo))
880     printDefaultVccOperand(true, STI, O);
881 
882   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
883   if (InputModifiers & SISrcMods::SEXT)
884     O << "sext(";
885   printRegularOperand(MI, OpNo + 1, STI, O);
886   if (InputModifiers & SISrcMods::SEXT)
887     O << ')';
888 
889   // Print default vcc/vcc_lo operand of VOP2b.
890   switch (MI->getOpcode()) {
891   default: break;
892 
893   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
894   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
895   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
896     if ((int)OpNo + 1 == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
897                                                     AMDGPU::OpName::src1))
898       printDefaultVccOperand(OpNo == 0, STI, O);
899     break;
900   }
901 }
902 
903 void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
904                                   const MCSubtargetInfo &STI,
905                                   raw_ostream &O) {
906   if (!AMDGPU::isGFX10Plus(STI))
907     llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
908 
909   unsigned Imm = MI->getOperand(OpNo).getImm();
910   O << "dpp8:[" << formatDec(Imm & 0x7);
911   for (size_t i = 1; i < 8; ++i) {
912     O << ',' << formatDec((Imm >> (3 * i)) & 0x7);
913   }
914   O << ']';
915 }
916 
917 void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
918                                      const MCSubtargetInfo &STI,
919                                      raw_ostream &O) {
920   using namespace AMDGPU::DPP;
921 
922   unsigned Imm = MI->getOperand(OpNo).getImm();
923   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
924   int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
925                                            AMDGPU::OpName::src0);
926 
927   if (Src0Idx >= 0 &&
928       Desc.operands()[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID &&
929       !AMDGPU::isLegal64BitDPPControl(Imm)) {
930     O << " /* 64 bit dpp only supports row_newbcast */";
931     return;
932   } else if (Imm <= DppCtrl::QUAD_PERM_LAST) {
933     O << "quad_perm:[";
934     O << formatDec(Imm & 0x3)         << ',';
935     O << formatDec((Imm & 0xc)  >> 2) << ',';
936     O << formatDec((Imm & 0x30) >> 4) << ',';
937     O << formatDec((Imm & 0xc0) >> 6) << ']';
938   } else if ((Imm >= DppCtrl::ROW_SHL_FIRST) &&
939              (Imm <= DppCtrl::ROW_SHL_LAST)) {
940     O << "row_shl:";
941     printU4ImmDecOperand(MI, OpNo, O);
942   } else if ((Imm >= DppCtrl::ROW_SHR_FIRST) &&
943              (Imm <= DppCtrl::ROW_SHR_LAST)) {
944     O << "row_shr:";
945     printU4ImmDecOperand(MI, OpNo, O);
946   } else if ((Imm >= DppCtrl::ROW_ROR_FIRST) &&
947              (Imm <= DppCtrl::ROW_ROR_LAST)) {
948     O << "row_ror:";
949     printU4ImmDecOperand(MI, OpNo, O);
950   } else if (Imm == DppCtrl::WAVE_SHL1) {
951     if (AMDGPU::isGFX10Plus(STI)) {
952       O << "/* wave_shl is not supported starting from GFX10 */";
953       return;
954     }
955     O << "wave_shl:1";
956   } else if (Imm == DppCtrl::WAVE_ROL1) {
957     if (AMDGPU::isGFX10Plus(STI)) {
958       O << "/* wave_rol is not supported starting from GFX10 */";
959       return;
960     }
961     O << "wave_rol:1";
962   } else if (Imm == DppCtrl::WAVE_SHR1) {
963     if (AMDGPU::isGFX10Plus(STI)) {
964       O << "/* wave_shr is not supported starting from GFX10 */";
965       return;
966     }
967     O << "wave_shr:1";
968   } else if (Imm == DppCtrl::WAVE_ROR1) {
969     if (AMDGPU::isGFX10Plus(STI)) {
970       O << "/* wave_ror is not supported starting from GFX10 */";
971       return;
972     }
973     O << "wave_ror:1";
974   } else if (Imm == DppCtrl::ROW_MIRROR) {
975     O << "row_mirror";
976   } else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
977     O << "row_half_mirror";
978   } else if (Imm == DppCtrl::BCAST15) {
979     if (AMDGPU::isGFX10Plus(STI)) {
980       O << "/* row_bcast is not supported starting from GFX10 */";
981       return;
982     }
983     O << "row_bcast:15";
984   } else if (Imm == DppCtrl::BCAST31) {
985     if (AMDGPU::isGFX10Plus(STI)) {
986       O << "/* row_bcast is not supported starting from GFX10 */";
987       return;
988     }
989     O << "row_bcast:31";
990   } else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
991              (Imm <= DppCtrl::ROW_SHARE_LAST)) {
992     if (AMDGPU::isGFX90A(STI)) {
993       O << "row_newbcast:";
994     } else if (AMDGPU::isGFX10Plus(STI)) {
995       O << "row_share:";
996     } else {
997       O << " /* row_newbcast/row_share is not supported on ASICs earlier "
998            "than GFX90A/GFX10 */";
999       return;
1000     }
1001     printU4ImmDecOperand(MI, OpNo, O);
1002   } else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
1003              (Imm <= DppCtrl::ROW_XMASK_LAST)) {
1004     if (!AMDGPU::isGFX10Plus(STI)) {
1005       O << "/* row_xmask is not supported on ASICs earlier than GFX10 */";
1006       return;
1007     }
1008     O << "row_xmask:";
1009     printU4ImmDecOperand(MI, OpNo, O);
1010   } else {
1011     O << "/* Invalid dpp_ctrl value */";
1012   }
1013 }
1014 
1015 void AMDGPUInstPrinter::printRowMask(const MCInst *MI, unsigned OpNo,
1016                                      const MCSubtargetInfo &STI,
1017                                      raw_ostream &O) {
1018   O << " row_mask:";
1019   printU4ImmOperand(MI, OpNo, STI, O);
1020 }
1021 
1022 void AMDGPUInstPrinter::printBankMask(const MCInst *MI, unsigned OpNo,
1023                                       const MCSubtargetInfo &STI,
1024                                       raw_ostream &O) {
1025   O << " bank_mask:";
1026   printU4ImmOperand(MI, OpNo, STI, O);
1027 }
1028 
1029 void AMDGPUInstPrinter::printDppBoundCtrl(const MCInst *MI, unsigned OpNo,
1030                                           const MCSubtargetInfo &STI,
1031                                           raw_ostream &O) {
1032   unsigned Imm = MI->getOperand(OpNo).getImm();
1033   if (Imm) {
1034     O << " bound_ctrl:1";
1035   }
1036 }
1037 
1038 void AMDGPUInstPrinter::printFI(const MCInst *MI, unsigned OpNo,
1039                                 const MCSubtargetInfo &STI,
1040                                 raw_ostream &O) {
1041   using namespace llvm::AMDGPU::DPP;
1042   unsigned Imm = MI->getOperand(OpNo).getImm();
1043   if (Imm == DPP_FI_1 || Imm == DPP8_FI_1) {
1044     O << " fi:1";
1045   }
1046 }
1047 
1048 void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
1049                                      raw_ostream &O) {
1050   using namespace llvm::AMDGPU::SDWA;
1051 
1052   unsigned Imm = MI->getOperand(OpNo).getImm();
1053   switch (Imm) {
1054   case SdwaSel::BYTE_0: O << "BYTE_0"; break;
1055   case SdwaSel::BYTE_1: O << "BYTE_1"; break;
1056   case SdwaSel::BYTE_2: O << "BYTE_2"; break;
1057   case SdwaSel::BYTE_3: O << "BYTE_3"; break;
1058   case SdwaSel::WORD_0: O << "WORD_0"; break;
1059   case SdwaSel::WORD_1: O << "WORD_1"; break;
1060   case SdwaSel::DWORD: O << "DWORD"; break;
1061   default: llvm_unreachable("Invalid SDWA data select operand");
1062   }
1063 }
1064 
1065 void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
1066                                         const MCSubtargetInfo &STI,
1067                                         raw_ostream &O) {
1068   O << "dst_sel:";
1069   printSDWASel(MI, OpNo, O);
1070 }
1071 
1072 void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
1073                                          const MCSubtargetInfo &STI,
1074                                          raw_ostream &O) {
1075   O << "src0_sel:";
1076   printSDWASel(MI, OpNo, O);
1077 }
1078 
1079 void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
1080                                          const MCSubtargetInfo &STI,
1081                                          raw_ostream &O) {
1082   O << "src1_sel:";
1083   printSDWASel(MI, OpNo, O);
1084 }
1085 
1086 void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
1087                                            const MCSubtargetInfo &STI,
1088                                            raw_ostream &O) {
1089   using namespace llvm::AMDGPU::SDWA;
1090 
1091   O << "dst_unused:";
1092   unsigned Imm = MI->getOperand(OpNo).getImm();
1093   switch (Imm) {
1094   case DstUnused::UNUSED_PAD: O << "UNUSED_PAD"; break;
1095   case DstUnused::UNUSED_SEXT: O << "UNUSED_SEXT"; break;
1096   case DstUnused::UNUSED_PRESERVE: O << "UNUSED_PRESERVE"; break;
1097   default: llvm_unreachable("Invalid SDWA dest_unused operand");
1098   }
1099 }
1100 
1101 void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
1102                                      const MCSubtargetInfo &STI, raw_ostream &O,
1103                                      unsigned N) {
1104   unsigned Opc = MI->getOpcode();
1105   int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
1106   unsigned En = MI->getOperand(EnIdx).getImm();
1107 
1108   int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
1109 
1110   // If compr is set, print as src0, src0, src1, src1
1111   if (MI->getOperand(ComprIdx).getImm())
1112     OpNo = OpNo - N + N / 2;
1113 
1114   if (En & (1 << N))
1115     printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
1116   else
1117     O << "off";
1118 }
1119 
1120 void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
1121                                      const MCSubtargetInfo &STI,
1122                                      raw_ostream &O) {
1123   printExpSrcN(MI, OpNo, STI, O, 0);
1124 }
1125 
1126 void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
1127                                      const MCSubtargetInfo &STI,
1128                                      raw_ostream &O) {
1129   printExpSrcN(MI, OpNo, STI, O, 1);
1130 }
1131 
1132 void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
1133                                      const MCSubtargetInfo &STI,
1134                                      raw_ostream &O) {
1135   printExpSrcN(MI, OpNo, STI, O, 2);
1136 }
1137 
1138 void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
1139                                      const MCSubtargetInfo &STI,
1140                                      raw_ostream &O) {
1141   printExpSrcN(MI, OpNo, STI, O, 3);
1142 }
1143 
1144 void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
1145                                     const MCSubtargetInfo &STI,
1146                                     raw_ostream &O) {
1147   using namespace llvm::AMDGPU::Exp;
1148 
1149   // This is really a 6 bit field.
1150   unsigned Id = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
1151 
1152   int Index;
1153   StringRef TgtName;
1154   if (getTgtName(Id, TgtName, Index) && isSupportedTgtId(Id, STI)) {
1155     O << ' ' << TgtName;
1156     if (Index >= 0)
1157       O << Index;
1158   } else {
1159     O << " invalid_target_" << Id;
1160   }
1161 }
1162 
1163 static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
1164                                bool IsPacked, bool HasDstSel) {
1165   int DefaultValue = IsPacked && (Mod == SISrcMods::OP_SEL_1);
1166 
1167   for (int I = 0; I < NumOps; ++I) {
1168     if (!!(Ops[I] & Mod) != DefaultValue)
1169       return false;
1170   }
1171 
1172   if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
1173     return false;
1174 
1175   return true;
1176 }
1177 
1178 void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
1179                                             StringRef Name,
1180                                             unsigned Mod,
1181                                             raw_ostream &O) {
1182   unsigned Opc = MI->getOpcode();
1183   int NumOps = 0;
1184   int Ops[3];
1185 
1186   for (int OpName : { AMDGPU::OpName::src0_modifiers,
1187                       AMDGPU::OpName::src1_modifiers,
1188                       AMDGPU::OpName::src2_modifiers }) {
1189     int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1190     if (Idx == -1)
1191       break;
1192 
1193     Ops[NumOps++] = MI->getOperand(Idx).getImm();
1194   }
1195 
1196   const bool HasDstSel =
1197     NumOps > 0 &&
1198     Mod == SISrcMods::OP_SEL_0 &&
1199     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
1200 
1201   const bool IsPacked =
1202     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsPacked;
1203 
1204   if (allOpsDefaultValue(Ops, NumOps, Mod, IsPacked, HasDstSel))
1205     return;
1206 
1207   O << Name;
1208   for (int I = 0; I < NumOps; ++I) {
1209     if (I != 0)
1210       O << ',';
1211 
1212     O << !!(Ops[I] & Mod);
1213   }
1214 
1215   if (HasDstSel) {
1216     O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
1217   }
1218 
1219   O << ']';
1220 }
1221 
1222 void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
1223                                    const MCSubtargetInfo &STI,
1224                                    raw_ostream &O) {
1225   unsigned Opc = MI->getOpcode();
1226   if (isPermlane16(Opc)) {
1227     auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1228     auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
1229     unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
1230     unsigned BC = !!(MI->getOperand(BCN).getImm() & SISrcMods::OP_SEL_0);
1231     if (FI || BC)
1232       O << " op_sel:[" << FI << ',' << BC << ']';
1233     return;
1234   }
1235 
1236   printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
1237 }
1238 
1239 void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
1240                                      const MCSubtargetInfo &STI,
1241                                      raw_ostream &O) {
1242   printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
1243 }
1244 
1245 void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
1246                                    const MCSubtargetInfo &STI,
1247                                    raw_ostream &O) {
1248   printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
1249 }
1250 
1251 void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
1252                                    const MCSubtargetInfo &STI,
1253                                    raw_ostream &O) {
1254   printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
1255 }
1256 
1257 void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
1258                                         const MCSubtargetInfo &STI,
1259                                         raw_ostream &O) {
1260   unsigned Imm = MI->getOperand(OpNum).getImm();
1261   switch (Imm) {
1262   case 0:
1263     O << "p10";
1264     break;
1265   case 1:
1266     O << "p20";
1267     break;
1268   case 2:
1269     O << "p0";
1270     break;
1271   default:
1272     O << "invalid_param_" << Imm;
1273   }
1274 }
1275 
1276 void AMDGPUInstPrinter::printInterpAttr(const MCInst *MI, unsigned OpNum,
1277                                         const MCSubtargetInfo &STI,
1278                                         raw_ostream &O) {
1279   unsigned Attr = MI->getOperand(OpNum).getImm();
1280   O << "attr" << Attr;
1281 }
1282 
1283 void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
1284                                         const MCSubtargetInfo &STI,
1285                                         raw_ostream &O) {
1286   unsigned Chan = MI->getOperand(OpNum).getImm();
1287   O << '.' << "xyzw"[Chan & 0x3];
1288 }
1289 
1290 void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
1291                                            const MCSubtargetInfo &STI,
1292                                            raw_ostream &O) {
1293   using namespace llvm::AMDGPU::VGPRIndexMode;
1294   unsigned Val = MI->getOperand(OpNo).getImm();
1295 
1296   if ((Val & ~ENABLE_MASK) != 0) {
1297     O << formatHex(static_cast<uint64_t>(Val));
1298   } else {
1299     O << "gpr_idx(";
1300     bool NeedComma = false;
1301     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
1302       if (Val & (1 << ModeId)) {
1303         if (NeedComma)
1304           O << ',';
1305         O << IdSymbolic[ModeId];
1306         NeedComma = true;
1307       }
1308     }
1309     O << ')';
1310   }
1311 }
1312 
1313 void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
1314                                         const MCSubtargetInfo &STI,
1315                                         raw_ostream &O) {
1316   printRegularOperand(MI, OpNo, STI, O);
1317   O  << ", ";
1318   printRegularOperand(MI, OpNo + 1, STI, O);
1319 }
1320 
1321 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1322                                    raw_ostream &O, StringRef Asm,
1323                                    StringRef Default) {
1324   const MCOperand &Op = MI->getOperand(OpNo);
1325   assert(Op.isImm());
1326   if (Op.getImm() == 1) {
1327     O << Asm;
1328   } else {
1329     O << Default;
1330   }
1331 }
1332 
1333 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1334                                    raw_ostream &O, char Asm) {
1335   const MCOperand &Op = MI->getOperand(OpNo);
1336   assert(Op.isImm());
1337   if (Op.getImm() == 1)
1338     O << Asm;
1339 }
1340 
1341 void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
1342                                   const MCSubtargetInfo &STI,
1343                                   raw_ostream &O) {
1344   printNamedBit(MI, OpNo, O, "high");
1345 }
1346 
1347 void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
1348                                      const MCSubtargetInfo &STI,
1349                                      raw_ostream &O) {
1350   printNamedBit(MI, OpNo, O, "clamp");
1351 }
1352 
1353 void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
1354                                     const MCSubtargetInfo &STI,
1355                                     raw_ostream &O) {
1356   int Imm = MI->getOperand(OpNo).getImm();
1357   if (Imm == SIOutMods::MUL2)
1358     O << " mul:2";
1359   else if (Imm == SIOutMods::MUL4)
1360     O << " mul:4";
1361   else if (Imm == SIOutMods::DIV2)
1362     O << " div:2";
1363 }
1364 
1365 void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
1366                                      const MCSubtargetInfo &STI,
1367                                      raw_ostream &O) {
1368   using namespace llvm::AMDGPU::SendMsg;
1369 
1370   const unsigned Imm16 = MI->getOperand(OpNo).getImm();
1371 
1372   uint16_t MsgId;
1373   uint16_t OpId;
1374   uint16_t StreamId;
1375   decodeMsg(Imm16, MsgId, OpId, StreamId, STI);
1376 
1377   StringRef MsgName = getMsgName(MsgId, STI);
1378 
1379   if (!MsgName.empty() && isValidMsgOp(MsgId, OpId, STI) &&
1380       isValidMsgStream(MsgId, OpId, StreamId, STI)) {
1381     O << "sendmsg(" << MsgName;
1382     if (msgRequiresOp(MsgId, STI)) {
1383       O << ", " << getMsgOpName(MsgId, OpId, STI);
1384       if (msgSupportsStream(MsgId, OpId, STI)) {
1385         O << ", " << StreamId;
1386       }
1387     }
1388     O << ')';
1389   } else if (encodeMsg(MsgId, OpId, StreamId) == Imm16) {
1390     O << "sendmsg(" << MsgId << ", " << OpId << ", " << StreamId << ')';
1391   } else {
1392     O << Imm16; // Unknown imm16 code.
1393   }
1394 }
1395 
1396 static void printSwizzleBitmask(const uint16_t AndMask,
1397                                 const uint16_t OrMask,
1398                                 const uint16_t XorMask,
1399                                 raw_ostream &O) {
1400   using namespace llvm::AMDGPU::Swizzle;
1401 
1402   uint16_t Probe0 = ((0            & AndMask) | OrMask) ^ XorMask;
1403   uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;
1404 
1405   O << "\"";
1406 
1407   for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) {
1408     uint16_t p0 = Probe0 & Mask;
1409     uint16_t p1 = Probe1 & Mask;
1410 
1411     if (p0 == p1) {
1412       if (p0 == 0) {
1413         O << "0";
1414       } else {
1415         O << "1";
1416       }
1417     } else {
1418       if (p0 == 0) {
1419         O << "p";
1420       } else {
1421         O << "i";
1422       }
1423     }
1424   }
1425 
1426   O << "\"";
1427 }
1428 
1429 void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
1430                                      const MCSubtargetInfo &STI,
1431                                      raw_ostream &O) {
1432   using namespace llvm::AMDGPU::Swizzle;
1433 
1434   uint16_t Imm = MI->getOperand(OpNo).getImm();
1435   if (Imm == 0) {
1436     return;
1437   }
1438 
1439   O << " offset:";
1440 
1441   if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
1442 
1443     O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
1444     for (unsigned I = 0; I < LANE_NUM; ++I) {
1445       O << ",";
1446       O << formatDec(Imm & LANE_MASK);
1447       Imm >>= LANE_SHIFT;
1448     }
1449     O << ")";
1450 
1451   } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) {
1452 
1453     uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
1454     uint16_t OrMask  = (Imm >> BITMASK_OR_SHIFT)  & BITMASK_MASK;
1455     uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;
1456 
1457     if (AndMask == BITMASK_MAX && OrMask == 0 && llvm::popcount(XorMask) == 1) {
1458 
1459       O << "swizzle(" << IdSymbolic[ID_SWAP];
1460       O << ",";
1461       O << formatDec(XorMask);
1462       O << ")";
1463 
1464     } else if (AndMask == BITMASK_MAX && OrMask == 0 && XorMask > 0 &&
1465                isPowerOf2_64(XorMask + 1)) {
1466 
1467       O << "swizzle(" << IdSymbolic[ID_REVERSE];
1468       O << ",";
1469       O << formatDec(XorMask + 1);
1470       O << ")";
1471 
1472     } else {
1473 
1474       uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
1475       if (GroupSize > 1 &&
1476           isPowerOf2_64(GroupSize) &&
1477           OrMask < GroupSize &&
1478           XorMask == 0) {
1479 
1480         O << "swizzle(" << IdSymbolic[ID_BROADCAST];
1481         O << ",";
1482         O << formatDec(GroupSize);
1483         O << ",";
1484         O << formatDec(OrMask);
1485         O << ")";
1486 
1487       } else {
1488         O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM];
1489         O << ",";
1490         printSwizzleBitmask(AndMask, OrMask, XorMask, O);
1491         O << ")";
1492       }
1493     }
1494   } else {
1495     printU16ImmDecOperand(MI, OpNo, O);
1496   }
1497 }
1498 
1499 void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
1500                                       const MCSubtargetInfo &STI,
1501                                       raw_ostream &O) {
1502   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
1503 
1504   unsigned SImm16 = MI->getOperand(OpNo).getImm();
1505   unsigned Vmcnt, Expcnt, Lgkmcnt;
1506   decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
1507 
1508   bool IsDefaultVmcnt = Vmcnt == getVmcntBitMask(ISA);
1509   bool IsDefaultExpcnt = Expcnt == getExpcntBitMask(ISA);
1510   bool IsDefaultLgkmcnt = Lgkmcnt == getLgkmcntBitMask(ISA);
1511   bool PrintAll = IsDefaultVmcnt && IsDefaultExpcnt && IsDefaultLgkmcnt;
1512 
1513   bool NeedSpace = false;
1514 
1515   if (!IsDefaultVmcnt || PrintAll) {
1516     O << "vmcnt(" << Vmcnt << ')';
1517     NeedSpace = true;
1518   }
1519 
1520   if (!IsDefaultExpcnt || PrintAll) {
1521     if (NeedSpace)
1522       O << ' ';
1523     O << "expcnt(" << Expcnt << ')';
1524     NeedSpace = true;
1525   }
1526 
1527   if (!IsDefaultLgkmcnt || PrintAll) {
1528     if (NeedSpace)
1529       O << ' ';
1530     O << "lgkmcnt(" << Lgkmcnt << ')';
1531   }
1532 }
1533 
1534 void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo,
1535                                     const MCSubtargetInfo &STI,
1536                                     raw_ostream &O) {
1537   using namespace llvm::AMDGPU::DepCtr;
1538 
1539   uint64_t Imm16 = MI->getOperand(OpNo).getImm() & 0xffff;
1540 
1541   bool HasNonDefaultVal = false;
1542   if (isSymbolicDepCtrEncoding(Imm16, HasNonDefaultVal, STI)) {
1543     int Id = 0;
1544     StringRef Name;
1545     unsigned Val;
1546     bool IsDefault;
1547     bool NeedSpace = false;
1548     while (decodeDepCtr(Imm16, Id, Name, Val, IsDefault, STI)) {
1549       if (!IsDefault || !HasNonDefaultVal) {
1550         if (NeedSpace)
1551           O << ' ';
1552         O << Name << '(' << Val << ')';
1553         NeedSpace = true;
1554       }
1555     }
1556   } else {
1557     O << formatHex(Imm16);
1558   }
1559 }
1560 
1561 void AMDGPUInstPrinter::printDelayFlag(const MCInst *MI, unsigned OpNo,
1562                                        const MCSubtargetInfo &STI,
1563                                        raw_ostream &O) {
1564   const char *BadInstId = "/* invalid instid value */";
1565   static const std::array<const char *, 12> InstIds = {
1566       "NO_DEP",        "VALU_DEP_1",    "VALU_DEP_2",
1567       "VALU_DEP_3",    "VALU_DEP_4",    "TRANS32_DEP_1",
1568       "TRANS32_DEP_2", "TRANS32_DEP_3", "FMA_ACCUM_CYCLE_1",
1569       "SALU_CYCLE_1",  "SALU_CYCLE_2",  "SALU_CYCLE_3"};
1570 
1571   const char *BadInstSkip = "/* invalid instskip value */";
1572   static const std::array<const char *, 6> InstSkips = {
1573       "SAME", "NEXT", "SKIP_1", "SKIP_2", "SKIP_3", "SKIP_4"};
1574 
1575   unsigned SImm16 = MI->getOperand(OpNo).getImm();
1576   const char *Prefix = "";
1577 
1578   unsigned Value = SImm16 & 0xF;
1579   if (Value) {
1580     const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
1581     O << Prefix << "instid0(" << Name << ')';
1582     Prefix = " | ";
1583   }
1584 
1585   Value = (SImm16 >> 4) & 7;
1586   if (Value) {
1587     const char *Name =
1588         Value < InstSkips.size() ? InstSkips[Value] : BadInstSkip;
1589     O << Prefix << "instskip(" << Name << ')';
1590     Prefix = " | ";
1591   }
1592 
1593   Value = (SImm16 >> 7) & 0xF;
1594   if (Value) {
1595     const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
1596     O << Prefix << "instid1(" << Name << ')';
1597     Prefix = " | ";
1598   }
1599 
1600   if (!*Prefix)
1601     O << "0";
1602 }
1603 
1604 void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
1605                                    const MCSubtargetInfo &STI, raw_ostream &O) {
1606   unsigned Id;
1607   unsigned Offset;
1608   unsigned Width;
1609 
1610   using namespace llvm::AMDGPU::Hwreg;
1611   unsigned Val = MI->getOperand(OpNo).getImm();
1612   decodeHwreg(Val, Id, Offset, Width);
1613   StringRef HwRegName = getHwreg(Id, STI);
1614 
1615   O << "hwreg(";
1616   if (!HwRegName.empty()) {
1617     O << HwRegName;
1618   } else {
1619     O << Id;
1620   }
1621   if (Width != WIDTH_DEFAULT_ || Offset != OFFSET_DEFAULT_) {
1622     O << ", " << Offset << ", " << Width;
1623   }
1624   O << ')';
1625 }
1626 
1627 void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
1628                                     const MCSubtargetInfo &STI,
1629                                     raw_ostream &O) {
1630   uint16_t Imm = MI->getOperand(OpNo).getImm();
1631   if (Imm == 0) {
1632     return;
1633   }
1634 
1635   O << ' ' << formatDec(Imm);
1636 }
1637 
1638 #include "AMDGPUGenAsmWriter.inc"
1639