1 //===-- AMDGPUInstPrinter.cpp - AMDGPU MC Inst -> ASM ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 // \file
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPUInstPrinter.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "SIDefines.h"
13 #include "SIRegisterInfo.h"
14 #include "Utils/AMDGPUAsmUtils.h"
15 #include "Utils/AMDGPUBaseInfo.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCInst.h"
18 #include "llvm/MC/MCInstrDesc.h"
19 #include "llvm/MC/MCInstrInfo.h"
20 #include "llvm/MC/MCSubtargetInfo.h"
21 #include "llvm/Support/CommandLine.h"
22 #include "llvm/Support/TargetParser.h"
23 
24 using namespace llvm;
25 using namespace llvm::AMDGPU;
26 
27 static cl::opt<bool> Keep16BitSuffixes(
28   "amdgpu-keep-16-bit-reg-suffixes",
29   cl::desc("Keep .l and .h suffixes in asm for debugging purposes"),
30   cl::init(false),
31   cl::ReallyHidden);
32 
// Writes the register to OS as its raw number (RegNo) rather than as a
// symbolic name; the FIXME below explains why.
void AMDGPUInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
  // FIXME: The current implementation of
  // AsmParser::parseRegisterOrRegisterNumber in MC implies we either emit this
  // as an integer or we provide a name which represents a physical register.
  // For CFI instructions we really want to emit a name for the DWARF register
  // instead, because there may be multiple DWARF registers corresponding to a
  // single physical register. One case where this problem manifests is with
  // wave32/wave64 where using the physical register name is ambiguous: if we
  // write e.g. `.cfi_undefined v0` we lose information about the wavefront
  // size which we need to encode the register in the final DWARF. Ideally we
  // would extend MC to support parsing DWARF register names so we could do
  // something like `.cfi_undefined dwarf_wave32_v0`. For now we just live with
  // non-pretty DWARF register names in assembly text.
  OS << RegNo;
}
48 
// Prints one instruction to OS: flushes the stream, emits the instruction
// text through printInstruction, then appends Annot through printAnnotation.
void AMDGPUInstPrinter::printInst(const MCInst *MI, uint64_t Address,
                                  StringRef Annot, const MCSubtargetInfo &STI,
                                  raw_ostream &OS) {
  OS.flush();
  printInstruction(MI, Address, STI, OS);
  printAnnotation(OS, Annot);
}
56 
57 void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
58                                           const MCSubtargetInfo &STI,
59                                           raw_ostream &O) {
60   O << formatHex(MI->getOperand(OpNo).getImm() & 0xf);
61 }
62 
63 void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
64                                           raw_ostream &O) {
65   O << formatHex(MI->getOperand(OpNo).getImm() & 0xff);
66 }
67 
68 void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
69                                            const MCSubtargetInfo &STI,
70                                            raw_ostream &O) {
71   // It's possible to end up with a 32-bit literal used with a 16-bit operand
72   // with ignored high bits. Print as 32-bit anyway in that case.
73   int64_t Imm = MI->getOperand(OpNo).getImm();
74   if (isInt<16>(Imm) || isUInt<16>(Imm))
75     O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
76   else
77     printU32ImmOperand(MI, OpNo, STI, O);
78 }
79 
80 void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo,
81                                              raw_ostream &O) {
82   O << formatDec(MI->getOperand(OpNo).getImm() & 0xf);
83 }
84 
85 void AMDGPUInstPrinter::printU8ImmDecOperand(const MCInst *MI, unsigned OpNo,
86                                              raw_ostream &O) {
87   O << formatDec(MI->getOperand(OpNo).getImm() & 0xff);
88 }
89 
90 void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
91                                               raw_ostream &O) {
92   O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
93 }
94 
95 void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
96                                            const MCSubtargetInfo &STI,
97                                            raw_ostream &O) {
98   O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
99 }
100 
101 void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
102                                       raw_ostream &O, StringRef BitName) {
103   if (MI->getOperand(OpNo).getImm()) {
104     O << ' ' << BitName;
105   }
106 }
107 
// Prints " offen" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
                                   raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "offen");
}
112 
// Prints " idxen" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
                                   raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "idxen");
}
117 
// Prints " addr64" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
                                    raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "addr64");
}
122 
123 void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
124                                     const MCSubtargetInfo &STI,
125                                     raw_ostream &O) {
126   uint16_t Imm = MI->getOperand(OpNo).getImm();
127   if (Imm != 0) {
128     O << " offset:";
129     printU16ImmDecOperand(MI, OpNo, O);
130   }
131 }
132 
133 void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
134                                         const MCSubtargetInfo &STI,
135                                         raw_ostream &O) {
136   uint16_t Imm = MI->getOperand(OpNo).getImm();
137   if (Imm != 0) {
138     O << " offset:";
139 
140     const MCInstrDesc &Desc = MII.get(MI->getOpcode());
141     bool IsFlatSeg = !(Desc.TSFlags &
142                        (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch));
143 
144     if (IsFlatSeg) { // Unsigned offset
145       printU16ImmDecOperand(MI, OpNo, O);
146     } else {         // Signed offset
147       if (AMDGPU::isGFX10(STI)) {
148         O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
149       } else {
150         O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
151       }
152     }
153   }
154 }
155 
156 void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
157                                      const MCSubtargetInfo &STI,
158                                      raw_ostream &O) {
159   if (MI->getOperand(OpNo).getImm()) {
160     O << " offset0:";
161     printU8ImmDecOperand(MI, OpNo, O);
162   }
163 }
164 
165 void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo,
166                                      const MCSubtargetInfo &STI,
167                                      raw_ostream &O) {
168   if (MI->getOperand(OpNo).getImm()) {
169     O << " offset1:";
170     printU8ImmDecOperand(MI, OpNo, O);
171   }
172 }
173 
// Prints the operand at OpNo as a 32-bit hexadecimal immediate by delegating
// to printU32ImmOperand.
void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
                                        const MCSubtargetInfo &STI,
                                        raw_ostream &O) {
  printU32ImmOperand(MI, OpNo, STI, O);
}
179 
180 void AMDGPUInstPrinter::printSMEMOffset(const MCInst *MI, unsigned OpNo,
181                                         const MCSubtargetInfo &STI,
182                                         raw_ostream &O) {
183   O << formatHex(MI->getOperand(OpNo).getImm());
184 }
185 
// Prints " offset:" followed by the operand as formatted by printSMEMOffset.
// Unlike printOffset, the text is emitted even when the value is zero.
void AMDGPUInstPrinter::printSMEMOffsetMod(const MCInst *MI, unsigned OpNo,
                                           const MCSubtargetInfo &STI,
                                           raw_ostream &O) {
  O << " offset:";
  printSMEMOffset(MI, OpNo, STI, O);
}
192 
// Prints the operand at OpNo as a 32-bit hexadecimal immediate by delegating
// to printU32ImmOperand.
void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
                                               const MCSubtargetInfo &STI,
                                               raw_ostream &O) {
  printU32ImmOperand(MI, OpNo, STI, O);
}
198 
// Prints " gds" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "gds");
}
203 
204 void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
205                                   const MCSubtargetInfo &STI, raw_ostream &O) {
206   auto Imm = MI->getOperand(OpNo).getImm();
207   if (Imm & CPol::GLC)
208     O << ((AMDGPU::isGFX940(STI) &&
209            !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
210                                                                      : " glc");
211   if (Imm & CPol::SLC)
212     O << (AMDGPU::isGFX940(STI) ? " nt" : " slc");
213   if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
214     O << " dlc";
215   if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
216     O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
217   if (Imm & ~CPol::ALL)
218     O << " /* unexpected cache policy bit */";
219 }
220 
// Emits no text: the body is empty, so this operand never contributes to the
// printed instruction.
void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
}
224 
// Prints " tfe" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "tfe");
}
229 
230 void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
231                                    const MCSubtargetInfo &STI, raw_ostream &O) {
232   if (MI->getOperand(OpNo).getImm()) {
233     O << " dmask:";
234     printU16ImmOperand(MI, OpNo, STI, O);
235   }
236 }
237 
238 void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
239                                  const MCSubtargetInfo &STI, raw_ostream &O) {
240   unsigned Dim = MI->getOperand(OpNo).getImm();
241   O << " dim:SQ_RSRC_IMG_";
242 
243   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
244   if (DimInfo)
245     O << DimInfo->AsmSuffix;
246   else
247     O << Dim;
248 }
249 
// Prints " unorm" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
                                   const MCSubtargetInfo &STI, raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "unorm");
}
254 
// Prints " da" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
                                const MCSubtargetInfo &STI, raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "da");
}
259 
260 void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
261                                   const MCSubtargetInfo &STI, raw_ostream &O) {
262   if (STI.hasFeature(AMDGPU::FeatureR128A16))
263     printNamedBit(MI, OpNo, O, "a16");
264   else
265     printNamedBit(MI, OpNo, O, "r128");
266 }
267 
// Prints " a16" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printGFX10A16(const MCInst *MI, unsigned OpNo,
                                  const MCSubtargetInfo &STI, raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "a16");
}
272 
// Prints " lwe" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "lwe");
}
277 
// Prints " d16" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
                                 const MCSubtargetInfo &STI, raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "d16");
}
282 
// Prints " compr" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
                                      const MCSubtargetInfo &STI,
                                      raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "compr");
}
288 
// Prints " vm" when the immediate operand at OpNo is non-zero.
void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
                                   const MCSubtargetInfo &STI,
                                   raw_ostream &O) {
  printNamedBit(MI, OpNo, O, "vm");
}
294 
// Emits no text: the body is empty.
// NOTE(review): for MTBUF instructions the format text appears to be produced
// by printSymbolicFormat (called from printRegularOperand after soffset)
// rather than here -- confirm before adding output to this function.
void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
                                    const MCSubtargetInfo &STI,
                                    raw_ostream &O) {
}
299 
300 void AMDGPUInstPrinter::printSymbolicFormat(const MCInst *MI,
301                                             const MCSubtargetInfo &STI,
302                                             raw_ostream &O) {
303   using namespace llvm::AMDGPU::MTBUFFormat;
304 
305   int OpNo =
306     AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::format);
307   assert(OpNo != -1);
308 
309   unsigned Val = MI->getOperand(OpNo).getImm();
310   if (AMDGPU::isGFX10Plus(STI)) {
311     if (Val == UFMT_DEFAULT)
312       return;
313     if (isValidUnifiedFormat(Val, STI)) {
314       O << " format:[" << getUnifiedFormatName(Val, STI) << ']';
315     } else {
316       O << " format:" << Val;
317     }
318   } else {
319     if (Val == DFMT_NFMT_DEFAULT)
320       return;
321     if (isValidDfmtNfmt(Val, STI)) {
322       unsigned Dfmt;
323       unsigned Nfmt;
324       decodeDfmtNfmt(Val, Dfmt, Nfmt);
325       O << " format:[";
326       if (Dfmt != DFMT_DEFAULT) {
327         O << getDfmtName(Dfmt);
328         if (Nfmt != NFMT_DEFAULT) {
329           O << ',';
330         }
331       }
332       if (Nfmt != NFMT_DEFAULT) {
333         O << getNfmtName(Nfmt, STI);
334       }
335       O << ']';
336     } else {
337       O << " format:" << Val;
338     }
339   }
340 }
341 
342 void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
343                                         const MCRegisterInfo &MRI) {
344 #if !defined(NDEBUG)
345   switch (RegNo) {
346   case AMDGPU::FP_REG:
347   case AMDGPU::SP_REG:
348   case AMDGPU::PRIVATE_RSRC_REG:
349     llvm_unreachable("pseudo-register should not ever be emitted");
350   case AMDGPU::SCC:
351     llvm_unreachable("pseudo scc should not ever be emitted");
352   default:
353     break;
354   }
355 #endif
356 
357   StringRef RegName(getRegisterName(RegNo));
358   if (!Keep16BitSuffixes)
359     if (!RegName.consume_back(".l"))
360       RegName.consume_back(".h");
361 
362   O << RegName;
363 }
364 
365 void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
366                                     const MCSubtargetInfo &STI, raw_ostream &O) {
367   auto Opcode = MI->getOpcode();
368   auto Flags = MII.get(Opcode).TSFlags;
369   if (OpNo == 0) {
370     if (Flags & SIInstrFlags::VOP3 && Flags & SIInstrFlags::DPP)
371       O << "_e64_dpp";
372     else if (Flags & SIInstrFlags::VOP3) {
373       if (!getVOP3IsSingle(Opcode))
374         O << "_e64";
375     } else if (Flags & SIInstrFlags::DPP)
376       O << "_dpp";
377     else if (Flags & SIInstrFlags::SDWA)
378       O << "_sdwa";
379     else if (((Flags & SIInstrFlags::VOP1) && !getVOP1IsSingle(Opcode)) ||
380              ((Flags & SIInstrFlags::VOP2) && !getVOP2IsSingle(Opcode)))
381       O << "_e32";
382     O << " ";
383   }
384 
385   printRegularOperand(MI, OpNo, STI, O);
386 
387   // Print default vcc/vcc_lo operand.
388   switch (Opcode) {
389   default: break;
390 
391   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
392   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
393   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
394   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
395   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
396   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
397   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
398   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
399   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
400   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
401   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
402   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
403   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
404   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
405   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
406   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
407   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
408   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
409   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
410   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
411   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
412     printDefaultVccOperand(false, STI, O);
413     break;
414   }
415 }
416 
417 void AMDGPUInstPrinter::printVINTRPDst(const MCInst *MI, unsigned OpNo,
418                                        const MCSubtargetInfo &STI, raw_ostream &O) {
419   if (AMDGPU::isSI(STI) || AMDGPU::isCI(STI))
420     O << " ";
421   else
422     O << "_e32 ";
423 
424   printRegularOperand(MI, OpNo, STI, O);
425 }
426 
427 void AMDGPUInstPrinter::printImmediateInt16(uint32_t Imm,
428                                             const MCSubtargetInfo &STI,
429                                             raw_ostream &O) {
430   int16_t SImm = static_cast<int16_t>(Imm);
431   if (isInlinableIntLiteral(SImm)) {
432     O << SImm;
433   } else {
434     uint64_t Imm16 = static_cast<uint16_t>(Imm);
435     O << formatHex(Imm16);
436   }
437 }
438 
439 void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
440                                          const MCSubtargetInfo &STI,
441                                          raw_ostream &O) {
442   int16_t SImm = static_cast<int16_t>(Imm);
443   if (isInlinableIntLiteral(SImm)) {
444     O << SImm;
445     return;
446   }
447 
448   if (Imm == 0x3C00)
449     O<< "1.0";
450   else if (Imm == 0xBC00)
451     O<< "-1.0";
452   else if (Imm == 0x3800)
453     O<< "0.5";
454   else if (Imm == 0xB800)
455     O<< "-0.5";
456   else if (Imm == 0x4000)
457     O<< "2.0";
458   else if (Imm == 0xC000)
459     O<< "-2.0";
460   else if (Imm == 0x4400)
461     O<< "4.0";
462   else if (Imm == 0xC400)
463     O<< "-4.0";
464   else if (Imm == 0x3118 &&
465            STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) {
466     O << "0.15915494";
467   } else {
468     uint64_t Imm16 = static_cast<uint16_t>(Imm);
469     O << formatHex(Imm16);
470   }
471 }
472 
473 void AMDGPUInstPrinter::printImmediateV216(uint32_t Imm,
474                                            const MCSubtargetInfo &STI,
475                                            raw_ostream &O) {
476   uint16_t Lo16 = static_cast<uint16_t>(Imm);
477   printImmediate16(Lo16, STI, O);
478 }
479 
480 void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
481                                          const MCSubtargetInfo &STI,
482                                          raw_ostream &O) {
483   int32_t SImm = static_cast<int32_t>(Imm);
484   if (SImm >= -16 && SImm <= 64) {
485     O << SImm;
486     return;
487   }
488 
489   if (Imm == FloatToBits(0.0f))
490     O << "0.0";
491   else if (Imm == FloatToBits(1.0f))
492     O << "1.0";
493   else if (Imm == FloatToBits(-1.0f))
494     O << "-1.0";
495   else if (Imm == FloatToBits(0.5f))
496     O << "0.5";
497   else if (Imm == FloatToBits(-0.5f))
498     O << "-0.5";
499   else if (Imm == FloatToBits(2.0f))
500     O << "2.0";
501   else if (Imm == FloatToBits(-2.0f))
502     O << "-2.0";
503   else if (Imm == FloatToBits(4.0f))
504     O << "4.0";
505   else if (Imm == FloatToBits(-4.0f))
506     O << "-4.0";
507   else if (Imm == 0x3e22f983 &&
508            STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
509     O << "0.15915494";
510   else
511     O << formatHex(static_cast<uint64_t>(Imm));
512 }
513 
514 void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
515                                          const MCSubtargetInfo &STI,
516                                          raw_ostream &O) {
517   int64_t SImm = static_cast<int64_t>(Imm);
518   if (SImm >= -16 && SImm <= 64) {
519     O << SImm;
520     return;
521   }
522 
523   if (Imm == DoubleToBits(0.0))
524     O << "0.0";
525   else if (Imm == DoubleToBits(1.0))
526     O << "1.0";
527   else if (Imm == DoubleToBits(-1.0))
528     O << "-1.0";
529   else if (Imm == DoubleToBits(0.5))
530     O << "0.5";
531   else if (Imm == DoubleToBits(-0.5))
532     O << "-0.5";
533   else if (Imm == DoubleToBits(2.0))
534     O << "2.0";
535   else if (Imm == DoubleToBits(-2.0))
536     O << "-2.0";
537   else if (Imm == DoubleToBits(4.0))
538     O << "4.0";
539   else if (Imm == DoubleToBits(-4.0))
540     O << "-4.0";
541   else if (Imm == 0x3fc45f306dc9c882 &&
542            STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
543     O << "0.15915494309189532";
544   else {
545     assert(isUInt<32>(Imm) || isInt<32>(Imm));
546 
547     // In rare situations, we will have a 32-bit literal in a 64-bit
548     // operand. This is technically allowed for the encoding of s_mov_b64.
549     O << formatHex(static_cast<uint64_t>(Imm));
550   }
551 }
552 
553 void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
554                                   const MCSubtargetInfo &STI,
555                                   raw_ostream &O) {
556   unsigned Imm = MI->getOperand(OpNo).getImm();
557   if (!Imm)
558     return;
559 
560   if (AMDGPU::isGFX940(STI)) {
561     switch (MI->getOpcode()) {
562     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
563     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
564     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
565     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
566       O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
567         << ((Imm >> 2) & 1) << ']';
568       return;
569     }
570   }
571 
572   O << " blgp:" << Imm;
573 }
574 
575 void AMDGPUInstPrinter::printCBSZ(const MCInst *MI, unsigned OpNo,
576                                   const MCSubtargetInfo &STI,
577                                   raw_ostream &O) {
578   unsigned Imm = MI->getOperand(OpNo).getImm();
579   if (!Imm)
580     return;
581 
582   O << " cbsz:" << Imm;
583 }
584 
585 void AMDGPUInstPrinter::printABID(const MCInst *MI, unsigned OpNo,
586                                   const MCSubtargetInfo &STI,
587                                   raw_ostream &O) {
588   unsigned Imm = MI->getOperand(OpNo).getImm();
589   if (!Imm)
590     return;
591 
592   O << " abid:" << Imm;
593 }
594 
595 void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
596                                                const MCSubtargetInfo &STI,
597                                                raw_ostream &O) {
598   if (!FirstOperand)
599     O << ", ";
600   printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64]
601                       ? AMDGPU::VCC
602                       : AMDGPU::VCC_LO,
603                   O, MRI);
604   if (FirstOperand)
605     O << ", ";
606 }
607 
608 void AMDGPUInstPrinter::printWaitVDST(const MCInst *MI, unsigned OpNo,
609                                       const MCSubtargetInfo &STI,
610                                       raw_ostream &O) {
611   uint8_t Imm = MI->getOperand(OpNo).getImm();
612   if (Imm != 0) {
613     O << " wait_vdst:";
614     printU4ImmDecOperand(MI, OpNo, O);
615   }
616 }
617 
618 void AMDGPUInstPrinter::printWaitEXP(const MCInst *MI, unsigned OpNo,
619                                     const MCSubtargetInfo &STI,
620                                     raw_ostream &O) {
621   uint8_t Imm = MI->getOperand(OpNo).getImm();
622   if (Imm != 0) {
623     O << " wait_exp:";
624     printU4ImmDecOperand(MI, OpNo, O);
625   }
626 }
627 
628 bool AMDGPUInstPrinter::needsImpliedVcc(const MCInstrDesc &Desc,
629                                         unsigned OpNo) const {
630   return OpNo == 0 && (Desc.TSFlags & SIInstrFlags::DPP) &&
631          (Desc.TSFlags & SIInstrFlags::VOPC) &&
632          (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
633           Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO));
634 }
635 
636 // Print default vcc/vcc_lo operand of VOPC.
637 void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
638                                      const MCSubtargetInfo &STI,
639                                      raw_ostream &O) {
640   unsigned Opc = MI->getOpcode();
641   const MCInstrDesc &Desc = MII.get(Opc);
642   int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
643   // 0, 1 and 2 are the first printed operands in different cases
644   // If there are printed modifiers, printOperandAndFPInputMods or
645   // printOperandAndIntInputMods will be called instead
646   if ((OpNo == 0 ||
647        (OpNo == 1 && (Desc.TSFlags & SIInstrFlags::DPP) && ModIdx != -1)) &&
648       (Desc.TSFlags & SIInstrFlags::VOPC) &&
649       (Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC) ||
650        Desc.hasImplicitDefOfPhysReg(AMDGPU::VCC_LO)))
651     printDefaultVccOperand(true, STI, O);
652 
653   printRegularOperand(MI, OpNo, STI, O);
654 }
655 
656 // Print operands after vcc or modifier handling.
657 void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
658                                             const MCSubtargetInfo &STI,
659                                             raw_ostream &O) {
660   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
661 
662   if (OpNo >= MI->getNumOperands()) {
663     O << "/*Missing OP" << OpNo << "*/";
664     return;
665   }
666 
667   const MCOperand &Op = MI->getOperand(OpNo);
668   if (Op.isReg()) {
669     printRegOperand(Op.getReg(), O, MRI);
670   } else if (Op.isImm()) {
671     const uint8_t OpTy = Desc.OpInfo[OpNo].OperandType;
672     switch (OpTy) {
673     case AMDGPU::OPERAND_REG_IMM_INT32:
674     case AMDGPU::OPERAND_REG_IMM_FP32:
675     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
676     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
677     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
678     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
679     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
680     case AMDGPU::OPERAND_REG_IMM_V2INT32:
681     case AMDGPU::OPERAND_REG_IMM_V2FP32:
682     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
683     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
684     case MCOI::OPERAND_IMMEDIATE:
685       printImmediate32(Op.getImm(), STI, O);
686       break;
687     case AMDGPU::OPERAND_REG_IMM_INT64:
688     case AMDGPU::OPERAND_REG_IMM_FP64:
689     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
690     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
691     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
692       printImmediate64(Op.getImm(), STI, O);
693       break;
694     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
695     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
696     case AMDGPU::OPERAND_REG_IMM_INT16:
697       printImmediateInt16(Op.getImm(), STI, O);
698       break;
699     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
700     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
701     case AMDGPU::OPERAND_REG_IMM_FP16:
702     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
703       printImmediate16(Op.getImm(), STI, O);
704       break;
705     case AMDGPU::OPERAND_REG_IMM_V2INT16:
706     case AMDGPU::OPERAND_REG_IMM_V2FP16:
707       if (!isUInt<16>(Op.getImm()) &&
708           STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
709         printImmediate32(Op.getImm(), STI, O);
710         break;
711       }
712 
713       //  Deal with 16-bit FP inline immediates not working.
714       if (OpTy == AMDGPU::OPERAND_REG_IMM_V2FP16) {
715         printImmediate16(static_cast<uint16_t>(Op.getImm()), STI, O);
716         break;
717       }
718       LLVM_FALLTHROUGH;
719     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
720     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
721       printImmediateInt16(static_cast<uint16_t>(Op.getImm()), STI, O);
722       break;
723     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
724     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
725       printImmediateV216(Op.getImm(), STI, O);
726       break;
727     case MCOI::OPERAND_UNKNOWN:
728     case MCOI::OPERAND_PCREL:
729       O << formatDec(Op.getImm());
730       break;
731     case MCOI::OPERAND_REGISTER:
732       // FIXME: This should be removed and handled somewhere else. Seems to come
733       // from a disassembler bug.
734       O << "/*invalid immediate*/";
735       break;
736     default:
737       // We hit this for the immediate instruction bits that don't yet have a
738       // custom printer.
739       llvm_unreachable("unexpected immediate operand type");
740     }
741   } else if (Op.isDFPImm()) {
742     double Value = bit_cast<double>(Op.getDFPImm());
743     // We special case 0.0 because otherwise it will be printed as an integer.
744     if (Value == 0.0)
745       O << "0.0";
746     else {
747       const MCInstrDesc &Desc = MII.get(MI->getOpcode());
748       int RCID = Desc.OpInfo[OpNo].RegClass;
749       unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
750       if (RCBits == 32)
751         printImmediate32(FloatToBits(Value), STI, O);
752       else if (RCBits == 64)
753         printImmediate64(DoubleToBits(Value), STI, O);
754       else
755         llvm_unreachable("Invalid register class size");
756     }
757   } else if (Op.isExpr()) {
758     const MCExpr *Exp = Op.getExpr();
759     Exp->print(O, &MAI);
760   } else {
761     O << "/*INV_OP*/";
762   }
763 
764   // Print default vcc/vcc_lo operand of v_cndmask_b32_e32.
765   switch (MI->getOpcode()) {
766   default: break;
767 
768   case AMDGPU::V_CNDMASK_B32_e32_gfx10:
769   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx10:
770   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx10:
771   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
772   case AMDGPU::V_CNDMASK_B32_dpp_gfx10:
773   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx10:
774   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx10:
775   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx10:
776   case AMDGPU::V_CNDMASK_B32_dpp8_gfx10:
777   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx10:
778   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx10:
779   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx10:
780   case AMDGPU::V_CNDMASK_B32_e32_gfx11:
781   case AMDGPU::V_ADD_CO_CI_U32_e32_gfx11:
782   case AMDGPU::V_SUB_CO_CI_U32_e32_gfx11:
783   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx11:
784   case AMDGPU::V_CNDMASK_B32_dpp_gfx11:
785   case AMDGPU::V_ADD_CO_CI_U32_dpp_gfx11:
786   case AMDGPU::V_SUB_CO_CI_U32_dpp_gfx11:
787   case AMDGPU::V_SUBREV_CO_CI_U32_dpp_gfx11:
788   case AMDGPU::V_CNDMASK_B32_dpp8_gfx11:
789   case AMDGPU::V_ADD_CO_CI_U32_dpp8_gfx11:
790   case AMDGPU::V_SUB_CO_CI_U32_dpp8_gfx11:
791   case AMDGPU::V_SUBREV_CO_CI_U32_dpp8_gfx11:
792 
793   case AMDGPU::V_CNDMASK_B32_e32_gfx6_gfx7:
794   case AMDGPU::V_CNDMASK_B32_e32_vi:
795     if ((int)OpNo == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
796                                                 AMDGPU::OpName::src1))
797       printDefaultVccOperand(OpNo == 0, STI, O);
798     break;
799   }
800 
801   if (Desc.TSFlags & SIInstrFlags::MTBUF) {
802     int SOffsetIdx =
803       AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::soffset);
804     assert(SOffsetIdx != -1);
805     if ((int)OpNo == SOffsetIdx)
806       printSymbolicFormat(MI, STI, O);
807   }
808 }
809 
810 void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
811                                                    unsigned OpNo,
812                                                    const MCSubtargetInfo &STI,
813                                                    raw_ostream &O) {
814   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
815   if (needsImpliedVcc(Desc, OpNo))
816     printDefaultVccOperand(true, STI, O);
817 
818   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
819 
820   // Use 'neg(...)' instead of '-' to avoid ambiguity.
821   // This is important for integer literals because
822   // -1 is not the same value as neg(1).
823   bool NegMnemo = false;
824 
825   if (InputModifiers & SISrcMods::NEG) {
826     if (OpNo + 1 < MI->getNumOperands() &&
827         (InputModifiers & SISrcMods::ABS) == 0) {
828       const MCOperand &Op = MI->getOperand(OpNo + 1);
829       NegMnemo = Op.isImm() || Op.isDFPImm();
830     }
831     if (NegMnemo) {
832       O << "neg(";
833     } else {
834       O << '-';
835     }
836   }
837 
838   if (InputModifiers & SISrcMods::ABS)
839     O << '|';
840   printRegularOperand(MI, OpNo + 1, STI, O);
841   if (InputModifiers & SISrcMods::ABS)
842     O << '|';
843 
844   if (NegMnemo) {
845     O << ')';
846   }
847 }
848 
849 void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
850                                                     unsigned OpNo,
851                                                     const MCSubtargetInfo &STI,
852                                                     raw_ostream &O) {
853   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
854   if (needsImpliedVcc(Desc, OpNo))
855     printDefaultVccOperand(true, STI, O);
856 
857   unsigned InputModifiers = MI->getOperand(OpNo).getImm();
858   if (InputModifiers & SISrcMods::SEXT)
859     O << "sext(";
860   printRegularOperand(MI, OpNo + 1, STI, O);
861   if (InputModifiers & SISrcMods::SEXT)
862     O << ')';
863 
864   // Print default vcc/vcc_lo operand of VOP2b.
865   switch (MI->getOpcode()) {
866   default: break;
867 
868   case AMDGPU::V_CNDMASK_B32_sdwa_gfx10:
869   case AMDGPU::V_ADD_CO_CI_U32_sdwa_gfx10:
870   case AMDGPU::V_SUB_CO_CI_U32_sdwa_gfx10:
871   case AMDGPU::V_SUBREV_CO_CI_U32_sdwa_gfx10:
872     if ((int)OpNo + 1 == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
873                                                     AMDGPU::OpName::src1))
874       printDefaultVccOperand(OpNo == 0, STI, O);
875     break;
876   }
877 }
878 
879 void AMDGPUInstPrinter::printDPP8(const MCInst *MI, unsigned OpNo,
880                                   const MCSubtargetInfo &STI,
881                                   raw_ostream &O) {
882   if (!AMDGPU::isGFX10Plus(STI))
883     llvm_unreachable("dpp8 is not supported on ASICs earlier than GFX10");
884 
885   unsigned Imm = MI->getOperand(OpNo).getImm();
886   O << "dpp8:[" << formatDec(Imm & 0x7);
887   for (size_t i = 1; i < 8; ++i) {
888     O << ',' << formatDec((Imm >> (3 * i)) & 0x7);
889   }
890   O << ']';
891 }
892 
893 void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
894                                      const MCSubtargetInfo &STI,
895                                      raw_ostream &O) {
896   using namespace AMDGPU::DPP;
897 
898   unsigned Imm = MI->getOperand(OpNo).getImm();
899   const MCInstrDesc &Desc = MII.get(MI->getOpcode());
900   int Src0Idx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
901                                            AMDGPU::OpName::src0);
902 
903   if (Src0Idx >= 0 &&
904       Desc.OpInfo[Src0Idx].RegClass == AMDGPU::VReg_64RegClassID &&
905       !AMDGPU::isLegal64BitDPPControl(Imm)) {
906     O << " /* 64 bit dpp only supports row_newbcast */";
907     return;
908   } else if (Imm <= DppCtrl::QUAD_PERM_LAST) {
909     O << "quad_perm:[";
910     O << formatDec(Imm & 0x3)         << ',';
911     O << formatDec((Imm & 0xc)  >> 2) << ',';
912     O << formatDec((Imm & 0x30) >> 4) << ',';
913     O << formatDec((Imm & 0xc0) >> 6) << ']';
914   } else if ((Imm >= DppCtrl::ROW_SHL_FIRST) &&
915              (Imm <= DppCtrl::ROW_SHL_LAST)) {
916     O << "row_shl:";
917     printU4ImmDecOperand(MI, OpNo, O);
918   } else if ((Imm >= DppCtrl::ROW_SHR_FIRST) &&
919              (Imm <= DppCtrl::ROW_SHR_LAST)) {
920     O << "row_shr:";
921     printU4ImmDecOperand(MI, OpNo, O);
922   } else if ((Imm >= DppCtrl::ROW_ROR_FIRST) &&
923              (Imm <= DppCtrl::ROW_ROR_LAST)) {
924     O << "row_ror:";
925     printU4ImmDecOperand(MI, OpNo, O);
926   } else if (Imm == DppCtrl::WAVE_SHL1) {
927     if (AMDGPU::isGFX10Plus(STI)) {
928       O << "/* wave_shl is not supported starting from GFX10 */";
929       return;
930     }
931     O << "wave_shl:1";
932   } else if (Imm == DppCtrl::WAVE_ROL1) {
933     if (AMDGPU::isGFX10Plus(STI)) {
934       O << "/* wave_rol is not supported starting from GFX10 */";
935       return;
936     }
937     O << "wave_rol:1";
938   } else if (Imm == DppCtrl::WAVE_SHR1) {
939     if (AMDGPU::isGFX10Plus(STI)) {
940       O << "/* wave_shr is not supported starting from GFX10 */";
941       return;
942     }
943     O << "wave_shr:1";
944   } else if (Imm == DppCtrl::WAVE_ROR1) {
945     if (AMDGPU::isGFX10Plus(STI)) {
946       O << "/* wave_ror is not supported starting from GFX10 */";
947       return;
948     }
949     O << "wave_ror:1";
950   } else if (Imm == DppCtrl::ROW_MIRROR) {
951     O << "row_mirror";
952   } else if (Imm == DppCtrl::ROW_HALF_MIRROR) {
953     O << "row_half_mirror";
954   } else if (Imm == DppCtrl::BCAST15) {
955     if (AMDGPU::isGFX10Plus(STI)) {
956       O << "/* row_bcast is not supported starting from GFX10 */";
957       return;
958     }
959     O << "row_bcast:15";
960   } else if (Imm == DppCtrl::BCAST31) {
961     if (AMDGPU::isGFX10Plus(STI)) {
962       O << "/* row_bcast is not supported starting from GFX10 */";
963       return;
964     }
965     O << "row_bcast:31";
966   } else if ((Imm >= DppCtrl::ROW_SHARE_FIRST) &&
967              (Imm <= DppCtrl::ROW_SHARE_LAST)) {
968     if (AMDGPU::isGFX90A(STI)) {
969       O << "row_newbcast:";
970     } else if (AMDGPU::isGFX10Plus(STI)) {
971       O << "row_share:";
972     } else {
973       O << " /* row_newbcast/row_share is not supported on ASICs earlier "
974            "than GFX90A/GFX10 */";
975       return;
976     }
977     printU4ImmDecOperand(MI, OpNo, O);
978   } else if ((Imm >= DppCtrl::ROW_XMASK_FIRST) &&
979              (Imm <= DppCtrl::ROW_XMASK_LAST)) {
980     if (!AMDGPU::isGFX10Plus(STI)) {
981       O << "/* row_xmask is not supported on ASICs earlier than GFX10 */";
982       return;
983     }
984     O << "row_xmask:";
985     printU4ImmDecOperand(MI, OpNo, O);
986   } else {
987     O << "/* Invalid dpp_ctrl value */";
988   }
989 }
990 
991 void AMDGPUInstPrinter::printRowMask(const MCInst *MI, unsigned OpNo,
992                                      const MCSubtargetInfo &STI,
993                                      raw_ostream &O) {
994   O << " row_mask:";
995   printU4ImmOperand(MI, OpNo, STI, O);
996 }
997 
998 void AMDGPUInstPrinter::printBankMask(const MCInst *MI, unsigned OpNo,
999                                       const MCSubtargetInfo &STI,
1000                                       raw_ostream &O) {
1001   O << " bank_mask:";
1002   printU4ImmOperand(MI, OpNo, STI, O);
1003 }
1004 
1005 void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
1006                                        const MCSubtargetInfo &STI,
1007                                        raw_ostream &O) {
1008   unsigned Imm = MI->getOperand(OpNo).getImm();
1009   if (Imm) {
1010     O << " bound_ctrl:1";
1011   }
1012 }
1013 
1014 void AMDGPUInstPrinter::printFI(const MCInst *MI, unsigned OpNo,
1015                                 const MCSubtargetInfo &STI,
1016                                 raw_ostream &O) {
1017   using namespace llvm::AMDGPU::DPP;
1018   unsigned Imm = MI->getOperand(OpNo).getImm();
1019   if (Imm == DPP_FI_1 || Imm == DPP8_FI_1) {
1020     O << " fi:1";
1021   }
1022 }
1023 
1024 void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
1025                                      raw_ostream &O) {
1026   using namespace llvm::AMDGPU::SDWA;
1027 
1028   unsigned Imm = MI->getOperand(OpNo).getImm();
1029   switch (Imm) {
1030   case SdwaSel::BYTE_0: O << "BYTE_0"; break;
1031   case SdwaSel::BYTE_1: O << "BYTE_1"; break;
1032   case SdwaSel::BYTE_2: O << "BYTE_2"; break;
1033   case SdwaSel::BYTE_3: O << "BYTE_3"; break;
1034   case SdwaSel::WORD_0: O << "WORD_0"; break;
1035   case SdwaSel::WORD_1: O << "WORD_1"; break;
1036   case SdwaSel::DWORD: O << "DWORD"; break;
1037   default: llvm_unreachable("Invalid SDWA data select operand");
1038   }
1039 }
1040 
1041 void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
1042                                         const MCSubtargetInfo &STI,
1043                                         raw_ostream &O) {
1044   O << "dst_sel:";
1045   printSDWASel(MI, OpNo, O);
1046 }
1047 
1048 void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
1049                                          const MCSubtargetInfo &STI,
1050                                          raw_ostream &O) {
1051   O << "src0_sel:";
1052   printSDWASel(MI, OpNo, O);
1053 }
1054 
1055 void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
1056                                          const MCSubtargetInfo &STI,
1057                                          raw_ostream &O) {
1058   O << "src1_sel:";
1059   printSDWASel(MI, OpNo, O);
1060 }
1061 
1062 void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
1063                                            const MCSubtargetInfo &STI,
1064                                            raw_ostream &O) {
1065   using namespace llvm::AMDGPU::SDWA;
1066 
1067   O << "dst_unused:";
1068   unsigned Imm = MI->getOperand(OpNo).getImm();
1069   switch (Imm) {
1070   case DstUnused::UNUSED_PAD: O << "UNUSED_PAD"; break;
1071   case DstUnused::UNUSED_SEXT: O << "UNUSED_SEXT"; break;
1072   case DstUnused::UNUSED_PRESERVE: O << "UNUSED_PRESERVE"; break;
1073   default: llvm_unreachable("Invalid SDWA dest_unused operand");
1074   }
1075 }
1076 
1077 void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
1078                                      const MCSubtargetInfo &STI, raw_ostream &O,
1079                                      unsigned N) {
1080   unsigned Opc = MI->getOpcode();
1081   int EnIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::en);
1082   unsigned En = MI->getOperand(EnIdx).getImm();
1083 
1084   int ComprIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::compr);
1085 
1086   // If compr is set, print as src0, src0, src1, src1
1087   if (MI->getOperand(ComprIdx).getImm())
1088     OpNo = OpNo - N + N / 2;
1089 
1090   if (En & (1 << N))
1091     printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
1092   else
1093     O << "off";
1094 }
1095 
1096 void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
1097                                      const MCSubtargetInfo &STI,
1098                                      raw_ostream &O) {
1099   printExpSrcN(MI, OpNo, STI, O, 0);
1100 }
1101 
1102 void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
1103                                      const MCSubtargetInfo &STI,
1104                                      raw_ostream &O) {
1105   printExpSrcN(MI, OpNo, STI, O, 1);
1106 }
1107 
1108 void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
1109                                      const MCSubtargetInfo &STI,
1110                                      raw_ostream &O) {
1111   printExpSrcN(MI, OpNo, STI, O, 2);
1112 }
1113 
1114 void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
1115                                      const MCSubtargetInfo &STI,
1116                                      raw_ostream &O) {
1117   printExpSrcN(MI, OpNo, STI, O, 3);
1118 }
1119 
1120 void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
1121                                     const MCSubtargetInfo &STI,
1122                                     raw_ostream &O) {
1123   using namespace llvm::AMDGPU::Exp;
1124 
1125   // This is really a 6 bit field.
1126   unsigned Id = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
1127 
1128   int Index;
1129   StringRef TgtName;
1130   if (getTgtName(Id, TgtName, Index) && isSupportedTgtId(Id, STI)) {
1131     O << ' ' << TgtName;
1132     if (Index >= 0)
1133       O << Index;
1134   } else {
1135     O << " invalid_target_" << Id;
1136   }
1137 }
1138 
1139 static bool allOpsDefaultValue(const int* Ops, int NumOps, int Mod,
1140                                bool IsPacked, bool HasDstSel) {
1141   int DefaultValue = IsPacked && (Mod == SISrcMods::OP_SEL_1);
1142 
1143   for (int I = 0; I < NumOps; ++I) {
1144     if (!!(Ops[I] & Mod) != DefaultValue)
1145       return false;
1146   }
1147 
1148   if (HasDstSel && (Ops[0] & SISrcMods::DST_OP_SEL) != 0)
1149     return false;
1150 
1151   return true;
1152 }
1153 
1154 void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI,
1155                                             StringRef Name,
1156                                             unsigned Mod,
1157                                             raw_ostream &O) {
1158   unsigned Opc = MI->getOpcode();
1159   int NumOps = 0;
1160   int Ops[3];
1161 
1162   for (int OpName : { AMDGPU::OpName::src0_modifiers,
1163                       AMDGPU::OpName::src1_modifiers,
1164                       AMDGPU::OpName::src2_modifiers }) {
1165     int Idx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1166     if (Idx == -1)
1167       break;
1168 
1169     Ops[NumOps++] = MI->getOperand(Idx).getImm();
1170   }
1171 
1172   const bool HasDstSel =
1173     NumOps > 0 &&
1174     Mod == SISrcMods::OP_SEL_0 &&
1175     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3_OPSEL;
1176 
1177   const bool IsPacked =
1178     MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::IsPacked;
1179 
1180   if (allOpsDefaultValue(Ops, NumOps, Mod, IsPacked, HasDstSel))
1181     return;
1182 
1183   O << Name;
1184   for (int I = 0; I < NumOps; ++I) {
1185     if (I != 0)
1186       O << ',';
1187 
1188     O << !!(Ops[I] & Mod);
1189   }
1190 
1191   if (HasDstSel) {
1192     O << ',' << !!(Ops[0] & SISrcMods::DST_OP_SEL);
1193   }
1194 
1195   O << ']';
1196 }
1197 
1198 void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned,
1199                                    const MCSubtargetInfo &STI,
1200                                    raw_ostream &O) {
1201   unsigned Opc = MI->getOpcode();
1202   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
1203       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
1204     auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
1205     auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers);
1206     unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0);
1207     unsigned BC = !!(MI->getOperand(BCN).getImm() & SISrcMods::OP_SEL_0);
1208     if (FI || BC)
1209       O << " op_sel:[" << FI << ',' << BC << ']';
1210     return;
1211   }
1212 
1213   printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O);
1214 }
1215 
1216 void AMDGPUInstPrinter::printOpSelHi(const MCInst *MI, unsigned OpNo,
1217                                      const MCSubtargetInfo &STI,
1218                                      raw_ostream &O) {
1219   printPackedModifier(MI, " op_sel_hi:[", SISrcMods::OP_SEL_1, O);
1220 }
1221 
1222 void AMDGPUInstPrinter::printNegLo(const MCInst *MI, unsigned OpNo,
1223                                    const MCSubtargetInfo &STI,
1224                                    raw_ostream &O) {
1225   printPackedModifier(MI, " neg_lo:[", SISrcMods::NEG, O);
1226 }
1227 
1228 void AMDGPUInstPrinter::printNegHi(const MCInst *MI, unsigned OpNo,
1229                                    const MCSubtargetInfo &STI,
1230                                    raw_ostream &O) {
1231   printPackedModifier(MI, " neg_hi:[", SISrcMods::NEG_HI, O);
1232 }
1233 
1234 void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
1235                                         const MCSubtargetInfo &STI,
1236                                         raw_ostream &O) {
1237   unsigned Imm = MI->getOperand(OpNum).getImm();
1238   switch (Imm) {
1239   case 0:
1240     O << "p10";
1241     break;
1242   case 1:
1243     O << "p20";
1244     break;
1245   case 2:
1246     O << "p0";
1247     break;
1248   default:
1249     O << "invalid_param_" << Imm;
1250   }
1251 }
1252 
1253 void AMDGPUInstPrinter::printInterpAttr(const MCInst *MI, unsigned OpNum,
1254                                         const MCSubtargetInfo &STI,
1255                                         raw_ostream &O) {
1256   unsigned Attr = MI->getOperand(OpNum).getImm();
1257   O << "attr" << Attr;
1258 }
1259 
1260 void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
1261                                         const MCSubtargetInfo &STI,
1262                                         raw_ostream &O) {
1263   unsigned Chan = MI->getOperand(OpNum).getImm();
1264   O << '.' << "xyzw"[Chan & 0x3];
1265 }
1266 
1267 void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
1268                                            const MCSubtargetInfo &STI,
1269                                            raw_ostream &O) {
1270   using namespace llvm::AMDGPU::VGPRIndexMode;
1271   unsigned Val = MI->getOperand(OpNo).getImm();
1272 
1273   if ((Val & ~ENABLE_MASK) != 0) {
1274     O << formatHex(static_cast<uint64_t>(Val));
1275   } else {
1276     O << "gpr_idx(";
1277     bool NeedComma = false;
1278     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
1279       if (Val & (1 << ModeId)) {
1280         if (NeedComma)
1281           O << ',';
1282         O << IdSymbolic[ModeId];
1283         NeedComma = true;
1284       }
1285     }
1286     O << ')';
1287   }
1288 }
1289 
1290 void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
1291                                         const MCSubtargetInfo &STI,
1292                                         raw_ostream &O) {
1293   printRegularOperand(MI, OpNo, STI, O);
1294   O  << ", ";
1295   printRegularOperand(MI, OpNo + 1, STI, O);
1296 }
1297 
1298 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1299                                    raw_ostream &O, StringRef Asm,
1300                                    StringRef Default) {
1301   const MCOperand &Op = MI->getOperand(OpNo);
1302   assert(Op.isImm());
1303   if (Op.getImm() == 1) {
1304     O << Asm;
1305   } else {
1306     O << Default;
1307   }
1308 }
1309 
1310 void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
1311                                    raw_ostream &O, char Asm) {
1312   const MCOperand &Op = MI->getOperand(OpNo);
1313   assert(Op.isImm());
1314   if (Op.getImm() == 1)
1315     O << Asm;
1316 }
1317 
1318 void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
1319                                   const MCSubtargetInfo &STI,
1320                                   raw_ostream &O) {
1321   printNamedBit(MI, OpNo, O, "high");
1322 }
1323 
1324 void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
1325                                      const MCSubtargetInfo &STI,
1326                                      raw_ostream &O) {
1327   printNamedBit(MI, OpNo, O, "clamp");
1328 }
1329 
1330 void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
1331                                     const MCSubtargetInfo &STI,
1332                                     raw_ostream &O) {
1333   int Imm = MI->getOperand(OpNo).getImm();
1334   if (Imm == SIOutMods::MUL2)
1335     O << " mul:2";
1336   else if (Imm == SIOutMods::MUL4)
1337     O << " mul:4";
1338   else if (Imm == SIOutMods::DIV2)
1339     O << " div:2";
1340 }
1341 
1342 void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
1343                                      const MCSubtargetInfo &STI,
1344                                      raw_ostream &O) {
1345   using namespace llvm::AMDGPU::SendMsg;
1346 
1347   const unsigned Imm16 = MI->getOperand(OpNo).getImm();
1348 
1349   uint16_t MsgId;
1350   uint16_t OpId;
1351   uint16_t StreamId;
1352   decodeMsg(Imm16, MsgId, OpId, StreamId, STI);
1353 
1354   StringRef MsgName = getMsgName(MsgId, STI);
1355 
1356   if (!MsgName.empty() && isValidMsgOp(MsgId, OpId, STI) &&
1357       isValidMsgStream(MsgId, OpId, StreamId, STI)) {
1358     O << "sendmsg(" << MsgName;
1359     if (msgRequiresOp(MsgId, STI)) {
1360       O << ", " << getMsgOpName(MsgId, OpId, STI);
1361       if (msgSupportsStream(MsgId, OpId, STI)) {
1362         O << ", " << StreamId;
1363       }
1364     }
1365     O << ')';
1366   } else if (encodeMsg(MsgId, OpId, StreamId) == Imm16) {
1367     O << "sendmsg(" << MsgId << ", " << OpId << ", " << StreamId << ')';
1368   } else {
1369     O << Imm16; // Unknown imm16 code.
1370   }
1371 }
1372 
1373 static void printSwizzleBitmask(const uint16_t AndMask,
1374                                 const uint16_t OrMask,
1375                                 const uint16_t XorMask,
1376                                 raw_ostream &O) {
1377   using namespace llvm::AMDGPU::Swizzle;
1378 
1379   uint16_t Probe0 = ((0            & AndMask) | OrMask) ^ XorMask;
1380   uint16_t Probe1 = ((BITMASK_MASK & AndMask) | OrMask) ^ XorMask;
1381 
1382   O << "\"";
1383 
1384   for (unsigned Mask = 1 << (BITMASK_WIDTH - 1); Mask > 0; Mask >>= 1) {
1385     uint16_t p0 = Probe0 & Mask;
1386     uint16_t p1 = Probe1 & Mask;
1387 
1388     if (p0 == p1) {
1389       if (p0 == 0) {
1390         O << "0";
1391       } else {
1392         O << "1";
1393       }
1394     } else {
1395       if (p0 == 0) {
1396         O << "p";
1397       } else {
1398         O << "i";
1399       }
1400     }
1401   }
1402 
1403   O << "\"";
1404 }
1405 
1406 void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
1407                                      const MCSubtargetInfo &STI,
1408                                      raw_ostream &O) {
1409   using namespace llvm::AMDGPU::Swizzle;
1410 
1411   uint16_t Imm = MI->getOperand(OpNo).getImm();
1412   if (Imm == 0) {
1413     return;
1414   }
1415 
1416   O << " offset:";
1417 
1418   if ((Imm & QUAD_PERM_ENC_MASK) == QUAD_PERM_ENC) {
1419 
1420     O << "swizzle(" << IdSymbolic[ID_QUAD_PERM];
1421     for (unsigned I = 0; I < LANE_NUM; ++I) {
1422       O << ",";
1423       O << formatDec(Imm & LANE_MASK);
1424       Imm >>= LANE_SHIFT;
1425     }
1426     O << ")";
1427 
1428   } else if ((Imm & BITMASK_PERM_ENC_MASK) == BITMASK_PERM_ENC) {
1429 
1430     uint16_t AndMask = (Imm >> BITMASK_AND_SHIFT) & BITMASK_MASK;
1431     uint16_t OrMask  = (Imm >> BITMASK_OR_SHIFT)  & BITMASK_MASK;
1432     uint16_t XorMask = (Imm >> BITMASK_XOR_SHIFT) & BITMASK_MASK;
1433 
1434     if (AndMask == BITMASK_MAX &&
1435         OrMask == 0 &&
1436         countPopulation(XorMask) == 1) {
1437 
1438       O << "swizzle(" << IdSymbolic[ID_SWAP];
1439       O << ",";
1440       O << formatDec(XorMask);
1441       O << ")";
1442 
1443     } else if (AndMask == BITMASK_MAX &&
1444                OrMask == 0 && XorMask > 0 &&
1445                isPowerOf2_64(XorMask + 1)) {
1446 
1447       O << "swizzle(" << IdSymbolic[ID_REVERSE];
1448       O << ",";
1449       O << formatDec(XorMask + 1);
1450       O << ")";
1451 
1452     } else {
1453 
1454       uint16_t GroupSize = BITMASK_MAX - AndMask + 1;
1455       if (GroupSize > 1 &&
1456           isPowerOf2_64(GroupSize) &&
1457           OrMask < GroupSize &&
1458           XorMask == 0) {
1459 
1460         O << "swizzle(" << IdSymbolic[ID_BROADCAST];
1461         O << ",";
1462         O << formatDec(GroupSize);
1463         O << ",";
1464         O << formatDec(OrMask);
1465         O << ")";
1466 
1467       } else {
1468         O << "swizzle(" << IdSymbolic[ID_BITMASK_PERM];
1469         O << ",";
1470         printSwizzleBitmask(AndMask, OrMask, XorMask, O);
1471         O << ")";
1472       }
1473     }
1474   } else {
1475     printU16ImmDecOperand(MI, OpNo, O);
1476   }
1477 }
1478 
1479 void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
1480                                       const MCSubtargetInfo &STI,
1481                                       raw_ostream &O) {
1482   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
1483 
1484   unsigned SImm16 = MI->getOperand(OpNo).getImm();
1485   unsigned Vmcnt, Expcnt, Lgkmcnt;
1486   decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
1487 
1488   bool IsDefaultVmcnt = Vmcnt == getVmcntBitMask(ISA);
1489   bool IsDefaultExpcnt = Expcnt == getExpcntBitMask(ISA);
1490   bool IsDefaultLgkmcnt = Lgkmcnt == getLgkmcntBitMask(ISA);
1491   bool PrintAll = IsDefaultVmcnt && IsDefaultExpcnt && IsDefaultLgkmcnt;
1492 
1493   bool NeedSpace = false;
1494 
1495   if (!IsDefaultVmcnt || PrintAll) {
1496     O << "vmcnt(" << Vmcnt << ')';
1497     NeedSpace = true;
1498   }
1499 
1500   if (!IsDefaultExpcnt || PrintAll) {
1501     if (NeedSpace)
1502       O << ' ';
1503     O << "expcnt(" << Expcnt << ')';
1504     NeedSpace = true;
1505   }
1506 
1507   if (!IsDefaultLgkmcnt || PrintAll) {
1508     if (NeedSpace)
1509       O << ' ';
1510     O << "lgkmcnt(" << Lgkmcnt << ')';
1511   }
1512 }
1513 
1514 void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo,
1515                                     const MCSubtargetInfo &STI,
1516                                     raw_ostream &O) {
1517   using namespace llvm::AMDGPU::DepCtr;
1518 
1519   uint64_t Imm16 = MI->getOperand(OpNo).getImm() & 0xffff;
1520 
1521   bool HasNonDefaultVal = false;
1522   if (isSymbolicDepCtrEncoding(Imm16, HasNonDefaultVal, STI)) {
1523     int Id = 0;
1524     StringRef Name;
1525     unsigned Val;
1526     bool IsDefault;
1527     bool NeedSpace = false;
1528     while (decodeDepCtr(Imm16, Id, Name, Val, IsDefault, STI)) {
1529       if (!IsDefault || !HasNonDefaultVal) {
1530         if (NeedSpace)
1531           O << ' ';
1532         O << Name << '(' << Val << ')';
1533         NeedSpace = true;
1534       }
1535     }
1536   } else {
1537     O << formatHex(Imm16);
1538   }
1539 }
1540 
1541 void AMDGPUInstPrinter::printDelayFlag(const MCInst *MI, unsigned OpNo,
1542                                        const MCSubtargetInfo &STI,
1543                                        raw_ostream &O) {
1544   const char *BadInstId = "/* invalid instid value */";
1545   static const std::array<const char *, 12> InstIds = {
1546       "NO_DEP",        "VALU_DEP_1",    "VALU_DEP_2",
1547       "VALU_DEP_3",    "VALU_DEP_4",    "TRANS32_DEP_1",
1548       "TRANS32_DEP_2", "TRANS32_DEP_3", "FMA_ACCUM_CYCLE_1",
1549       "SALU_CYCLE_1",  "SALU_CYCLE_2",  "SALU_CYCLE_3"};
1550 
1551   const char *BadInstSkip = "/* invalid instskip value */";
1552   static const std::array<const char *, 6> InstSkips = {
1553       "SAME", "NEXT", "SKIP_1", "SKIP_2", "SKIP_3", "SKIP_4"};
1554 
1555   unsigned SImm16 = MI->getOperand(OpNo).getImm();
1556   const char *Prefix = "";
1557 
1558   unsigned Value = SImm16 & 0xF;
1559   if (Value) {
1560     const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
1561     O << Prefix << "instid0(" << Name << ')';
1562     Prefix = " | ";
1563   }
1564 
1565   Value = (SImm16 >> 4) & 7;
1566   if (Value) {
1567     const char *Name =
1568         Value < InstSkips.size() ? InstSkips[Value] : BadInstSkip;
1569     O << Prefix << "instskip(" << Name << ')';
1570     Prefix = " | ";
1571   }
1572 
1573   Value = (SImm16 >> 7) & 0xF;
1574   if (Value) {
1575     const char *Name = Value < InstIds.size() ? InstIds[Value] : BadInstId;
1576     O << Prefix << "instid1(" << Name << ')';
1577     Prefix = " | ";
1578   }
1579 
1580   if (!*Prefix)
1581     O << "0";
1582 }
1583 
1584 void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
1585                                    const MCSubtargetInfo &STI, raw_ostream &O) {
1586   unsigned Id;
1587   unsigned Offset;
1588   unsigned Width;
1589 
1590   using namespace llvm::AMDGPU::Hwreg;
1591   unsigned Val = MI->getOperand(OpNo).getImm();
1592   decodeHwreg(Val, Id, Offset, Width);
1593   StringRef HwRegName = getHwreg(Id, STI);
1594 
1595   O << "hwreg(";
1596   if (!HwRegName.empty()) {
1597     O << HwRegName;
1598   } else {
1599     O << Id;
1600   }
1601   if (Width != WIDTH_DEFAULT_ || Offset != OFFSET_DEFAULT_) {
1602     O << ", " << Offset << ", " << Width;
1603   }
1604   O << ')';
1605 }
1606 
1607 void AMDGPUInstPrinter::printEndpgm(const MCInst *MI, unsigned OpNo,
1608                                     const MCSubtargetInfo &STI,
1609                                     raw_ostream &O) {
1610   uint16_t Imm = MI->getOperand(OpNo).getImm();
1611   if (Imm == 0) {
1612     return;
1613   }
1614 
1615   O << ' ' << formatDec(Imm);
1616 }
1617 
1618 #include "AMDGPUGenAsmWriter.inc"
1619