1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/CodeGen/MachineValueType.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCExpr.h"
28 #include "llvm/MC/MCInst.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/MC/MCParser/MCAsmLexer.h"
31 #include "llvm/MC/MCParser/MCAsmParser.h"
32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDGPUMetadata.h"
37 #include "llvm/Support/AMDHSAKernelDescriptor.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/TargetParser/TargetParser.h"
41 #include <optional>
42 
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
46 
47 namespace {
48 
49 class AMDGPUAsmParser;
50 
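// Coarse register categories recognized by the parser: vector (v#), scalar
// (s#), accumulator (a#), and trap temporary (ttmp#) registers, plus special
// registers such as vcc or exec.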
51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52 
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
56 
57 class AMDGPUOperand : public MCParsedAsmOperand {
58   enum KindTy {
59     Token,
60     Immediate,
61     Register,
62     Expression
63   } Kind;
64 
65   SMLoc StartLoc, EndLoc;
66   const AMDGPUAsmParser *AsmParser;
67 
68 public:
69   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70       : Kind(Kind_), AsmParser(AsmParser_) {}
71 
72   using Ptr = std::unique_ptr<AMDGPUOperand>;
73 
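  // Source operand modifiers parsed from syntax such as -v0, |v0| / abs(v0),
  // and sext(v0). The FP modifiers (abs, neg) and the integer modifier (sext)
  // are mutually exclusive on a single operand.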
74   struct Modifiers {
75     bool Abs = false;
76     bool Neg = false;
77     bool Sext = false;
78 
79     bool hasFPModifiers() const { return Abs || Neg; }
80     bool hasIntModifiers() const { return Sext; }
81     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
82 
83     int64_t getFPModifiersOperand() const {
84       int64_t Operand = 0;
85       Operand |= Abs ? SISrcMods::ABS : 0u;
86       Operand |= Neg ? SISrcMods::NEG : 0u;
87       return Operand;
88     }
89 
90     int64_t getIntModifiersOperand() const {
91       int64_t Operand = 0;
92       Operand |= Sext ? SISrcMods::SEXT : 0u;
93       return Operand;
94     }
95 
96     int64_t getModifiersOperand() const {
97       assert(!(hasFPModifiers() && hasIntModifiers())
98            && "fp and int modifiers should not be used simultaneously");
99       if (hasFPModifiers()) {
100         return getFPModifiersOperand();
101       } else if (hasIntModifiers()) {
102         return getIntModifiersOperand();
103       } else {
104         return 0;
105       }
106     }
107 
108     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
109   };
110 
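  // Kinds of immediate operands, mostly named modifiers such as offset:,
  // dmask:, clamp, or the DPP/SDWA controls; ImmTyNone is a plain literal.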
111   enum ImmTy {
112     ImmTyNone,
113     ImmTyGDS,
114     ImmTyLDS,
115     ImmTyOffen,
116     ImmTyIdxen,
117     ImmTyAddr64,
118     ImmTyOffset,
119     ImmTyInstOffset,
120     ImmTyOffset0,
121     ImmTyOffset1,
122     ImmTySMEMOffsetMod,
123     ImmTyCPol,
124     ImmTyTFE,
125     ImmTyD16,
126     ImmTyClampSI,
127     ImmTyOModSI,
128     ImmTySDWADstSel,
129     ImmTySDWASrc0Sel,
130     ImmTySDWASrc1Sel,
131     ImmTySDWADstUnused,
132     ImmTyDMask,
133     ImmTyDim,
134     ImmTyUNorm,
135     ImmTyDA,
136     ImmTyR128A16,
137     ImmTyA16,
138     ImmTyLWE,
139     ImmTyExpTgt,
140     ImmTyExpCompr,
141     ImmTyExpVM,
142     ImmTyFORMAT,
143     ImmTyHwreg,
144     ImmTyOff,
145     ImmTySendMsg,
146     ImmTyInterpSlot,
147     ImmTyInterpAttr,
148     ImmTyInterpAttrChan,
149     ImmTyOpSel,
150     ImmTyOpSelHi,
151     ImmTyNegLo,
152     ImmTyNegHi,
153     ImmTyDPP8,
154     ImmTyDppCtrl,
155     ImmTyDppRowMask,
156     ImmTyDppBankMask,
157     ImmTyDppBoundCtrl,
158     ImmTyDppFI,
159     ImmTySwizzle,
160     ImmTyGprIdxMode,
161     ImmTyHigh,
162     ImmTyBLGP,
163     ImmTyCBSZ,
164     ImmTyABID,
165     ImmTyEndpgm,
166     ImmTyWaitVDST,
167     ImmTyWaitEXP,
168   };
169 
  // Immediate operand kind.
  // It is used to identify the location of an offending operand after an
  // error. Note that regular literals and mandatory literals (KImm) must be
  // handled differently. When looking for an offending operand, mandatory
  // literals should usually be ignored because they are part of the
  // instruction and cannot be changed. Report the location of mandatory
  // literals only for VOPD, when both OpX and OpY have a KImm and there are
  // no other literals.
177   enum ImmKindTy {
178     ImmKindTyNone,
179     ImmKindTyLiteral,
180     ImmKindTyMandatoryLiteral,
181     ImmKindTyConst,
182   };
183 
184 private:
185   struct TokOp {
186     const char *Data;
187     unsigned Length;
188   };
189 
190   struct ImmOp {
191     int64_t Val;
192     ImmTy Type;
193     bool IsFPImm;
194     mutable ImmKindTy Kind;
195     Modifiers Mods;
196   };
197 
198   struct RegOp {
199     unsigned RegNo;
200     Modifiers Mods;
201   };
202 
203   union {
204     TokOp Tok;
205     ImmOp Imm;
206     RegOp Reg;
207     const MCExpr *Expr;
208   };
209 
210 public:
211   bool isToken() const override { return Kind == Token; }
212 
213   bool isSymbolRefExpr() const {
214     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   void setImmKindNone() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyNone;
224   }
225 
226   void setImmKindLiteral() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyLiteral;
229   }
230 
231   void setImmKindMandatoryLiteral() const {
232     assert(isImm());
233     Imm.Kind = ImmKindTyMandatoryLiteral;
234   }
235 
236   void setImmKindConst() const {
237     assert(isImm());
238     Imm.Kind = ImmKindTyConst;
239   }
240 
241   bool IsImmKindLiteral() const {
242     return isImm() && Imm.Kind == ImmKindTyLiteral;
243   }
244 
245   bool IsImmKindMandatoryLiteral() const {
246     return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
247   }
248 
249   bool isImmKindConst() const {
250     return isImm() && Imm.Kind == ImmKindTyConst;
251   }
252 
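  // Whether this immediate can be encoded as an inline constant of the given
  // type (isInlinableImm) or as a literal of that type (isLiteralImm).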
253   bool isInlinableImm(MVT type) const;
254   bool isLiteralImm(MVT type) const;
255 
256   bool isRegKind() const {
257     return Kind == Register;
258   }
259 
260   bool isReg() const override {
261     return isRegKind() && !hasModifiers();
262   }
263 
264   bool isRegOrInline(unsigned RCID, MVT type) const {
265     return isRegClass(RCID) || isInlinableImm(type);
266   }
267 
268   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
269     return isRegOrInline(RCID, type) || isLiteralImm(type);
270   }
271 
272   bool isRegOrImmWithInt16InputMods() const {
273     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
274   }
275 
276   bool isRegOrImmWithInt32InputMods() const {
277     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
278   }
279 
280   bool isRegOrInlineImmWithInt16InputMods() const {
281     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
282   }
283 
284   bool isRegOrInlineImmWithInt32InputMods() const {
285     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
286   }
287 
288   bool isRegOrImmWithInt64InputMods() const {
289     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
290   }
291 
292   bool isRegOrImmWithFP16InputMods() const {
293     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
294   }
295 
296   bool isRegOrImmWithFP32InputMods() const {
297     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
298   }
299 
300   bool isRegOrImmWithFP64InputMods() const {
301     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
302   }
303 
304   bool isRegOrInlineImmWithFP16InputMods() const {
305     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
306   }
307 
308   bool isRegOrInlineImmWithFP32InputMods() const {
309     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
310   }
311 
313   bool isVReg() const {
314     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
315            isRegClass(AMDGPU::VReg_64RegClassID) ||
316            isRegClass(AMDGPU::VReg_96RegClassID) ||
317            isRegClass(AMDGPU::VReg_128RegClassID) ||
318            isRegClass(AMDGPU::VReg_160RegClassID) ||
319            isRegClass(AMDGPU::VReg_192RegClassID) ||
320            isRegClass(AMDGPU::VReg_256RegClassID) ||
321            isRegClass(AMDGPU::VReg_512RegClassID) ||
322            isRegClass(AMDGPU::VReg_1024RegClassID);
323   }
324 
325   bool isVReg32() const {
326     return isRegClass(AMDGPU::VGPR_32RegClassID);
327   }
328 
329   bool isVReg32OrOff() const {
330     return isOff() || isVReg32();
331   }
332 
333   bool isNull() const {
334     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
335   }
336 
337   bool isVRegWithInputMods() const;
338   bool isT16VRegWithInputMods() const;
339 
340   bool isSDWAOperand(MVT type) const;
341   bool isSDWAFP16Operand() const;
342   bool isSDWAFP32Operand() const;
343   bool isSDWAInt16Operand() const;
344   bool isSDWAInt32Operand() const;
345 
346   bool isImmTy(ImmTy ImmT) const {
347     return isImm() && Imm.Type == ImmT;
348   }
349 
350   bool isImmLiteral() const { return isImmTy(ImmTyNone); }
351 
352   bool isImmModifier() const {
353     return isImm() && Imm.Type != ImmTyNone;
354   }
355 
356   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
357   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
358   bool isDMask() const { return isImmTy(ImmTyDMask); }
359   bool isDim() const { return isImmTy(ImmTyDim); }
360   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
361   bool isDA() const { return isImmTy(ImmTyDA); }
362   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
363   bool isA16() const { return isImmTy(ImmTyA16); }
364   bool isLWE() const { return isImmTy(ImmTyLWE); }
365   bool isOff() const { return isImmTy(ImmTyOff); }
366   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
367   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
368   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
369   bool isOffen() const { return isImmTy(ImmTyOffen); }
370   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
371   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
372   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
373   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
374   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
375   bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
376   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
377   bool isGDS() const { return isImmTy(ImmTyGDS); }
378   bool isLDS() const { return isImmTy(ImmTyLDS); }
379   bool isCPol() const { return isImmTy(ImmTyCPol); }
380   bool isTFE() const { return isImmTy(ImmTyTFE); }
381   bool isD16() const { return isImmTy(ImmTyD16); }
382   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
383   bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
384   bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
385   bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
386   bool isDppFI() const { return isImmTy(ImmTyDppFI); }
387   bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
388   bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
389   bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
390   bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
391   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
392   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
393   bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
394   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
395   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
396   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
397   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
398   bool isHigh() const { return isImmTy(ImmTyHigh); }
399 
400   bool isRegOrImm() const {
401     return isReg() || isImm();
402   }
403 
404   bool isRegClass(unsigned RCID) const;
405 
406   bool isInlineValue() const;
407 
408   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
409     return isRegOrInline(RCID, type) && !hasModifiers();
410   }
411 
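  // Predicates for the source operand classes used by the auto-generated
  // matcher. Roughly: SCSrc = SGPR or inline constant, SSrc = SCSrc plus
  // literals, VCSrc = VGPR/SGPR or inline constant, VSrc = VCSrc plus
  // literals, VISrc = VGPR or inline constant, AISrc = AGPR or inline
  // constant. The suffix gives the expected width and int/FP interpretation.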
412   bool isSCSrcB16() const {
413     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
414   }
415 
416   bool isSCSrcV2B16() const {
417     return isSCSrcB16();
418   }
419 
420   bool isSCSrcB32() const {
421     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
422   }
423 
424   bool isSCSrcB64() const {
425     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
426   }
427 
428   bool isBoolReg() const;
429 
430   bool isSCSrcF16() const {
431     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
432   }
433 
434   bool isSCSrcV2F16() const {
435     return isSCSrcF16();
436   }
437 
438   bool isSCSrcF32() const {
439     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
440   }
441 
442   bool isSCSrcF64() const {
443     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
444   }
445 
446   bool isSSrcB32() const {
447     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
448   }
449 
450   bool isSSrcB16() const {
451     return isSCSrcB16() || isLiteralImm(MVT::i16);
452   }
453 
454   bool isSSrcV2B16() const {
455     llvm_unreachable("cannot happen");
456     return isSSrcB16();
457   }
458 
459   bool isSSrcB64() const {
460     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
461     // See isVSrc64().
462     return isSCSrcB64() || isLiteralImm(MVT::i64);
463   }
464 
465   bool isSSrcF32() const {
466     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
467   }
468 
469   bool isSSrcF64() const {
470     return isSCSrcB64() || isLiteralImm(MVT::f64);
471   }
472 
473   bool isSSrcF16() const {
474     return isSCSrcB16() || isLiteralImm(MVT::f16);
475   }
476 
477   bool isSSrcV2F16() const {
478     llvm_unreachable("cannot happen");
479     return isSSrcF16();
480   }
481 
482   bool isSSrcV2FP32() const {
483     llvm_unreachable("cannot happen");
484     return isSSrcF32();
485   }
486 
487   bool isSCSrcV2FP32() const {
488     llvm_unreachable("cannot happen");
489     return isSCSrcF32();
490   }
491 
492   bool isSSrcV2INT32() const {
493     llvm_unreachable("cannot happen");
494     return isSSrcB32();
495   }
496 
497   bool isSCSrcV2INT32() const {
498     llvm_unreachable("cannot happen");
499     return isSCSrcB32();
500   }
501 
502   bool isSSrcOrLdsB32() const {
503     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
504            isLiteralImm(MVT::i32) || isExpr();
505   }
506 
507   bool isVCSrcB32() const {
508     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
509   }
510 
511   bool isVCSrcB64() const {
512     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
513   }
514 
515   bool isVCSrcTB16_Lo128() const {
516     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
517   }
518 
519   bool isVCSrcB16() const {
520     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
521   }
522 
523   bool isVCSrcV2B16() const {
524     return isVCSrcB16();
525   }
526 
527   bool isVCSrcF32() const {
528     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
529   }
530 
531   bool isVCSrcF64() const {
532     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
533   }
534 
535   bool isVCSrcTF16_Lo128() const {
536     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
537   }
538 
539   bool isVCSrcF16() const {
540     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
541   }
542 
543   bool isVCSrcV2F16() const {
544     return isVCSrcF16();
545   }
546 
547   bool isVSrcB32() const {
548     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
549   }
550 
551   bool isVSrcB64() const {
552     return isVCSrcF64() || isLiteralImm(MVT::i64);
553   }
554 
555   bool isVSrcTB16_Lo128() const {
556     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
557   }
558 
559   bool isVSrcB16() const {
560     return isVCSrcB16() || isLiteralImm(MVT::i16);
561   }
562 
563   bool isVSrcV2B16() const {
564     return isVSrcB16() || isLiteralImm(MVT::v2i16);
565   }
566 
567   bool isVCSrcV2FP32() const {
568     return isVCSrcF64();
569   }
570 
571   bool isVSrcV2FP32() const {
572     return isVSrcF64() || isLiteralImm(MVT::v2f32);
573   }
574 
575   bool isVCSrcV2INT32() const {
576     return isVCSrcB64();
577   }
578 
579   bool isVSrcV2INT32() const {
580     return isVSrcB64() || isLiteralImm(MVT::v2i32);
581   }
582 
583   bool isVSrcF32() const {
584     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
585   }
586 
587   bool isVSrcF64() const {
588     return isVCSrcF64() || isLiteralImm(MVT::f64);
589   }
590 
591   bool isVSrcTF16_Lo128() const {
592     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
593   }
594 
595   bool isVSrcF16() const {
596     return isVCSrcF16() || isLiteralImm(MVT::f16);
597   }
598 
599   bool isVSrcV2F16() const {
600     return isVSrcF16() || isLiteralImm(MVT::v2f16);
601   }
602 
603   bool isVISrcB32() const {
604     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
605   }
606 
607   bool isVISrcB16() const {
608     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
609   }
610 
611   bool isVISrcV2B16() const {
612     return isVISrcB16();
613   }
614 
615   bool isVISrcF32() const {
616     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
617   }
618 
619   bool isVISrcF16() const {
620     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
621   }
622 
623   bool isVISrcV2F16() const {
624     return isVISrcF16() || isVISrcB32();
625   }
626 
627   bool isVISrc_64B64() const {
628     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
629   }
630 
631   bool isVISrc_64F64() const {
632     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
633   }
634 
635   bool isVISrc_64V2FP32() const {
636     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
637   }
638 
639   bool isVISrc_64V2INT32() const {
640     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
641   }
642 
643   bool isVISrc_256B64() const {
644     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
645   }
646 
647   bool isVISrc_256F64() const {
648     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
649   }
650 
651   bool isVISrc_128B16() const {
652     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
653   }
654 
655   bool isVISrc_128V2B16() const {
656     return isVISrc_128B16();
657   }
658 
659   bool isVISrc_128B32() const {
660     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
661   }
662 
663   bool isVISrc_128F32() const {
664     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
665   }
666 
667   bool isVISrc_256V2FP32() const {
668     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
669   }
670 
671   bool isVISrc_256V2INT32() const {
672     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
673   }
674 
675   bool isVISrc_512B32() const {
676     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
677   }
678 
679   bool isVISrc_512B16() const {
680     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
681   }
682 
683   bool isVISrc_512V2B16() const {
684     return isVISrc_512B16();
685   }
686 
687   bool isVISrc_512F32() const {
688     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
689   }
690 
691   bool isVISrc_512F16() const {
692     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
693   }
694 
695   bool isVISrc_512V2F16() const {
696     return isVISrc_512F16() || isVISrc_512B32();
697   }
698 
699   bool isVISrc_1024B32() const {
700     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
701   }
702 
703   bool isVISrc_1024B16() const {
704     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
705   }
706 
707   bool isVISrc_1024V2B16() const {
708     return isVISrc_1024B16();
709   }
710 
711   bool isVISrc_1024F32() const {
712     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
713   }
714 
715   bool isVISrc_1024F16() const {
716     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
717   }
718 
719   bool isVISrc_1024V2F16() const {
720     return isVISrc_1024F16() || isVISrc_1024B32();
721   }
722 
723   bool isAISrcB32() const {
724     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
725   }
726 
727   bool isAISrcB16() const {
728     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
729   }
730 
731   bool isAISrcV2B16() const {
732     return isAISrcB16();
733   }
734 
735   bool isAISrcF32() const {
736     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
737   }
738 
739   bool isAISrcF16() const {
740     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
741   }
742 
743   bool isAISrcV2F16() const {
744     return isAISrcF16() || isAISrcB32();
745   }
746 
747   bool isAISrc_64B64() const {
748     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
749   }
750 
751   bool isAISrc_64F64() const {
752     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
753   }
754 
755   bool isAISrc_128B32() const {
756     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
757   }
758 
759   bool isAISrc_128B16() const {
760     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
761   }
762 
763   bool isAISrc_128V2B16() const {
764     return isAISrc_128B16();
765   }
766 
767   bool isAISrc_128F32() const {
768     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
769   }
770 
771   bool isAISrc_128F16() const {
772     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
773   }
774 
775   bool isAISrc_128V2F16() const {
776     return isAISrc_128F16() || isAISrc_128B32();
777   }
778 
779   bool isVISrc_128F16() const {
780     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
781   }
782 
783   bool isVISrc_128V2F16() const {
784     return isVISrc_128F16() || isVISrc_128B32();
785   }
786 
787   bool isAISrc_256B64() const {
788     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
789   }
790 
791   bool isAISrc_256F64() const {
792     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
793   }
794 
795   bool isAISrc_512B32() const {
796     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
797   }
798 
799   bool isAISrc_512B16() const {
800     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
801   }
802 
803   bool isAISrc_512V2B16() const {
804     return isAISrc_512B16();
805   }
806 
807   bool isAISrc_512F32() const {
808     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
809   }
810 
811   bool isAISrc_512F16() const {
812     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
813   }
814 
815   bool isAISrc_512V2F16() const {
816     return isAISrc_512F16() || isAISrc_512B32();
817   }
818 
819   bool isAISrc_1024B32() const {
820     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
821   }
822 
823   bool isAISrc_1024B16() const {
824     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
825   }
826 
827   bool isAISrc_1024V2B16() const {
828     return isAISrc_1024B16();
829   }
830 
831   bool isAISrc_1024F32() const {
832     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
833   }
834 
835   bool isAISrc_1024F16() const {
836     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
837   }
838 
839   bool isAISrc_1024V2F16() const {
840     return isAISrc_1024F16() || isAISrc_1024B32();
841   }
842 
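  // KImm operands are mandatory literals encoded directly in the instruction,
  // e.g. the constant in v_madmk_f32.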
843   bool isKImmFP32() const {
844     return isLiteralImm(MVT::f32);
845   }
846 
847   bool isKImmFP16() const {
848     return isLiteralImm(MVT::f16);
849   }
850 
851   bool isMem() const override {
852     return false;
853   }
854 
855   bool isExpr() const {
856     return Kind == Expression;
857   }
858 
859   bool isSOPPBrTarget() const { return isExpr() || isImm(); }
860 
861   bool isSWaitCnt() const;
862   bool isDepCtr() const;
863   bool isSDelayALU() const;
864   bool isHwreg() const;
865   bool isSendMsg() const;
866   bool isSwizzle() const;
867   bool isSMRDOffset8() const;
868   bool isSMEMOffset() const;
869   bool isSMRDLiteralOffset() const;
870   bool isDPP8() const;
871   bool isDPPCtrl() const;
872   bool isBLGP() const;
873   bool isCBSZ() const;
874   bool isABID() const;
875   bool isGPRIdxMode() const;
876   bool isS16Imm() const;
877   bool isU16Imm() const;
878   bool isEndpgm() const;
879   bool isWaitVDST() const;
880   bool isWaitEXP() const;
881 
882   StringRef getToken() const {
883     assert(isToken());
884     return StringRef(Tok.Data, Tok.Length);
885   }
886 
887   int64_t getImm() const {
888     assert(isImm());
889     return Imm.Val;
890   }
891 
892   void setImm(int64_t Val) {
893     assert(isImm());
894     Imm.Val = Val;
895   }
896 
897   ImmTy getImmTy() const {
898     assert(isImm());
899     return Imm.Type;
900   }
901 
902   unsigned getReg() const override {
903     assert(isRegKind());
904     return Reg.RegNo;
905   }
906 
907   SMLoc getStartLoc() const override {
908     return StartLoc;
909   }
910 
911   SMLoc getEndLoc() const override {
912     return EndLoc;
913   }
914 
915   SMRange getLocRange() const {
916     return SMRange(StartLoc, EndLoc);
917   }
918 
919   Modifiers getModifiers() const {
920     assert(isRegKind() || isImmTy(ImmTyNone));
921     return isRegKind() ? Reg.Mods : Imm.Mods;
922   }
923 
924   void setModifiers(Modifiers Mods) {
925     assert(isRegKind() || isImmTy(ImmTyNone));
926     if (isRegKind())
927       Reg.Mods = Mods;
928     else
929       Imm.Mods = Mods;
930   }
931 
932   bool hasModifiers() const {
933     return getModifiers().hasModifiers();
934   }
935 
936   bool hasFPModifiers() const {
937     return getModifiers().hasFPModifiers();
938   }
939 
940   bool hasIntModifiers() const {
941     return getModifiers().hasIntModifiers();
942   }
943 
944   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
945 
946   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
947 
948   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
949 
950   void addRegOperands(MCInst &Inst, unsigned N) const;
951 
952   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
953     if (isRegKind())
954       addRegOperands(Inst, N);
955     else
956       addImmOperands(Inst, N);
957   }
958 
959   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
960     Modifiers Mods = getModifiers();
961     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
962     if (isRegKind()) {
963       addRegOperands(Inst, N);
964     } else {
965       addImmOperands(Inst, N, false);
966     }
967   }
968 
969   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
970     assert(!hasIntModifiers());
971     addRegOrImmWithInputModsOperands(Inst, N);
972   }
973 
974   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
975     assert(!hasFPModifiers());
976     addRegOrImmWithInputModsOperands(Inst, N);
977   }
978 
979   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
980     Modifiers Mods = getModifiers();
981     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
982     assert(isRegKind());
983     addRegOperands(Inst, N);
984   }
985 
986   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
987     assert(!hasIntModifiers());
988     addRegWithInputModsOperands(Inst, N);
989   }
990 
991   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
992     assert(!hasFPModifiers());
993     addRegWithInputModsOperands(Inst, N);
994   }
995 
996   static void printImmTy(raw_ostream& OS, ImmTy Type) {
997     switch (Type) {
998     case ImmTyNone: OS << "None"; break;
999     case ImmTyGDS: OS << "GDS"; break;
1000     case ImmTyLDS: OS << "LDS"; break;
1001     case ImmTyOffen: OS << "Offen"; break;
1002     case ImmTyIdxen: OS << "Idxen"; break;
1003     case ImmTyAddr64: OS << "Addr64"; break;
1004     case ImmTyOffset: OS << "Offset"; break;
1005     case ImmTyInstOffset: OS << "InstOffset"; break;
1006     case ImmTyOffset0: OS << "Offset0"; break;
1007     case ImmTyOffset1: OS << "Offset1"; break;
1008     case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1009     case ImmTyCPol: OS << "CPol"; break;
1010     case ImmTyTFE: OS << "TFE"; break;
1011     case ImmTyD16: OS << "D16"; break;
1012     case ImmTyFORMAT: OS << "FORMAT"; break;
1013     case ImmTyClampSI: OS << "ClampSI"; break;
1014     case ImmTyOModSI: OS << "OModSI"; break;
1015     case ImmTyDPP8: OS << "DPP8"; break;
1016     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1017     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1018     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1019     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1020     case ImmTyDppFI: OS << "DppFI"; break;
1021     case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1022     case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1023     case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1024     case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1025     case ImmTyDMask: OS << "DMask"; break;
1026     case ImmTyDim: OS << "Dim"; break;
1027     case ImmTyUNorm: OS << "UNorm"; break;
1028     case ImmTyDA: OS << "DA"; break;
1029     case ImmTyR128A16: OS << "R128A16"; break;
1030     case ImmTyA16: OS << "A16"; break;
1031     case ImmTyLWE: OS << "LWE"; break;
1032     case ImmTyOff: OS << "Off"; break;
1033     case ImmTyExpTgt: OS << "ExpTgt"; break;
1034     case ImmTyExpCompr: OS << "ExpCompr"; break;
1035     case ImmTyExpVM: OS << "ExpVM"; break;
1036     case ImmTyHwreg: OS << "Hwreg"; break;
1037     case ImmTySendMsg: OS << "SendMsg"; break;
1038     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1039     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1040     case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1041     case ImmTyOpSel: OS << "OpSel"; break;
1042     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1043     case ImmTyNegLo: OS << "NegLo"; break;
1044     case ImmTyNegHi: OS << "NegHi"; break;
1045     case ImmTySwizzle: OS << "Swizzle"; break;
1046     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1047     case ImmTyHigh: OS << "High"; break;
1048     case ImmTyBLGP: OS << "BLGP"; break;
1049     case ImmTyCBSZ: OS << "CBSZ"; break;
1050     case ImmTyABID: OS << "ABID"; break;
1051     case ImmTyEndpgm: OS << "Endpgm"; break;
1052     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1053     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1054     }
1055   }
1056 
1057   void print(raw_ostream &OS) const override {
1058     switch (Kind) {
1059     case Register:
1060       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1061       break;
1062     case Immediate:
1063       OS << '<' << getImm();
1064       if (getImmTy() != ImmTyNone) {
1065         OS << " type: "; printImmTy(OS, getImmTy());
1066       }
1067       OS << " mods: " << Imm.Mods << '>';
1068       break;
1069     case Token:
1070       OS << '\'' << getToken() << '\'';
1071       break;
1072     case Expression:
1073       OS << "<expr " << *Expr << '>';
1074       break;
1075     }
1076   }
1077 
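  // Factory methods used by the parser to construct operands; source
  // locations are recorded for later diagnostics.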
1078   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1079                                       int64_t Val, SMLoc Loc,
1080                                       ImmTy Type = ImmTyNone,
1081                                       bool IsFPImm = false) {
1082     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1083     Op->Imm.Val = Val;
1084     Op->Imm.IsFPImm = IsFPImm;
1085     Op->Imm.Kind = ImmKindTyNone;
1086     Op->Imm.Type = Type;
1087     Op->Imm.Mods = Modifiers();
1088     Op->StartLoc = Loc;
1089     Op->EndLoc = Loc;
1090     return Op;
1091   }
1092 
1093   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1094                                         StringRef Str, SMLoc Loc,
1095                                         bool HasExplicitEncodingSize = true) {
1096     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1097     Res->Tok.Data = Str.data();
1098     Res->Tok.Length = Str.size();
1099     Res->StartLoc = Loc;
1100     Res->EndLoc = Loc;
1101     return Res;
1102   }
1103 
1104   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1105                                       unsigned RegNo, SMLoc S,
1106                                       SMLoc E) {
1107     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1108     Op->Reg.RegNo = RegNo;
1109     Op->Reg.Mods = Modifiers();
1110     Op->StartLoc = S;
1111     Op->EndLoc = E;
1112     return Op;
1113   }
1114 
1115   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1116                                        const class MCExpr *Expr, SMLoc S) {
1117     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1118     Op->Expr = Expr;
1119     Op->StartLoc = S;
1120     Op->EndLoc = S;
1121     return Op;
1122   }
1123 };
1124 
1125 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1126   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1127   return OS;
1128 }
1129 
1130 //===----------------------------------------------------------------------===//
1131 // AsmParser
1132 //===----------------------------------------------------------------------===//
1133 
// Holds information about the current kernel, e.g. the number of SGPRs used.
// A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
1137 class KernelScopeInfo {
1138   int SgprIndexUnusedMin = -1;
1139   int VgprIndexUnusedMin = -1;
1140   int AgprIndexUnusedMin = -1;
1141   MCContext *Ctx = nullptr;
1142   MCSubtargetInfo const *MSTI = nullptr;
1143 
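  // Record that register index i is in use and advance the corresponding
  // .kernel.*_count symbol to one past the highest index seen so far.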
1144   void usesSgprAt(int i) {
1145     if (i >= SgprIndexUnusedMin) {
1146       SgprIndexUnusedMin = ++i;
1147       if (Ctx) {
1148         MCSymbol* const Sym =
1149           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1150         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1151       }
1152     }
1153   }
1154 
1155   void usesVgprAt(int i) {
1156     if (i >= VgprIndexUnusedMin) {
1157       VgprIndexUnusedMin = ++i;
1158       if (Ctx) {
1159         MCSymbol* const Sym =
1160           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1161         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1162                                          VgprIndexUnusedMin);
1163         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1164       }
1165     }
1166   }
1167 
1168   void usesAgprAt(int i) {
    // AGPRs are only tracked when MAI instructions are supported; otherwise
    // the instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction.
1170     if (!hasMAIInsts(*MSTI))
1171       return;
1172 
1173     if (i >= AgprIndexUnusedMin) {
1174       AgprIndexUnusedMin = ++i;
1175       if (Ctx) {
1176         MCSymbol* const Sym =
1177           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1178         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1179 
1180         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1181         MCSymbol* const vSym =
1182           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1183         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1184                                          VgprIndexUnusedMin);
1185         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1186       }
1187     }
1188   }
1189 
1190 public:
1191   KernelScopeInfo() = default;
1192 
1193   void initialize(MCContext &Context) {
1194     Ctx = &Context;
1195     MSTI = Ctx->getSubtargetInfo();
1196 
1197     usesSgprAt(SgprIndexUnusedMin = -1);
1198     usesVgprAt(VgprIndexUnusedMin = -1);
1199     if (hasMAIInsts(*MSTI)) {
1200       usesAgprAt(AgprIndexUnusedMin = -1);
1201     }
1202   }
1203 
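  // Called for each parsed register operand. RegWidth is in bits, so the last
  // 32-bit register used has index DwordRegIndex + ceil(RegWidth / 32) - 1.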
1204   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1205                     unsigned RegWidth) {
1206     switch (RegKind) {
1207     case IS_SGPR:
1208       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1209       break;
1210     case IS_AGPR:
1211       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1212       break;
1213     case IS_VGPR:
1214       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1215       break;
1216     default:
1217       break;
1218     }
1219   }
1220 };
1221 
1222 class AMDGPUAsmParser : public MCTargetAsmParser {
1223   MCAsmParser &Parser;
1224 
1225   unsigned ForcedEncodingSize = 0;
1226   bool ForcedDPP = false;
1227   bool ForcedSDWA = false;
1228   KernelScopeInfo KernelScope;
1229 
1230   /// @name Auto-generated Match Functions
1231   /// {
1232 
1233 #define GET_ASSEMBLER_HEADER
1234 #include "AMDGPUGenAsmMatcher.inc"
1235 
1236   /// }
1237 
1238 private:
1239   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1240   bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for the given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
1243   ///
1244   /// \param Features [in] Target features, used for bug corrections.
1245   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1246   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1247   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1248   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1249   /// descriptor field, if valid.
1250   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1251   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1252   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1253   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1254   /// \param VGPRBlocks [out] Result VGPR block count.
1255   /// \param SGPRBlocks [out] Result SGPR block count.
1256   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1257                           bool FlatScrUsed, bool XNACKUsed,
1258                           std::optional<bool> EnableWavefrontSize32,
1259                           unsigned NextFreeVGPR, SMRange VGPRRange,
1260                           unsigned NextFreeSGPR, SMRange SGPRRange,
1261                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1262   bool ParseDirectiveAMDGCNTarget();
1263   bool ParseDirectiveAMDHSAKernel();
1264   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1265   bool ParseDirectiveHSACodeObjectVersion();
1266   bool ParseDirectiveHSACodeObjectISA();
1267   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1268   bool ParseDirectiveAMDKernelCodeT();
1269   // TODO: Possibly make subtargetHasRegister const.
1270   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1271   bool ParseDirectiveAMDGPUHsaKernel();
1272 
1273   bool ParseDirectiveISAVersion();
1274   bool ParseDirectiveHSAMetadata();
1275   bool ParseDirectivePALMetadataBegin();
1276   bool ParseDirectivePALMetadata();
1277   bool ParseDirectiveAMDGPULDS();
1278 
1279   /// Common code to parse out a block of text (typically YAML) between start and
1280   /// end directives.
1281   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1282                            const char *AssemblerDirectiveEnd,
1283                            std::string &CollectString);
1284 
1285   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1286                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1287   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1288                            unsigned &RegNum, unsigned &RegWidth,
1289                            bool RestoreOnFailure = false);
1290   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1291                            unsigned &RegNum, unsigned &RegWidth,
1292                            SmallVectorImpl<AsmToken> &Tokens);
1293   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1294                            unsigned &RegWidth,
1295                            SmallVectorImpl<AsmToken> &Tokens);
1296   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1297                            unsigned &RegWidth,
1298                            SmallVectorImpl<AsmToken> &Tokens);
1299   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1300                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1301   bool ParseRegRange(unsigned& Num, unsigned& Width);
1302   unsigned getRegularReg(RegisterKind RegKind,
1303                          unsigned RegNum,
1304                          unsigned RegWidth,
1305                          SMLoc Loc);
1306 
1307   bool isRegister();
1308   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1309   std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1310   void initializeGprCountSymbol(RegisterKind RegKind);
1311   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1312                              unsigned RegWidth);
1313   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1314                     bool IsAtomic);
1315 
1316 public:
1317   enum AMDGPUMatchResultTy {
1318     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1319   };
1320   enum OperandMode {
1321     OperandMode_Default,
1322     OperandMode_NSA,
1323   };
1324 
1325   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1326 
1327   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1328                const MCInstrInfo &MII,
1329                const MCTargetOptions &Options)
1330       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1331     MCAsmParserExtension::Initialize(Parser);
1332 
1333     if (getFeatureBits().none()) {
1334       // Set default features.
1335       copySTI().ToggleFeature("southern-islands");
1336     }
1337 
1338     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1339 
1340     {
      // TODO: make these predefined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
1345       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1346       MCContext &Ctx = getContext();
1347       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1348         MCSymbol *Sym =
1349             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1350         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1351         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1352         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1353         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1354         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1355       } else {
1356         MCSymbol *Sym =
1357             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1358         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1359         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1360         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1361         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1362         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1363       }
1364       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1365         initializeGprCountSymbol(IS_VGPR);
1366         initializeGprCountSymbol(IS_SGPR);
1367       } else
1368         KernelScope.initialize(getContext());
1369     }
1370   }
1371 
1372   bool hasMIMG_R128() const {
1373     return AMDGPU::hasMIMG_R128(getSTI());
1374   }
1375 
1376   bool hasPackedD16() const {
1377     return AMDGPU::hasPackedD16(getSTI());
1378   }
1379 
1380   bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1381 
1382   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1383 
1384   bool isSI() const {
1385     return AMDGPU::isSI(getSTI());
1386   }
1387 
1388   bool isCI() const {
1389     return AMDGPU::isCI(getSTI());
1390   }
1391 
1392   bool isVI() const {
1393     return AMDGPU::isVI(getSTI());
1394   }
1395 
1396   bool isGFX9() const {
1397     return AMDGPU::isGFX9(getSTI());
1398   }
1399 
  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1401   bool isGFX90A() const {
1402     return AMDGPU::isGFX90A(getSTI());
1403   }
1404 
1405   bool isGFX940() const {
1406     return AMDGPU::isGFX940(getSTI());
1407   }
1408 
1409   bool isGFX9Plus() const {
1410     return AMDGPU::isGFX9Plus(getSTI());
1411   }
1412 
1413   bool isGFX10() const {
1414     return AMDGPU::isGFX10(getSTI());
1415   }
1416 
1417   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1418 
1419   bool isGFX11() const {
1420     return AMDGPU::isGFX11(getSTI());
1421   }
1422 
1423   bool isGFX11Plus() const {
1424     return AMDGPU::isGFX11Plus(getSTI());
1425   }
1426 
1427   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1428 
1429   bool isGFX10_BEncoding() const {
1430     return AMDGPU::isGFX10_BEncoding(getSTI());
1431   }
1432 
1433   bool hasInv2PiInlineImm() const {
1434     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1435   }
1436 
1437   bool hasFlatOffsets() const {
1438     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1439   }
1440 
1441   bool hasArchitectedFlatScratch() const {
1442     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1443   }
1444 
1445   bool hasSGPR102_SGPR103() const {
1446     return !isVI() && !isGFX9();
1447   }
1448 
1449   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1450 
1451   bool hasIntClamp() const {
1452     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1453   }
1454 
1455   bool hasPartialNSAEncoding() const {
1456     return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1457   }
1458 
1459   unsigned getNSAMaxSize() const {
1460     return AMDGPU::getNSAMaxSize(getSTI());
1461   }
1462 
1463   AMDGPUTargetStreamer &getTargetStreamer() {
1464     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1465     return static_cast<AMDGPUTargetStreamer &>(TS);
1466   }
1467 
1468   const MCRegisterInfo *getMRI() const {
1469     // We need this const_cast because for some reason getContext() is not const
1470     // in MCAsmParser.
1471     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1472   }
1473 
1474   const MCInstrInfo *getMII() const {
1475     return &MII;
1476   }
1477 
1478   const FeatureBitset &getFeatureBits() const {
1479     return getSTI().getFeatureBits();
1480   }
1481 
1482   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1483   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1484   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1485 
1486   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1487   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1488   bool isForcedDPP() const { return ForcedDPP; }
1489   bool isForcedSDWA() const { return ForcedSDWA; }
1490   ArrayRef<unsigned> getMatchedVariants() const;
1491   StringRef getMatchedVariantName() const;
1492 
1493   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1494   bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1495                      bool RestoreOnFailure);
1496   bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1497                      SMLoc &EndLoc) override;
1498   OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1499                                         SMLoc &EndLoc) override;
1500   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1501   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1502                                       unsigned Kind) override;
1503   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1504                                OperandVector &Operands, MCStreamer &Out,
1505                                uint64_t &ErrorInfo,
1506                                bool MatchingInlineAsm) override;
1507   bool ParseDirective(AsmToken DirectiveID) override;
1508   ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1509                            OperandMode Mode = OperandMode_Default);
1510   StringRef parseMnemonicSuffix(StringRef Name);
1511   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1512                         SMLoc NameLoc, OperandVector &Operands) override;
1513   //bool ProcessInstruction(MCInst &Inst);
1514 
1515   ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1516 
1517   ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1518 
1519   ParseStatus
1520   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1521                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1522                      std::function<bool(int64_t &)> ConvertResult = nullptr);
1523 
1524   ParseStatus parseOperandArrayWithPrefix(
1525       const char *Prefix, OperandVector &Operands,
1526       AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1527       bool (*ConvertResult)(int64_t &) = nullptr);
1528 
1529   ParseStatus
1530   parseNamedBit(StringRef Name, OperandVector &Operands,
1531                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1532   unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1533   ParseStatus parseCPol(OperandVector &Operands);
1534   ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1535                                     SMLoc &StringLoc);
1536 
1537   bool isModifier();
1538   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1539   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1540   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1541   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1542   bool parseSP3NegModifier();
1543   ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1544   ParseStatus parseReg(OperandVector &Operands);
1545   ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1546   ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1547                                            bool AllowImm = true);
1548   ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1549                                             bool AllowImm = true);
1550   ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1551   ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1552   ParseStatus parseVReg32OrOff(OperandVector &Operands);
1553   ParseStatus parseDfmtNfmt(int64_t &Format);
1554   ParseStatus parseUfmt(int64_t &Format);
1555   ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1556                                        int64_t &Format);
1557   ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1558                                          int64_t &Format);
1559   ParseStatus parseFORMAT(OperandVector &Operands);
1560   ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1561   ParseStatus parseNumericFormat(int64_t &Format);
1562   ParseStatus parseFlatOffset(OperandVector &Operands);
1563   ParseStatus parseR128A16(OperandVector &Operands);
1564   ParseStatus parseBLGP(OperandVector &Operands);
1565   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1566   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1567 
1568   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1569 
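  // s_waitcnt operands, e.g. "s_waitcnt vmcnt(0) lgkmcnt(0)" or a raw
  // immediate.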
1570   bool parseCnt(int64_t &IntVal);
1571   ParseStatus parseSWaitCnt(OperandVector &Operands);
1572 
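  // Operands of s_waitcnt_depctr, given either as named fields (illustrative
  // syntax: "depctr_va_vdst(0)") or as a raw immediate.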
1573   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1574   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1575   ParseStatus parseDepCtr(OperandVector &Operands);
1576 
1577   bool parseDelay(int64_t &Delay);
1578   ParseStatus parseSDelayALU(OperandVector &Operands);
1579 
1580   ParseStatus parseHwreg(OperandVector &Operands);
1581 
1582 private:
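  // Helper for structured operands such as hwreg() and sendmsg(): records the
  // parsed value, its location, and whether it was symbolic and defined.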
1583   struct OperandInfoTy {
1584     SMLoc Loc;
1585     int64_t Id;
1586     bool IsSymbolic = false;
1587     bool IsDefined = false;
1588 
1589     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1590   };
1591 
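  // sendmsg(...) operands, e.g. "s_sendmsg sendmsg(MSG_INTERRUPT)" or
  // "sendmsg(MSG_GS, GS_OP_EMIT, 0)" (illustrative syntax).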
1592   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1593   bool validateSendMsg(const OperandInfoTy &Msg,
1594                        const OperandInfoTy &Op,
1595                        const OperandInfoTy &Stream);
1596 
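  // hwreg(...) operands, e.g. "s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 32)"
  // (illustrative syntax).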
1597   bool parseHwregBody(OperandInfoTy &HwReg,
1598                       OperandInfoTy &Offset,
1599                       OperandInfoTy &Width);
1600   bool validateHwreg(const OperandInfoTy &HwReg,
1601                      const OperandInfoTy &Offset,
1602                      const OperandInfoTy &Width);
1603 
1604   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1605   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1606   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1607 
1608   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1609                       const OperandVector &Operands) const;
1610   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1611   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1612   SMLoc getLitLoc(const OperandVector &Operands,
1613                   bool SearchMandatoryLiterals = false) const;
1614   SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1615   SMLoc getConstLoc(const OperandVector &Operands) const;
1616   SMLoc getInstLoc(const OperandVector &Operands) const;
1617 
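  // Semantic checks applied to a successfully matched MCInst; these report
  // target-specific diagnostics that the auto-generated matcher cannot
  // express.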
1618   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1619   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1620   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1621   bool validateSOPLiteral(const MCInst &Inst) const;
1622   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1623   bool validateVOPDRegBankConstraints(const MCInst &Inst,
1624                                       const OperandVector &Operands);
1625   bool validateIntClampSupported(const MCInst &Inst);
1626   bool validateMIMGAtomicDMask(const MCInst &Inst);
1627   bool validateMIMGGatherDMask(const MCInst &Inst);
1628   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1630   bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1631   bool validateMIMGD16(const MCInst &Inst);
1632   bool validateMIMGMSAA(const MCInst &Inst);
1633   bool validateOpSel(const MCInst &Inst);
1634   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1635   bool validateVccOperand(unsigned Reg) const;
1636   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1637   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1638   bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1639   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1640   bool validateAGPRLdSt(const MCInst &Inst) const;
1641   bool validateVGPRAlign(const MCInst &Inst) const;
1642   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1643   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1644   bool validateDivScale(const MCInst &Inst);
1645   bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1646   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1647                              const SMLoc &IDLoc);
1648   bool validateExeczVcczOperands(const OperandVector &Operands);
1649   bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1650   std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1651   unsigned getConstantBusLimit(unsigned Opcode) const;
1652   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1653   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1654   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1655 
1656   bool isSupportedMnemo(StringRef Mnemo,
1657                         const FeatureBitset &FBS);
1658   bool isSupportedMnemo(StringRef Mnemo,
1659                         const FeatureBitset &FBS,
1660                         ArrayRef<unsigned> Variants);
1661   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1662 
1663   bool isId(const StringRef Id) const;
1664   bool isId(const AsmToken &Token, const StringRef Id) const;
1665   bool isToken(const AsmToken::TokenKind Kind) const;
1666   StringRef getId() const;
1667   bool trySkipId(const StringRef Id);
1668   bool trySkipId(const StringRef Pref, const StringRef Id);
1669   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1670   bool trySkipToken(const AsmToken::TokenKind Kind);
1671   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1672   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1673   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1674 
1675   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1676   AsmToken::TokenKind getTokenKind() const;
1677   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1678   bool parseExpr(OperandVector &Operands);
1679   StringRef getTokenStr() const;
1680   AsmToken peekToken(bool ShouldSkipSpace = true);
1681   AsmToken getToken() const;
1682   SMLoc getLoc() const;
1683   void lex();
1684 
1685 public:
1686   void onBeginOfFile() override;
1687 
1688   ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1689 
1690   ParseStatus parseExpTgt(OperandVector &Operands);
1691   ParseStatus parseSendMsg(OperandVector &Operands);
1692   ParseStatus parseInterpSlot(OperandVector &Operands);
1693   ParseStatus parseInterpAttr(OperandVector &Operands);
1694   ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1695   ParseStatus parseBoolReg(OperandVector &Operands);
1696 
1697   bool parseSwizzleOperand(int64_t &Op,
1698                            const unsigned MinVal,
1699                            const unsigned MaxVal,
1700                            const StringRef ErrMsg,
1701                            SMLoc &Loc);
1702   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1703                             const unsigned MinVal,
1704                             const unsigned MaxVal,
1705                             const StringRef ErrMsg);
1706   ParseStatus parseSwizzle(OperandVector &Operands);
1707   bool parseSwizzleOffset(int64_t &Imm);
1708   bool parseSwizzleMacro(int64_t &Imm);
1709   bool parseSwizzleQuadPerm(int64_t &Imm);
1710   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1711   bool parseSwizzleBroadcast(int64_t &Imm);
1712   bool parseSwizzleSwap(int64_t &Imm);
1713   bool parseSwizzleReverse(int64_t &Imm);
1714 
1715   ParseStatus parseGPRIdxMode(OperandVector &Operands);
1716   int64_t parseGPRIdxMacro();
1717 
1718   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1719   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1720 
1721   ParseStatus parseOModSI(OperandVector &Operands);
1722 
1723   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1724                OptionalImmIndexMap &OptionalIdx);
1725   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1726   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1727   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1728   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1729   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1730                     OptionalImmIndexMap &OptionalIdx);
1731   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1732                 OptionalImmIndexMap &OptionalIdx);
1733 
1734   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1735   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1736   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1737 
1738   bool parseDimId(unsigned &Encoding);
1739   ParseStatus parseDim(OperandVector &Operands);
1740   bool convertDppBoundCtrl(int64_t &BoundCtrl);
1741   ParseStatus parseDPP8(OperandVector &Operands);
1742   ParseStatus parseDPPCtrl(OperandVector &Operands);
1743   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1744   int64_t parseDPPCtrlSel(StringRef Ctrl);
1745   int64_t parseDPPCtrlPerm();
1746   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1747   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1748     cvtDPP(Inst, Operands, true);
1749   }
1750   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1751                   bool IsDPP8 = false);
1752   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1753     cvtVOP3DPP(Inst, Operands, true);
1754   }
1755 
1756   ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1757                            AMDGPUOperand::ImmTy Type);
1758   ParseStatus parseSDWADstUnused(OperandVector &Operands);
1759   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1760   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1761   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1762   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1763   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1764   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1765                uint64_t BasicInstType,
1766                bool SkipDstVcc = false,
1767                bool SkipSrcVcc = false);
1768 
1769   ParseStatus parseEndpgm(OperandVector &Operands);
1770 
1771   ParseStatus parseVOPD(OperandVector &Operands);
1772 };
1773 
1774 } // end anonymous namespace
1775 
1776 // May be called with integer type with equivalent bitwidth.
1777 static const fltSemantics *getFltSemantics(unsigned Size) {
1778   switch (Size) {
1779   case 4:
1780     return &APFloat::IEEEsingle();
1781   case 8:
1782     return &APFloat::IEEEdouble();
1783   case 2:
1784     return &APFloat::IEEEhalf();
1785   default:
1786     llvm_unreachable("unsupported fp type");
1787   }
1788 }
1789 
1790 static const fltSemantics *getFltSemantics(MVT VT) {
1791   return getFltSemantics(VT.getSizeInBits() / 8);
1792 }
1793 
1794 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1795   switch (OperandType) {
1796   case AMDGPU::OPERAND_REG_IMM_INT32:
1797   case AMDGPU::OPERAND_REG_IMM_FP32:
1798   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1799   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1800   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1801   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1802   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1803   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1804   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1805   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1806   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1807   case AMDGPU::OPERAND_KIMM32:
1808     return &APFloat::IEEEsingle();
1809   case AMDGPU::OPERAND_REG_IMM_INT64:
1810   case AMDGPU::OPERAND_REG_IMM_FP64:
1811   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1812   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1813   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1814     return &APFloat::IEEEdouble();
1815   case AMDGPU::OPERAND_REG_IMM_INT16:
1816   case AMDGPU::OPERAND_REG_IMM_FP16:
1817   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1818   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1819   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1820   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1821   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1822   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1823   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1824   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1825   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1826   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1827   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1828   case AMDGPU::OPERAND_KIMM16:
1829     return &APFloat::IEEEhalf();
1830   default:
1831     llvm_unreachable("unsupported fp type");
1832   }
1833 }
1834 
1835 //===----------------------------------------------------------------------===//
1836 // Operand
1837 //===----------------------------------------------------------------------===//
1838 
1839 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1840   bool Lost;
1841 
1842   // Convert the literal to the operand's floating-point type.
1843   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1844                                                APFloat::rmNearestTiesToEven,
1845                                                &Lost);
1846   // Precision loss is allowed, but overflow and underflow are not.
1847   if (Status != APFloat::opOK &&
1848       Lost &&
1849       ((Status & APFloat::opOverflow)  != 0 ||
1850        (Status & APFloat::opUnderflow) != 0)) {
1851     return false;
1852   }
1853 
1854   return true;
1855 }
1856 
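// Return true if Val can be truncated to Size bits without losing
// information, i.e. if it is representable as either an unsigned or a
// signed Size-bit integer.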
1857 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1858   return isUIntN(Size, Val) || isIntN(Size, Val);
1859 }
1860 
1861 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1862   if (VT.getScalarType() == MVT::i16) {
1863     // FP immediate values are broken.
1864     return isInlinableIntLiteral(Val);
1865   }
1866 
1867   // f16/v2f16 operands work correctly for all values.
1868   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1869 }
1870 
1871 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1872 
1873   // This is a hack to enable named inline values like
1874   // shared_base with both 32-bit and 64-bit operands.
1875   // Note that these values are defined as
1876   // 32-bit operands only.
1877   if (isInlineValue()) {
1878     return true;
1879   }
1880 
1881   if (!isImmTy(ImmTyNone)) {
1882     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1883     return false;
1884   }
1885   // TODO: We should avoid using host float here. It would be better to
1886   // check the float bit values which is what a few other places do.
1887   // We've had bot failures before due to weird NaN support on mips hosts.
1888 
1889   APInt Literal(64, Imm.Val);
1890 
1891   if (Imm.IsFPImm) { // We got fp literal token
1892     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1893       return AMDGPU::isInlinableLiteral64(Imm.Val,
1894                                           AsmParser->hasInv2PiInlineImm());
1895     }
1896 
1897     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1898     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1899       return false;
1900 
1901     if (type.getScalarSizeInBits() == 16) {
1902       return isInlineableLiteralOp16(
1903         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1904         type, AsmParser->hasInv2PiInlineImm());
1905     }
1906 
1907     // Check if single precision literal is inlinable
1908     return AMDGPU::isInlinableLiteral32(
1909       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1910       AsmParser->hasInv2PiInlineImm());
1911   }
1912 
1913   // We got int literal token.
1914   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1915     return AMDGPU::isInlinableLiteral64(Imm.Val,
1916                                         AsmParser->hasInv2PiInlineImm());
1917   }
1918 
1919   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1920     return false;
1921   }
1922 
1923   if (type.getScalarSizeInBits() == 16) {
1924     return isInlineableLiteralOp16(
1925       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1926       type, AsmParser->hasInv2PiInlineImm());
1927   }
1928 
1929   return AMDGPU::isInlinableLiteral32(
1930     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1931     AsmParser->hasInv2PiInlineImm());
1932 }
1933 
1934 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1935   // Check that this immediate can be added as a literal.
1936   if (!isImmTy(ImmTyNone)) {
1937     return false;
1938   }
1939 
1940   if (!Imm.IsFPImm) {
1941     // We got int literal token.
1942 
1943     if (type == MVT::f64 && hasFPModifiers()) {
1944       // Cannot apply fp modifiers to int literals preserving the same semantics
1945       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1946       // disable these cases.
1947       return false;
1948     }
1949 
1950     unsigned Size = type.getSizeInBits();
1951     if (Size == 64)
1952       Size = 32;
1953 
1954     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1955     // types.
1956     return isSafeTruncation(Imm.Val, Size);
1957   }
1958 
1959   // We got fp literal token
1960   if (type == MVT::f64) { // Expected 64-bit fp operand
1961     // The low 32 bits of the literal will be zeroed out, but such literals are accepted.
1962     return true;
1963   }
1964 
1965   if (type == MVT::i64) { // Expected 64-bit int operand
1966     // We don't allow fp literals in 64-bit integer instructions. It is
1967     // unclear how we should encode them.
1968     return false;
1969   }
1970 
1971   // We allow fp literals with f16x2 operands assuming that the specified
1972   // literal goes into the lower half and the upper half is zero. We also
1973   // require that the literal may be losslessly converted to f16.
1974   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1975                      (type == MVT::v2i16)? MVT::i16 :
1976                      (type == MVT::v2f32)? MVT::f32 : type;
1977 
1978   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1979   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1980 }
1981 
1982 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1983   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1984 }
1985 
1986 bool AMDGPUOperand::isVRegWithInputMods() const {
1987   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1988          // GFX90A allows DPP on 64-bit operands.
1989          (isRegClass(AMDGPU::VReg_64RegClassID) &&
1990           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1991 }
1992 
1993 bool AMDGPUOperand::isT16VRegWithInputMods() const {
1994   return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
1995 }
1996 
1997 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1998   if (AsmParser->isVI())
1999     return isVReg32();
2000   else if (AsmParser->isGFX9Plus())
2001     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2002   else
2003     return false;
2004 }
2005 
2006 bool AMDGPUOperand::isSDWAFP16Operand() const {
2007   return isSDWAOperand(MVT::f16);
2008 }
2009 
2010 bool AMDGPUOperand::isSDWAFP32Operand() const {
2011   return isSDWAOperand(MVT::f32);
2012 }
2013 
2014 bool AMDGPUOperand::isSDWAInt16Operand() const {
2015   return isSDWAOperand(MVT::i16);
2016 }
2017 
2018 bool AMDGPUOperand::isSDWAInt32Operand() const {
2019   return isSDWAOperand(MVT::i32);
2020 }
2021 
2022 bool AMDGPUOperand::isBoolReg() const {
2023   auto FB = AsmParser->getFeatureBits();
2024   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2025                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2026 }
2027 
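// Apply the parsed 'abs'/'neg' source modifiers to the raw bits of a
// floating-point literal of the given byte size: 'abs' clears the sign
// bit, 'neg' toggles it.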
2028 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2029 {
2030   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2031   assert(Size == 2 || Size == 4 || Size == 8);
2032 
2033   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2034 
2035   if (Imm.Mods.Abs) {
2036     Val &= ~FpSignMask;
2037   }
2038   if (Imm.Mods.Neg) {
2039     Val ^= FpSignMask;
2040   }
2041 
2042   return Val;
2043 }
2044 
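// Add this operand to Inst as an immediate. Expressions are added as-is;
// values targeting SI src operands go through addLiteralImmOperand() so
// that inline constants and literals are encoded properly; everything
// else is added as a plain immediate.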
2045 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2046   if (isExpr()) {
2047     Inst.addOperand(MCOperand::createExpr(Expr));
2048     return;
2049   }
2050 
2051   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2052                              Inst.getNumOperands())) {
2053     addLiteralImmOperand(Inst, Imm.Val,
2054                          ApplyModifiers &&
2055                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2056   } else {
2057     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2058     Inst.addOperand(MCOperand::createImm(Imm.Val));
2059     setImmKindNone();
2060   }
2061 }
2062 
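// Encode Val for an SI src operand. Values that match an inline constant
// are emitted directly; other values are emitted as 32-bit literals
// (applying any FP source modifiers first); KIMM operands always take a
// mandatory literal.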
2063 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2064   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2065   auto OpNum = Inst.getNumOperands();
2066   // Check that this operand accepts literals
2067   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2068 
2069   if (ApplyModifiers) {
2070     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2071     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2072     Val = applyInputFPModifiers(Val, Size);
2073   }
2074 
2075   APInt Literal(64, Val);
2076   uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2077 
2078   if (Imm.IsFPImm) { // We got fp literal token
2079     switch (OpTy) {
2080     case AMDGPU::OPERAND_REG_IMM_INT64:
2081     case AMDGPU::OPERAND_REG_IMM_FP64:
2082     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2083     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2084     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2085       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2086                                        AsmParser->hasInv2PiInlineImm())) {
2087         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2088         setImmKindConst();
2089         return;
2090       }
2091 
2092       // Non-inlineable
2093       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2094         // For fp operands we check if low 32 bits are zeros
2095         if (Literal.getLoBits(32) != 0) {
2096           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2097           "Can't encode literal as exact 64-bit floating-point operand. "
2098           "Low 32-bits will be set to zero");
2099         }
2100 
2101         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2102         setImmKindLiteral();
2103         return;
2104       }
2105 
2106       // We don't allow fp literals in 64-bit integer instructions. It is
2107       // unclear how we should encode them. This case should be checked earlier
2108       // in predicate methods (isLiteralImm())
2109       llvm_unreachable("fp literal in 64-bit integer instruction.");
2110 
2111     case AMDGPU::OPERAND_REG_IMM_INT32:
2112     case AMDGPU::OPERAND_REG_IMM_FP32:
2113     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2114     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2115     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2116     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2117     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2118     case AMDGPU::OPERAND_REG_IMM_INT16:
2119     case AMDGPU::OPERAND_REG_IMM_FP16:
2120     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2121     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2122     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2123     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2124     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2125     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2126     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2127     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2128     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2129     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2130     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2131     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2132     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2133     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2134     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2135     case AMDGPU::OPERAND_KIMM32:
2136     case AMDGPU::OPERAND_KIMM16: {
2137       bool Lost;
2138       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2139       // Convert the literal to the operand's floating-point type.
2140       FPLiteral.convert(*getOpFltSemantics(OpTy),
2141                         APFloat::rmNearestTiesToEven, &Lost);
2142       // Precision loss is allowed, but overflow and underflow are not. This
2143       // should have been checked earlier in isLiteralImm().
2144 
2145       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2146       Inst.addOperand(MCOperand::createImm(ImmVal));
2147       if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2148         setImmKindMandatoryLiteral();
2149       } else {
2150         setImmKindLiteral();
2151       }
2152       return;
2153     }
2154     default:
2155       llvm_unreachable("invalid operand size");
2156     }
2157 
2158     return;
2159   }
2160 
2161   // We got int literal token.
2162   // Only sign extend inline immediates.
2163   switch (OpTy) {
2164   case AMDGPU::OPERAND_REG_IMM_INT32:
2165   case AMDGPU::OPERAND_REG_IMM_FP32:
2166   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2167   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2168   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2169   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2170   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2171   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2172   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2173   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2174   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2175   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2176   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2177     if (isSafeTruncation(Val, 32) &&
2178         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2179                                      AsmParser->hasInv2PiInlineImm())) {
2180       Inst.addOperand(MCOperand::createImm(Val));
2181       setImmKindConst();
2182       return;
2183     }
2184 
2185     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2186     setImmKindLiteral();
2187     return;
2188 
2189   case AMDGPU::OPERAND_REG_IMM_INT64:
2190   case AMDGPU::OPERAND_REG_IMM_FP64:
2191   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2192   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2193   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2194     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2195       Inst.addOperand(MCOperand::createImm(Val));
2196       setImmKindConst();
2197       return;
2198     }
2199 
2200     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2201     setImmKindLiteral();
2202     return;
2203 
2204   case AMDGPU::OPERAND_REG_IMM_INT16:
2205   case AMDGPU::OPERAND_REG_IMM_FP16:
2206   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2207   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2208   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2209   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2210   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2211     if (isSafeTruncation(Val, 16) &&
2212         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2213                                      AsmParser->hasInv2PiInlineImm())) {
2214       Inst.addOperand(MCOperand::createImm(Val));
2215       setImmKindConst();
2216       return;
2217     }
2218 
2219     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2220     setImmKindLiteral();
2221     return;
2222 
2223   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2224   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2225   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2226   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2227     assert(isSafeTruncation(Val, 16));
2228     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2229                                         AsmParser->hasInv2PiInlineImm()));
2230 
2231     Inst.addOperand(MCOperand::createImm(Val));
2232     return;
2233   }
2234   case AMDGPU::OPERAND_KIMM32:
2235     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2236     setImmKindMandatoryLiteral();
2237     return;
2238   case AMDGPU::OPERAND_KIMM16:
2239     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2240     setImmKindMandatoryLiteral();
2241     return;
2242   default:
2243     llvm_unreachable("invalid operand size");
2244   }
2245 }
2246 
2247 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2248   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2249 }
2250 
2251 bool AMDGPUOperand::isInlineValue() const {
2252   return isRegKind() && ::isInlineValue(getReg());
2253 }
2254 
2255 //===----------------------------------------------------------------------===//
2256 // AsmParser
2257 //===----------------------------------------------------------------------===//
2258 
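// Map a register kind and a width in bits to the corresponding register
// class ID, or return -1 if the combination is not supported.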
2259 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2260   if (Is == IS_VGPR) {
2261     switch (RegWidth) {
2262       default: return -1;
2263       case 32:
2264         return AMDGPU::VGPR_32RegClassID;
2265       case 64:
2266         return AMDGPU::VReg_64RegClassID;
2267       case 96:
2268         return AMDGPU::VReg_96RegClassID;
2269       case 128:
2270         return AMDGPU::VReg_128RegClassID;
2271       case 160:
2272         return AMDGPU::VReg_160RegClassID;
2273       case 192:
2274         return AMDGPU::VReg_192RegClassID;
2275       case 224:
2276         return AMDGPU::VReg_224RegClassID;
2277       case 256:
2278         return AMDGPU::VReg_256RegClassID;
2279       case 288:
2280         return AMDGPU::VReg_288RegClassID;
2281       case 320:
2282         return AMDGPU::VReg_320RegClassID;
2283       case 352:
2284         return AMDGPU::VReg_352RegClassID;
2285       case 384:
2286         return AMDGPU::VReg_384RegClassID;
2287       case 512:
2288         return AMDGPU::VReg_512RegClassID;
2289       case 1024:
2290         return AMDGPU::VReg_1024RegClassID;
2291     }
2292   } else if (Is == IS_TTMP) {
2293     switch (RegWidth) {
2294       default: return -1;
2295       case 32:
2296         return AMDGPU::TTMP_32RegClassID;
2297       case 64:
2298         return AMDGPU::TTMP_64RegClassID;
2299       case 128:
2300         return AMDGPU::TTMP_128RegClassID;
2301       case 256:
2302         return AMDGPU::TTMP_256RegClassID;
2303       case 512:
2304         return AMDGPU::TTMP_512RegClassID;
2305     }
2306   } else if (Is == IS_SGPR) {
2307     switch (RegWidth) {
2308       default: return -1;
2309       case 32:
2310         return AMDGPU::SGPR_32RegClassID;
2311       case 64:
2312         return AMDGPU::SGPR_64RegClassID;
2313       case 96:
2314         return AMDGPU::SGPR_96RegClassID;
2315       case 128:
2316         return AMDGPU::SGPR_128RegClassID;
2317       case 160:
2318         return AMDGPU::SGPR_160RegClassID;
2319       case 192:
2320         return AMDGPU::SGPR_192RegClassID;
2321       case 224:
2322         return AMDGPU::SGPR_224RegClassID;
2323       case 256:
2324         return AMDGPU::SGPR_256RegClassID;
2325       case 288:
2326         return AMDGPU::SGPR_288RegClassID;
2327       case 320:
2328         return AMDGPU::SGPR_320RegClassID;
2329       case 352:
2330         return AMDGPU::SGPR_352RegClassID;
2331       case 384:
2332         return AMDGPU::SGPR_384RegClassID;
2333       case 512:
2334         return AMDGPU::SGPR_512RegClassID;
2335     }
2336   } else if (Is == IS_AGPR) {
2337     switch (RegWidth) {
2338       default: return -1;
2339       case 32:
2340         return AMDGPU::AGPR_32RegClassID;
2341       case 64:
2342         return AMDGPU::AReg_64RegClassID;
2343       case 96:
2344         return AMDGPU::AReg_96RegClassID;
2345       case 128:
2346         return AMDGPU::AReg_128RegClassID;
2347       case 160:
2348         return AMDGPU::AReg_160RegClassID;
2349       case 192:
2350         return AMDGPU::AReg_192RegClassID;
2351       case 224:
2352         return AMDGPU::AReg_224RegClassID;
2353       case 256:
2354         return AMDGPU::AReg_256RegClassID;
2355       case 288:
2356         return AMDGPU::AReg_288RegClassID;
2357       case 320:
2358         return AMDGPU::AReg_320RegClassID;
2359       case 352:
2360         return AMDGPU::AReg_352RegClassID;
2361       case 384:
2362         return AMDGPU::AReg_384RegClassID;
2363       case 512:
2364         return AMDGPU::AReg_512RegClassID;
2365       case 1024:
2366         return AMDGPU::AReg_1024RegClassID;
2367     }
2368   }
2369   return -1;
2370 }
2371 
2372 static unsigned getSpecialRegForName(StringRef RegName) {
2373   return StringSwitch<unsigned>(RegName)
2374     .Case("exec", AMDGPU::EXEC)
2375     .Case("vcc", AMDGPU::VCC)
2376     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2377     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2378     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2379     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2380     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2381     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2382     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2383     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2384     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2385     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2386     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2387     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2388     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2389     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2390     .Case("m0", AMDGPU::M0)
2391     .Case("vccz", AMDGPU::SRC_VCCZ)
2392     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2393     .Case("execz", AMDGPU::SRC_EXECZ)
2394     .Case("src_execz", AMDGPU::SRC_EXECZ)
2395     .Case("scc", AMDGPU::SRC_SCC)
2396     .Case("src_scc", AMDGPU::SRC_SCC)
2397     .Case("tba", AMDGPU::TBA)
2398     .Case("tma", AMDGPU::TMA)
2399     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2400     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2401     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2402     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2403     .Case("vcc_lo", AMDGPU::VCC_LO)
2404     .Case("vcc_hi", AMDGPU::VCC_HI)
2405     .Case("exec_lo", AMDGPU::EXEC_LO)
2406     .Case("exec_hi", AMDGPU::EXEC_HI)
2407     .Case("tma_lo", AMDGPU::TMA_LO)
2408     .Case("tma_hi", AMDGPU::TMA_HI)
2409     .Case("tba_lo", AMDGPU::TBA_LO)
2410     .Case("tba_hi", AMDGPU::TBA_HI)
2411     .Case("pc", AMDGPU::PC_REG)
2412     .Case("null", AMDGPU::SGPR_NULL)
2413     .Default(AMDGPU::NoRegister);
2414 }
2415 
2416 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2417                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2418   auto R = parseRegister();
2419   if (!R) return true;
2420   assert(R->isReg());
2421   RegNo = R->getReg();
2422   StartLoc = R->getStartLoc();
2423   EndLoc = R->getEndLoc();
2424   return false;
2425 }
2426 
2427 bool AMDGPUAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2428                                     SMLoc &EndLoc) {
2429   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2430 }
2431 
2432 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(MCRegister &RegNo,
2433                                                        SMLoc &StartLoc,
2434                                                        SMLoc &EndLoc) {
2435   bool Result =
2436       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2437   bool PendingErrors = getParser().hasPendingError();
2438   getParser().clearPendingErrors();
2439   if (PendingErrors)
2440     return MatchOperand_ParseFail;
2441   if (Result)
2442     return MatchOperand_NoMatch;
2443   return MatchOperand_Success;
2444 }
2445 
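// Append Reg1 to the register list being assembled in Reg/RegWidth.
// Matching LO/HI halves of special registers are merged into their 64-bit
// counterpart; regular registers must have consecutive indices and simply
// grow RegWidth by 32.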
2446 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2447                                             RegisterKind RegKind, unsigned Reg1,
2448                                             SMLoc Loc) {
2449   switch (RegKind) {
2450   case IS_SPECIAL:
2451     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2452       Reg = AMDGPU::EXEC;
2453       RegWidth = 64;
2454       return true;
2455     }
2456     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2457       Reg = AMDGPU::FLAT_SCR;
2458       RegWidth = 64;
2459       return true;
2460     }
2461     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2462       Reg = AMDGPU::XNACK_MASK;
2463       RegWidth = 64;
2464       return true;
2465     }
2466     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2467       Reg = AMDGPU::VCC;
2468       RegWidth = 64;
2469       return true;
2470     }
2471     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2472       Reg = AMDGPU::TBA;
2473       RegWidth = 64;
2474       return true;
2475     }
2476     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2477       Reg = AMDGPU::TMA;
2478       RegWidth = 64;
2479       return true;
2480     }
2481     Error(Loc, "register does not fit in the list");
2482     return false;
2483   case IS_VGPR:
2484   case IS_SGPR:
2485   case IS_AGPR:
2486   case IS_TTMP:
2487     if (Reg1 != Reg + RegWidth / 32) {
2488       Error(Loc, "registers in a list must have consecutive indices");
2489       return false;
2490     }
2491     RegWidth += 32;
2492     return true;
2493   default:
2494     llvm_unreachable("unexpected register kind");
2495   }
2496 }
2497 
2498 struct RegInfo {
2499   StringLiteral Name;
2500   RegisterKind Kind;
2501 };
2502 
2503 static constexpr RegInfo RegularRegisters[] = {
2504   {{"v"},    IS_VGPR},
2505   {{"s"},    IS_SGPR},
2506   {{"ttmp"}, IS_TTMP},
2507   {{"acc"},  IS_AGPR},
2508   {{"a"},    IS_AGPR},
2509 };
2510 
2511 static bool isRegularReg(RegisterKind Kind) {
2512   return Kind == IS_VGPR ||
2513          Kind == IS_SGPR ||
2514          Kind == IS_TTMP ||
2515          Kind == IS_AGPR;
2516 }
2517 
2518 static const RegInfo* getRegularRegInfo(StringRef Str) {
2519   for (const RegInfo &Reg : RegularRegisters)
2520     if (Str.startswith(Reg.Name))
2521       return &Reg;
2522   return nullptr;
2523 }
2524 
2525 static bool getRegNum(StringRef Str, unsigned& Num) {
2526   return !Str.getAsInteger(10, Num);
2527 }
2528 
2529 bool
2530 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2531                             const AsmToken &NextToken) const {
2532 
2533   // A list of consecutive registers: [s0,s1,s2,s3]
2534   if (Token.is(AsmToken::LBrac))
2535     return true;
2536 
2537   if (!Token.is(AsmToken::Identifier))
2538     return false;
2539 
2540   // A single register like s0 or a range of registers like s[0:1]
2541 
2542   StringRef Str = Token.getString();
2543   const RegInfo *Reg = getRegularRegInfo(Str);
2544   if (Reg) {
2545     StringRef RegName = Reg->Name;
2546     StringRef RegSuffix = Str.substr(RegName.size());
2547     if (!RegSuffix.empty()) {
2548       unsigned Num;
2549       // A single register with an index: rXX
2550       if (getRegNum(RegSuffix, Num))
2551         return true;
2552     } else {
2553       // A range of registers: r[XX:YY].
2554       if (NextToken.is(AsmToken::LBrac))
2555         return true;
2556     }
2557   }
2558 
2559   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2560 }
2561 
2562 bool
2563 AMDGPUAsmParser::isRegister()
2564 {
2565   return isRegister(getToken(), peekToken());
2566 }
2567 
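// Translate a parsed (kind, number, width) triple into an MC register,
// enforcing the SGPR/TTMP alignment requirement. Reports an error and
// returns AMDGPU::NoRegister on failure.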
2568 unsigned
2569 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2570                                unsigned RegNum,
2571                                unsigned RegWidth,
2572                                SMLoc Loc) {
2573 
2574   assert(isRegularReg(RegKind));
2575 
2576   unsigned AlignSize = 1;
2577   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2578     // SGPR and TTMP registers must be aligned.
2579     // Max required alignment is 4 dwords.
2580     AlignSize = std::min(RegWidth / 32, 4u);
2581   }
2582 
2583   if (RegNum % AlignSize != 0) {
2584     Error(Loc, "invalid register alignment");
2585     return AMDGPU::NoRegister;
2586   }
2587 
2588   unsigned RegIdx = RegNum / AlignSize;
2589   int RCID = getRegClass(RegKind, RegWidth);
2590   if (RCID == -1) {
2591     Error(Loc, "invalid or unsupported register size");
2592     return AMDGPU::NoRegister;
2593   }
2594 
2595   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2596   const MCRegisterClass RC = TRI->getRegClass(RCID);
2597   if (RegIdx >= RC.getNumRegs()) {
2598     Error(Loc, "register index is out of range");
2599     return AMDGPU::NoRegister;
2600   }
2601 
2602   return RC.getRegister(RegIdx);
2603 }
2604 
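// Parse a register index or range in square brackets, e.g. "[0]" or
// "[0:3]". On success Num holds the first index and RegWidth the total
// width in bits.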
2605 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2606   int64_t RegLo, RegHi;
2607   if (!skipToken(AsmToken::LBrac, "missing register index"))
2608     return false;
2609 
2610   SMLoc FirstIdxLoc = getLoc();
2611   SMLoc SecondIdxLoc;
2612 
2613   if (!parseExpr(RegLo))
2614     return false;
2615 
2616   if (trySkipToken(AsmToken::Colon)) {
2617     SecondIdxLoc = getLoc();
2618     if (!parseExpr(RegHi))
2619       return false;
2620   } else {
2621     RegHi = RegLo;
2622   }
2623 
2624   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2625     return false;
2626 
2627   if (!isUInt<32>(RegLo)) {
2628     Error(FirstIdxLoc, "invalid register index");
2629     return false;
2630   }
2631 
2632   if (!isUInt<32>(RegHi)) {
2633     Error(SecondIdxLoc, "invalid register index");
2634     return false;
2635   }
2636 
2637   if (RegLo > RegHi) {
2638     Error(FirstIdxLoc, "first register index should not exceed second index");
2639     return false;
2640   }
2641 
2642   Num = static_cast<unsigned>(RegLo);
2643   RegWidth = 32 * ((RegHi - RegLo) + 1);
2644   return true;
2645 }
2646 
2647 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2648                                           unsigned &RegNum, unsigned &RegWidth,
2649                                           SmallVectorImpl<AsmToken> &Tokens) {
2650   assert(isToken(AsmToken::Identifier));
2651   unsigned Reg = getSpecialRegForName(getTokenStr());
2652   if (Reg) {
2653     RegNum = 0;
2654     RegWidth = 32;
2655     RegKind = IS_SPECIAL;
2656     Tokens.push_back(getToken());
2657     lex(); // skip register name
2658   }
2659   return Reg;
2660 }
2661 
2662 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2663                                           unsigned &RegNum, unsigned &RegWidth,
2664                                           SmallVectorImpl<AsmToken> &Tokens) {
2665   assert(isToken(AsmToken::Identifier));
2666   StringRef RegName = getTokenStr();
2667   auto Loc = getLoc();
2668 
2669   const RegInfo *RI = getRegularRegInfo(RegName);
2670   if (!RI) {
2671     Error(Loc, "invalid register name");
2672     return AMDGPU::NoRegister;
2673   }
2674 
2675   Tokens.push_back(getToken());
2676   lex(); // skip register name
2677 
2678   RegKind = RI->Kind;
2679   StringRef RegSuffix = RegName.substr(RI->Name.size());
2680   if (!RegSuffix.empty()) {
2681     // Single 32-bit register: vXX.
2682     if (!getRegNum(RegSuffix, RegNum)) {
2683       Error(Loc, "invalid register index");
2684       return AMDGPU::NoRegister;
2685     }
2686     RegWidth = 32;
2687   } else {
2688     // Range of registers: v[XX:YY]. ":YY" is optional.
2689     if (!ParseRegRange(RegNum, RegWidth))
2690       return AMDGPU::NoRegister;
2691   }
2692 
2693   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2694 }
2695 
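// Parse a bracketed list of consecutive 32-bit registers of the same
// kind, e.g. "[s0,s1,s2,s3]", and return the register covering the whole
// list.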
2696 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2697                                        unsigned &RegWidth,
2698                                        SmallVectorImpl<AsmToken> &Tokens) {
2699   unsigned Reg = AMDGPU::NoRegister;
2700   auto ListLoc = getLoc();
2701 
2702   if (!skipToken(AsmToken::LBrac,
2703                  "expected a register or a list of registers")) {
2704     return AMDGPU::NoRegister;
2705   }
2706 
2707   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2708 
2709   auto Loc = getLoc();
2710   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2711     return AMDGPU::NoRegister;
2712   if (RegWidth != 32) {
2713     Error(Loc, "expected a single 32-bit register");
2714     return AMDGPU::NoRegister;
2715   }
2716 
2717   for (; trySkipToken(AsmToken::Comma); ) {
2718     RegisterKind NextRegKind;
2719     unsigned NextReg, NextRegNum, NextRegWidth;
2720     Loc = getLoc();
2721 
2722     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2723                              NextRegNum, NextRegWidth,
2724                              Tokens)) {
2725       return AMDGPU::NoRegister;
2726     }
2727     if (NextRegWidth != 32) {
2728       Error(Loc, "expected a single 32-bit register");
2729       return AMDGPU::NoRegister;
2730     }
2731     if (NextRegKind != RegKind) {
2732       Error(Loc, "registers in a list must be of the same kind");
2733       return AMDGPU::NoRegister;
2734     }
2735     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2736       return AMDGPU::NoRegister;
2737   }
2738 
2739   if (!skipToken(AsmToken::RBrac,
2740                  "expected a comma or a closing square bracket")) {
2741     return AMDGPU::NoRegister;
2742   }
2743 
2744   if (isRegularReg(RegKind))
2745     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2746 
2747   return Reg;
2748 }
2749 
2750 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2751                                           unsigned &RegNum, unsigned &RegWidth,
2752                                           SmallVectorImpl<AsmToken> &Tokens) {
2753   auto Loc = getLoc();
2754   Reg = AMDGPU::NoRegister;
2755 
2756   if (isToken(AsmToken::Identifier)) {
2757     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2758     if (Reg == AMDGPU::NoRegister)
2759       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2760   } else {
2761     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2762   }
2763 
2764   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2765   if (Reg == AMDGPU::NoRegister) {
2766     assert(Parser.hasPendingError());
2767     return false;
2768   }
2769 
2770   if (!subtargetHasRegister(*TRI, Reg)) {
2771     if (Reg == AMDGPU::SGPR_NULL) {
2772       Error(Loc, "'null' operand is not supported on this GPU");
2773     } else {
2774       Error(Loc, "register not available on this GPU");
2775     }
2776     return false;
2777   }
2778 
2779   return true;
2780 }
2781 
2782 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2783                                           unsigned &RegNum, unsigned &RegWidth,
2784                                           bool RestoreOnFailure /*=false*/) {
2785   Reg = AMDGPU::NoRegister;
2786 
2787   SmallVector<AsmToken, 1> Tokens;
2788   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2789     if (RestoreOnFailure) {
2790       while (!Tokens.empty()) {
2791         getLexer().UnLex(Tokens.pop_back_val());
2792       }
2793     }
2794     return true;
2795   }
2796   return false;
2797 }
2798 
2799 std::optional<StringRef>
2800 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2801   switch (RegKind) {
2802   case IS_VGPR:
2803     return StringRef(".amdgcn.next_free_vgpr");
2804   case IS_SGPR:
2805     return StringRef(".amdgcn.next_free_sgpr");
2806   default:
2807     return std::nullopt;
2808   }
2809 }
2810 
2811 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2812   auto SymbolName = getGprCountSymbolName(RegKind);
2813   assert(SymbolName && "initializing invalid register kind");
2814   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2815   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2816 }
2817 
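// Raise the .amdgcn.next_free_{v,s}gpr symbol so that it stays above the
// highest register index used so far. Reports an error and returns false
// if the symbol is not a variable with an absolute value.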
2818 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2819                                             unsigned DwordRegIndex,
2820                                             unsigned RegWidth) {
2821   // Symbols are only defined for GCN targets
2822   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2823     return true;
2824 
2825   auto SymbolName = getGprCountSymbolName(RegKind);
2826   if (!SymbolName)
2827     return true;
2828   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2829 
2830   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2831   int64_t OldCount;
2832 
2833   if (!Sym->isVariable())
2834     return !Error(getLoc(),
2835                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2836   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2837     return !Error(
2838         getLoc(),
2839         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2840 
2841   if (OldCount <= NewMax)
2842     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2843 
2844   return true;
2845 }
2846 
2847 std::unique_ptr<AMDGPUOperand>
2848 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2849   const auto &Tok = getToken();
2850   SMLoc StartLoc = Tok.getLoc();
2851   SMLoc EndLoc = Tok.getEndLoc();
2852   RegisterKind RegKind;
2853   unsigned Reg, RegNum, RegWidth;
2854 
2855   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2856     return nullptr;
2857   }
2858   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2859     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2860       return nullptr;
2861   } else
2862     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2863   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2864 }
2865 
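// Parse an immediate operand: an optionally negated floating-point
// literal, or an integer/symbolic expression. Returns NoMatch if the next
// token starts a register.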
2866 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2867                                       bool HasSP3AbsModifier) {
2868   // TODO: add syntactic sugar for 1/(2*PI)
2869 
2870   if (isRegister())
2871     return ParseStatus::NoMatch;
2872   assert(!isModifier());
2873 
2874   const auto& Tok = getToken();
2875   const auto& NextTok = peekToken();
2876   bool IsReal = Tok.is(AsmToken::Real);
2877   SMLoc S = getLoc();
2878   bool Negate = false;
2879 
2880   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2881     lex();
2882     IsReal = true;
2883     Negate = true;
2884   }
2885 
2886   if (IsReal) {
2887     // Floating-point expressions are not supported;
2888     // only floating-point literals with an optional
2889     // sign are allowed here.
2890 
2891     StringRef Num = getTokenStr();
2892     lex();
2893 
2894     APFloat RealVal(APFloat::IEEEdouble());
2895     auto roundMode = APFloat::rmNearestTiesToEven;
2896     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
2897       return ParseStatus::Failure;
2898     if (Negate)
2899       RealVal.changeSign();
2900 
2901     Operands.push_back(
2902       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2903                                AMDGPUOperand::ImmTyNone, true));
2904 
2905     return ParseStatus::Success;
2906 
2907   } else {
2908     int64_t IntVal;
2909     const MCExpr *Expr;
2910     SMLoc S = getLoc();
2911 
2912     if (HasSP3AbsModifier) {
2913       // This is a workaround for handling expressions
2914       // as arguments of the SP3 'abs' modifier, for example:
2915       //     |1.0|
2916       //     |-1|
2917       //     |1+x|
2918       // This syntax is not compatible with the syntax of standard
2919       // MC expressions (due to the trailing '|').
2920       SMLoc EndLoc;
2921       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2922         return ParseStatus::Failure;
2923     } else {
2924       if (Parser.parseExpression(Expr))
2925         return ParseStatus::Failure;
2926     }
2927 
2928     if (Expr->evaluateAsAbsolute(IntVal)) {
2929       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2930     } else {
2931       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2932     }
2933 
2934     return ParseStatus::Success;
2935   }
2936 
2937   return ParseStatus::NoMatch;
2938 }
2939 
2940 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2941   if (!isRegister())
2942     return ParseStatus::NoMatch;
2943 
2944   if (auto R = parseRegister()) {
2945     assert(R->isReg());
2946     Operands.push_back(std::move(R));
2947     return ParseStatus::Success;
2948   }
2949   return ParseStatus::Failure;
2950 }
2951 
2952 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
2953                                            bool HasSP3AbsMod) {
2954   ParseStatus Res = parseReg(Operands);
2955   if (!Res.isNoMatch())
2956     return Res;
2957   if (isModifier())
2958     return ParseStatus::NoMatch;
2959   return parseImm(Operands, HasSP3AbsMod);
2960 }
2961 
2962 bool
2963 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2964   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2965     const auto &str = Token.getString();
2966     return str == "abs" || str == "neg" || str == "sext";
2967   }
2968   return false;
2969 }
2970 
2971 bool
2972 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2973   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2974 }
2975 
2976 bool
2977 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2978   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2979 }
2980 
2981 bool
2982 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2983   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2984 }
2985 
2986 // Check if this is an operand modifier or an opcode modifier
2987 // which may look like an expression but is not. We should
2988 // avoid parsing these modifiers as expressions. Currently
2989 // recognized sequences are:
2990 //   |...|
2991 //   abs(...)
2992 //   neg(...)
2993 //   sext(...)
2994 //   -reg
2995 //   -|...|
2996 //   -abs(...)
2997 //   name:...
2998 //
2999 bool
3000 AMDGPUAsmParser::isModifier() {
3001 
3002   AsmToken Tok = getToken();
3003   AsmToken NextToken[2];
3004   peekTokens(NextToken);
3005 
3006   return isOperandModifier(Tok, NextToken[0]) ||
3007          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3008          isOpcodeModifierWithVal(Tok, NextToken[0]);
3009 }
3010 
3011 // Check if the current token is an SP3 'neg' modifier.
3012 // Currently this modifier is allowed in the following context:
3013 //
3014 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3015 // 2. Before an 'abs' modifier: -abs(...)
3016 // 3. Before an SP3 'abs' modifier: -|...|
3017 //
3018 // In all other cases "-" is handled as a part
3019 // of an expression that follows the sign.
3020 //
3021 // Note: When "-" is followed by an integer literal N,
3022 // it is interpreted as integer negation rather than as
3023 // a floating-point NEG modifier applied to N.
3024 // Besides being counter-intuitive, such use of the floating-point
3025 // NEG modifier would result in different meanings
3026 // of integer literals used with VOP1/2/C and VOP3,
3027 // for example:
3028 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3029 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3030 // Negative fp literals with a preceding "-" are
3031 // handled likewise for uniformity.
3032 //
3033 bool
3034 AMDGPUAsmParser::parseSP3NegModifier() {
3035 
3036   AsmToken NextToken[2];
3037   peekTokens(NextToken);
3038 
3039   if (isToken(AsmToken::Minus) &&
3040       (isRegister(NextToken[0], NextToken[1]) ||
3041        NextToken[0].is(AsmToken::Pipe) ||
3042        isId(NextToken[0], "abs"))) {
3043     lex();
3044     return true;
3045   }
3046 
3047   return false;
3048 }
3049 
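// Parse a register or immediate wrapped in optional floating-point source
// modifiers, accepting both named syntax (neg(...), abs(...)) and SP3
// syntax (-..., |...|), and attach any parsed modifiers to the resulting
// operand.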
3050 ParseStatus
3051 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3052                                               bool AllowImm) {
3053   bool Neg, SP3Neg;
3054   bool Abs, SP3Abs;
3055   SMLoc Loc;
3056 
3057   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3058   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3059     return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3060 
3061   SP3Neg = parseSP3NegModifier();
3062 
3063   Loc = getLoc();
3064   Neg = trySkipId("neg");
3065   if (Neg && SP3Neg)
3066     return Error(Loc, "expected register or immediate");
3067   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3068     return ParseStatus::Failure;
3069 
3070   Abs = trySkipId("abs");
3071   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3072     return ParseStatus::Failure;
3073 
3074   Loc = getLoc();
3075   SP3Abs = trySkipToken(AsmToken::Pipe);
3076   if (Abs && SP3Abs)
3077     return Error(Loc, "expected register or immediate");
3078 
3079   ParseStatus Res;
3080   if (AllowImm) {
3081     Res = parseRegOrImm(Operands, SP3Abs);
3082   } else {
3083     Res = parseReg(Operands);
3084   }
3085   if (!Res.isSuccess())
3086     return (SP3Neg || Neg || SP3Abs || Abs) ? ParseStatus::Failure : Res;
3087 
3088   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3089     return ParseStatus::Failure;
3090   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3091     return ParseStatus::Failure;
3092   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3093     return ParseStatus::Failure;
3094 
3095   AMDGPUOperand::Modifiers Mods;
3096   Mods.Abs = Abs || SP3Abs;
3097   Mods.Neg = Neg || SP3Neg;
3098 
3099   if (Mods.hasFPModifiers()) {
3100     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3101     if (Op.isExpr())
3102       return Error(Op.getStartLoc(), "expected an absolute expression");
3103     Op.setModifiers(Mods);
3104   }
3105   return ParseStatus::Success;
3106 }
3107 
3108 ParseStatus
3109 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3110                                                bool AllowImm) {
3111   bool Sext = trySkipId("sext");
3112   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3113     return ParseStatus::Failure;
3114 
3115   ParseStatus Res;
3116   if (AllowImm) {
3117     Res = parseRegOrImm(Operands);
3118   } else {
3119     Res = parseReg(Operands);
3120   }
3121   if (!Res.isSuccess())
3122     return Sext ? ParseStatus::Failure : Res;
3123 
3124   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3125     return ParseStatus::Failure;
3126 
3127   AMDGPUOperand::Modifiers Mods;
3128   Mods.Sext = Sext;
3129 
3130   if (Mods.hasIntModifiers()) {
3131     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3132     if (Op.isExpr())
3133       return Error(Op.getStartLoc(), "expected an absolute expression");
3134     Op.setModifiers(Mods);
3135   }
3136 
3137   return ParseStatus::Success;
3138 }
3139 
3140 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3141   return parseRegOrImmWithFPInputMods(Operands, false);
3142 }
3143 
3144 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3145   return parseRegOrImmWithIntInputMods(Operands, false);
3146 }
3147 
3148 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3149   auto Loc = getLoc();
3150   if (trySkipId("off")) {
3151     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3152                                                 AMDGPUOperand::ImmTyOff, false));
3153     return ParseStatus::Success;
3154   }
3155 
3156   if (!isRegister())
3157     return ParseStatus::NoMatch;
3158 
3159   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3160   if (Reg) {
3161     Operands.push_back(std::move(Reg));
3162     return ParseStatus::Success;
3163   }
3164 
3165   return ParseStatus::Failure;
3166 }
3167 
3168 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3169   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3170 
3171   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3172       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3173       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3174       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3175     return Match_InvalidOperand;
3176 
3177   if ((TSFlags & SIInstrFlags::VOP3) &&
3178       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3179       getForcedEncodingSize() != 64)
3180     return Match_PreferE32;
3181 
3182   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3183       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3184     // v_mac_f32/16 allow only dst_sel == DWORD.
3185     auto OpNum =
3186         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3187     const auto &Op = Inst.getOperand(OpNum);
3188     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3189       return Match_InvalidOperand;
3190     }
3191   }
3192 
3193   return Match_Success;
3194 }
3195 
3196 static ArrayRef<unsigned> getAllVariants() {
3197   static const unsigned Variants[] = {
3198     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3199     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3200     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3201   };
3202 
3203   return ArrayRef(Variants);
3204 }
3205 
3206 // Return the asm variants we should check.
3207 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3208   if (isForcedDPP() && isForcedVOP3()) {
3209     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3210     return ArrayRef(Variants);
3211   }
3212   if (getForcedEncodingSize() == 32) {
3213     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3214     return ArrayRef(Variants);
3215   }
3216 
3217   if (isForcedVOP3()) {
3218     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3219     return ArrayRef(Variants);
3220   }
3221 
3222   if (isForcedSDWA()) {
3223     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3224                                         AMDGPUAsmVariants::SDWA9};
3225     return ArrayRef(Variants);
3226   }
3227 
3228   if (isForcedDPP()) {
3229     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3230     return ArrayRef(Variants);
3231   }
3232 
3233   return getAllVariants();
3234 }
3235 
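// Illustrative mapping (an assumption based on the checks above): a mnemonic
// suffix forces a particular encoding variant, e.g.
//   v_add_f32_e32  -> 32-bit (DEFAULT) encoding
//   v_add_f32_e64  -> VOP3
//   v_add_f32_sdwa -> SDWA
//   v_add_f32_dpp  -> DPP
// getMatchedVariantName() below returns the matching name used in diagnostics.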
3236 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3237   if (isForcedDPP() && isForcedVOP3())
3238     return "e64_dpp";
3239 
3240   if (getForcedEncodingSize() == 32)
3241     return "e32";
3242 
3243   if (isForcedVOP3())
3244     return "e64";
3245 
3246   if (isForcedSDWA())
3247     return "sdwa";
3248 
3249   if (isForcedDPP())
3250     return "dpp";
3251 
3252   return "";
3253 }
3254 
3255 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3256   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3257   for (MCPhysReg Reg : Desc.implicit_uses()) {
3258     switch (Reg) {
3259     case AMDGPU::FLAT_SCR:
3260     case AMDGPU::VCC:
3261     case AMDGPU::VCC_LO:
3262     case AMDGPU::VCC_HI:
3263     case AMDGPU::M0:
3264       return Reg;
3265     default:
3266       break;
3267     }
3268   }
3269   return AMDGPU::NoRegister;
3270 }
3271 
3272 // NB: This code is correct only when used to check constant
3273 // bus limitations because GFX7 supports no f16 inline constants.
3274 // Note that there are no cases in which a GFX7 opcode violates
3275 // constant bus limitations due to the use of an f16 constant.
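// For illustration, the inline constants recognized below are roughly: small
// integers (-16..64), selected fp values (0.5, -0.5, 1.0, -1.0, 2.0, -2.0,
// 4.0, -4.0) and, when the target supports it, 1/(2*pi). Anything else must
// be encoded as a literal and therefore consumes the literal/constant-bus slot.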
3276 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3277                                        unsigned OpIdx) const {
3278   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3279 
3280   if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3281       AMDGPU::isKImmOperand(Desc, OpIdx)) {
3282     return false;
3283   }
3284 
3285   const MCOperand &MO = Inst.getOperand(OpIdx);
3286 
3287   int64_t Val = MO.getImm();
3288   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3289 
3290   switch (OpSize) { // expected operand size
3291   case 8:
3292     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3293   case 4:
3294     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3295   case 2: {
3296     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3297     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3298         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3299         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3300       return AMDGPU::isInlinableIntLiteral(Val);
3301 
3302     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3303         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3304         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3305       return AMDGPU::isInlinableIntLiteralV216(Val);
3306 
3307     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3308         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3309         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3310       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3311 
3312     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3313   }
3314   default:
3315     llvm_unreachable("invalid operand size");
3316   }
3317 }
3318 
3319 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3320   if (!isGFX10Plus())
3321     return 1;
3322 
3323   switch (Opcode) {
3324   // 64-bit shift instructions can use only one scalar value input
3325   case AMDGPU::V_LSHLREV_B64_e64:
3326   case AMDGPU::V_LSHLREV_B64_gfx10:
3327   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3328   case AMDGPU::V_LSHRREV_B64_e64:
3329   case AMDGPU::V_LSHRREV_B64_gfx10:
3330   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3331   case AMDGPU::V_ASHRREV_I64_e64:
3332   case AMDGPU::V_ASHRREV_I64_gfx10:
3333   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3334   case AMDGPU::V_LSHL_B64_e64:
3335   case AMDGPU::V_LSHR_B64_e64:
3336   case AMDGPU::V_ASHR_I64_e64:
3337     return 1;
3338   default:
3339     return 2;
3340   }
3341 }
3342 
3343 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3344 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3345 
3346 // Get regular operand indices in the same order as specified
3347 // in the instruction (but append mandatory literals to the end).
3348 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3349                                            bool AddMandatoryLiterals = false) {
3350 
3351   int16_t ImmIdx =
3352       AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3353 
3354   if (isVOPD(Opcode)) {
3355     int16_t ImmDeferredIdx =
3356         AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3357                              : -1;
3358 
3359     return {getNamedOperandIdx(Opcode, OpName::src0X),
3360             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3361             getNamedOperandIdx(Opcode, OpName::src0Y),
3362             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3363             ImmDeferredIdx,
3364             ImmIdx};
3365   }
3366 
3367   return {getNamedOperandIdx(Opcode, OpName::src0),
3368           getNamedOperandIdx(Opcode, OpName::src1),
3369           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3370 }
3371 
3372 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3373   const MCOperand &MO = Inst.getOperand(OpIdx);
3374   if (MO.isImm()) {
3375     return !isInlineConstant(Inst, OpIdx);
3376   } else if (MO.isReg()) {
3377     auto Reg = MO.getReg();
3378     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3379     auto PReg = mc2PseudoReg(Reg);
3380     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3381   } else {
3382     return true;
3383   }
3384 }
3385 
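// Worked example (illustrative): on pre-GFX10 targets only one scalar value
// may be read via the constant bus, so
//   v_add_f32_e64 v0, s1, s2   // two distinct SGPRs -> rejected
//   v_add_f32_e64 v0, s1, s1   // the same SGPR counts once -> accepted
// On GFX10+ the generic limit is 2 (see getConstantBusLimit above), and
// literals count toward the same limit.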
3386 bool AMDGPUAsmParser::validateConstantBusLimitations(
3387     const MCInst &Inst, const OperandVector &Operands) {
3388   const unsigned Opcode = Inst.getOpcode();
3389   const MCInstrDesc &Desc = MII.get(Opcode);
3390   unsigned LastSGPR = AMDGPU::NoRegister;
3391   unsigned ConstantBusUseCount = 0;
3392   unsigned NumLiterals = 0;
3393   unsigned LiteralSize;
3394 
3395   if (!(Desc.TSFlags &
3396         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3397          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3398       !isVOPD(Opcode))
3399     return true;
3400 
3401   // Check special imm operands (used by madmk, etc)
3402   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3403     ++NumLiterals;
3404     LiteralSize = 4;
3405   }
3406 
3407   SmallDenseSet<unsigned> SGPRsUsed;
3408   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3409   if (SGPRUsed != AMDGPU::NoRegister) {
3410     SGPRsUsed.insert(SGPRUsed);
3411     ++ConstantBusUseCount;
3412   }
3413 
3414   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3415 
3416   for (int OpIdx : OpIndices) {
3417     if (OpIdx == -1)
3418       continue;
3419 
3420     const MCOperand &MO = Inst.getOperand(OpIdx);
3421     if (usesConstantBus(Inst, OpIdx)) {
3422       if (MO.isReg()) {
3423         LastSGPR = mc2PseudoReg(MO.getReg());
3424         // Pairs of registers with a partial intersection, such as:
3425         //   s0, s[0:1]
3426         //   flat_scratch_lo, flat_scratch
3427         //   flat_scratch_lo, flat_scratch_hi
3428         // are theoretically valid but they are disabled anyway.
3429         // Note that this code mimics SIInstrInfo::verifyInstruction
3430         if (SGPRsUsed.insert(LastSGPR).second) {
3431           ++ConstantBusUseCount;
3432         }
3433       } else { // Expression or a literal
3434 
3435         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3436           continue; // special operand like VINTERP attr_chan
3437 
3438         // An instruction may use only one literal.
3439         // This has been validated on the previous step.
3440         // See validateVOPLiteral.
3441         // This literal may be used as more than one operand.
3442         // If all these operands are of the same size,
3443         // this literal counts as one scalar value.
3444         // Otherwise it counts as 2 scalar values.
3445         // See "GFX10 Shader Programming", section 3.6.2.3.
3446 
3447         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3448         if (Size < 4)
3449           Size = 4;
3450 
3451         if (NumLiterals == 0) {
3452           NumLiterals = 1;
3453           LiteralSize = Size;
3454         } else if (LiteralSize != Size) {
3455           NumLiterals = 2;
3456         }
3457       }
3458     }
3459   }
3460   ConstantBusUseCount += NumLiterals;
3461 
3462   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3463     return true;
3464 
3465   SMLoc LitLoc = getLitLoc(Operands);
3466   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3467   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3468   Error(Loc, "invalid operand (violates constant bus restrictions)");
3469   return false;
3470 }
3471 
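// Illustrative example of the VOPD constraints checked below (assumed here:
// a VGPR bank is the register number modulo 4, and the two destinations must
// be one even, one odd):
//   v_dual_mul_f32 v0, v1, v2 :: v_dual_add_f32 v3, v4, v7  // accepted
//   v_dual_mul_f32 v0, v1, v2 :: v_dual_add_f32 v3, v5, v7  // src0 bank
//                                                           // conflict (v1/v5)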
3472 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3473     const MCInst &Inst, const OperandVector &Operands) {
3474 
3475   const unsigned Opcode = Inst.getOpcode();
3476   if (!isVOPD(Opcode))
3477     return true;
3478 
3479   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3480 
3481   auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3482     const MCOperand &Opr = Inst.getOperand(OperandIdx);
3483     return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3484                ? Opr.getReg()
3485                : MCRegister::NoRegister;
3486   };
3487 
3488   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3489   auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(getVRegIdx);
3490   if (!InvalidCompOprIdx)
3491     return true;
3492 
3493   auto CompOprIdx = *InvalidCompOprIdx;
3494   auto ParsedIdx =
3495       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3496                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3497   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3498 
3499   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3500   if (CompOprIdx == VOPD::Component::DST) {
3501     Error(Loc, "one dst register must be even and the other odd");
3502   } else {
3503     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3504     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3505                    " operands must use different VGPR banks");
3506   }
3507 
3508   return false;
3509 }
3510 
3511 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3512 
3513   const unsigned Opc = Inst.getOpcode();
3514   const MCInstrDesc &Desc = MII.get(Opc);
3515 
3516   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3517     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3518     assert(ClampIdx != -1);
3519     return Inst.getOperand(ClampIdx).getImm() == 0;
3520   }
3521 
3522   return true;
3523 }
3524 
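// Worked example (illustrative) of the dmask/tfe size check below:
//   image_load v[0:3], v0, s[0:7] dmask:0xf      ; 4 channels -> 4 VGPRs, OK
//   image_load v[0:2], v0, s[0:7] dmask:0x7 tfe  ; 3 channels + tfe needs 4 -> rejected
// With packed d16 the data size is halved and rounded up.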
3525 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3526                                            const SMLoc &IDLoc) {
3527 
3528   const unsigned Opc = Inst.getOpcode();
3529   const MCInstrDesc &Desc = MII.get(Opc);
3530 
3531   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3532     return true;
3533 
3534   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3535   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3536   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3537 
3538   assert(VDataIdx != -1);
3539 
3540   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3541     return true;
3542 
3543   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3544   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3545   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3546   if (DMask == 0)
3547     DMask = 1;
3548 
3549   bool IsPackedD16 = false;
3550   unsigned DataSize =
3551       (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3552   if (hasPackedD16()) {
3553     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3554     IsPackedD16 = D16Idx >= 0;
3555     if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3556       DataSize = (DataSize + 1) / 2;
3557   }
3558 
3559   if ((VDataSize / 4) == DataSize + TFESize)
3560     return true;
3561 
3562   StringRef Modifiers;
3563   if (isGFX90A())
3564     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3565   else
3566     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3567 
3568   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3569   return false;
3570 }
3571 
3572 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3573                                            const SMLoc &IDLoc) {
3574   const unsigned Opc = Inst.getOpcode();
3575   const MCInstrDesc &Desc = MII.get(Opc);
3576 
3577   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3578     return true;
3579 
3580   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3581 
3582   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3583       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3584   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3585   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3586   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3587   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3588 
3589   assert(VAddr0Idx != -1);
3590   assert(SrsrcIdx != -1);
3591   assert(SrsrcIdx > VAddr0Idx);
3592 
3593   bool IsA16 = Inst.getOperand(A16Idx).getImm();
3594   if (BaseOpcode->BVH) {
3595     if (IsA16 == BaseOpcode->A16)
3596       return true;
3597     Error(IDLoc, "image address size does not match a16");
3598     return false;
3599   }
3600 
3601   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3602   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3603   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3604   unsigned ActualAddrSize =
3605       IsNSA ? SrsrcIdx - VAddr0Idx
3606             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3607 
3608   unsigned ExpectedAddrSize =
3609       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3610 
3611   if (IsNSA) {
3612     if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
3613       int VAddrLastIdx = SrsrcIdx - 1;
3614       unsigned VAddrLastSize =
3615           AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3616 
3617       ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3618     }
3619   } else {
3620     if (ExpectedAddrSize > 12)
3621       ExpectedAddrSize = 16;
3622 
3623     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3624     // This provides backward compatibility for assembly created
3625     // before 160b/192b/224b types were directly supported.
3626     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3627       return true;
3628   }
3629 
3630   if (ActualAddrSize == ExpectedAddrSize)
3631     return true;
3632 
3633   Error(IDLoc, "image address size does not match dim and a16");
3634   return false;
3635 }
3636 
3637 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3638 
3639   const unsigned Opc = Inst.getOpcode();
3640   const MCInstrDesc &Desc = MII.get(Opc);
3641 
3642   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3643     return true;
3644   if (!Desc.mayLoad() || !Desc.mayStore())
3645     return true; // Not atomic
3646 
3647   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3648   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3649 
3650   // This is an incomplete check because image_atomic_cmpswap
3651   // may only use 0x3 and 0xf while other atomic operations
3652   // may use 0x1 and 0x3. However these limitations are
3653   // verified when we check that dmask matches dst size.
3654   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3655 }
3656 
3657 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3658 
3659   const unsigned Opc = Inst.getOpcode();
3660   const MCInstrDesc &Desc = MII.get(Opc);
3661 
3662   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3663     return true;
3664 
3665   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3666   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3667 
3668   // GATHER4 instructions use dmask in a different fashion compared to
3669   // other MIMG instructions. The only useful DMASK values are
3670   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3671   // (red,red,red,red) etc.) The ISA document doesn't mention
3672   // this.
3673   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3674 }
3675 
3676 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3677   const unsigned Opc = Inst.getOpcode();
3678   const MCInstrDesc &Desc = MII.get(Opc);
3679 
3680   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3681     return true;
3682 
3683   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3684   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3685       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3686 
3687   if (!BaseOpcode->MSAA)
3688     return true;
3689 
3690   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3691   assert(DimIdx != -1);
3692 
3693   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3694   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3695 
3696   return DimInfo->MSAA;
3697 }
3698 
3699 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3700 {
3701   switch (Opcode) {
3702   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3703   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3704   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3705     return true;
3706   default:
3707     return false;
3708   }
3709 }
3710 
3711 // movrels* opcodes should only allow VGPRs as src0.
3712 // This is specified in the .td description for vop1/vop3,
3713 // but sdwa is handled differently. See isSDWAOperand.
3714 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3715                                       const OperandVector &Operands) {
3716 
3717   const unsigned Opc = Inst.getOpcode();
3718   const MCInstrDesc &Desc = MII.get(Opc);
3719 
3720   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3721     return true;
3722 
3723   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3724   assert(Src0Idx != -1);
3725 
3726   SMLoc ErrLoc;
3727   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3728   if (Src0.isReg()) {
3729     auto Reg = mc2PseudoReg(Src0.getReg());
3730     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3731     if (!isSGPR(Reg, TRI))
3732       return true;
3733     ErrLoc = getRegLoc(Reg, Operands);
3734   } else {
3735     ErrLoc = getConstLoc(Operands);
3736   }
3737 
3738   Error(ErrLoc, "source operand must be a VGPR");
3739   return false;
3740 }
3741 
3742 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3743                                           const OperandVector &Operands) {
3744 
3745   const unsigned Opc = Inst.getOpcode();
3746 
3747   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3748     return true;
3749 
3750   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3751   assert(Src0Idx != -1);
3752 
3753   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3754   if (!Src0.isReg())
3755     return true;
3756 
3757   auto Reg = mc2PseudoReg(Src0.getReg());
3758   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3759   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3760     Error(getRegLoc(Reg, Operands),
3761           "source operand must be either a VGPR or an inline constant");
3762     return false;
3763   }
3764 
3765   return true;
3766 }
3767 
3768 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3769                                       const OperandVector &Operands) {
3770   unsigned Opcode = Inst.getOpcode();
3771   const MCInstrDesc &Desc = MII.get(Opcode);
3772 
3773   if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3774       !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3775     return true;
3776 
3777   const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3778   if (Src2Idx == -1)
3779     return true;
3780 
3781   if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3782     Error(getConstLoc(Operands),
3783           "inline constants are not allowed for this operand");
3784     return false;
3785   }
3786 
3787   return true;
3788 }
3789 
3790 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3791                                    const OperandVector &Operands) {
3792   const unsigned Opc = Inst.getOpcode();
3793   const MCInstrDesc &Desc = MII.get(Opc);
3794 
3795   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3796     return true;
3797 
3798   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3799   if (Src2Idx == -1)
3800     return true;
3801 
3802   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3803   if (!Src2.isReg())
3804     return true;
3805 
3806   MCRegister Src2Reg = Src2.getReg();
3807   MCRegister DstReg = Inst.getOperand(0).getReg();
3808   if (Src2Reg == DstReg)
3809     return true;
3810 
3811   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3812   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3813     return true;
3814 
3815   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3816     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3817           "source 2 operand must not partially overlap with dst");
3818     return false;
3819   }
3820 
3821   return true;
3822 }
3823 
3824 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3825   switch (Inst.getOpcode()) {
3826   default:
3827     return true;
3828   case V_DIV_SCALE_F32_gfx6_gfx7:
3829   case V_DIV_SCALE_F32_vi:
3830   case V_DIV_SCALE_F32_gfx10:
3831   case V_DIV_SCALE_F64_gfx6_gfx7:
3832   case V_DIV_SCALE_F64_vi:
3833   case V_DIV_SCALE_F64_gfx10:
3834     break;
3835   }
3836 
3837   // TODO: Check that src0 = src1 or src2.
3838 
3839   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3840                     AMDGPU::OpName::src1_modifiers,
3841                     AMDGPU::OpName::src2_modifiers}) {
3842     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3843             .getImm() &
3844         SISrcMods::ABS) {
3845       return false;
3846     }
3847   }
3848 
3849   return true;
3850 }
3851 
3852 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3853 
3854   const unsigned Opc = Inst.getOpcode();
3855   const MCInstrDesc &Desc = MII.get(Opc);
3856 
3857   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3858     return true;
3859 
3860   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3861   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3862     if (isCI() || isSI())
3863       return false;
3864   }
3865 
3866   return true;
3867 }
3868 
3869 static bool IsRevOpcode(const unsigned Opcode)
3870 {
3871   switch (Opcode) {
3872   case AMDGPU::V_SUBREV_F32_e32:
3873   case AMDGPU::V_SUBREV_F32_e64:
3874   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3875   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3876   case AMDGPU::V_SUBREV_F32_e32_vi:
3877   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3878   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3879   case AMDGPU::V_SUBREV_F32_e64_vi:
3880 
3881   case AMDGPU::V_SUBREV_CO_U32_e32:
3882   case AMDGPU::V_SUBREV_CO_U32_e64:
3883   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3884   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3885 
3886   case AMDGPU::V_SUBBREV_U32_e32:
3887   case AMDGPU::V_SUBBREV_U32_e64:
3888   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3889   case AMDGPU::V_SUBBREV_U32_e32_vi:
3890   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3891   case AMDGPU::V_SUBBREV_U32_e64_vi:
3892 
3893   case AMDGPU::V_SUBREV_U32_e32:
3894   case AMDGPU::V_SUBREV_U32_e64:
3895   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3896   case AMDGPU::V_SUBREV_U32_e32_vi:
3897   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3898   case AMDGPU::V_SUBREV_U32_e64_vi:
3899 
3900   case AMDGPU::V_SUBREV_F16_e32:
3901   case AMDGPU::V_SUBREV_F16_e64:
3902   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3903   case AMDGPU::V_SUBREV_F16_e32_vi:
3904   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3905   case AMDGPU::V_SUBREV_F16_e64_vi:
3906 
3907   case AMDGPU::V_SUBREV_U16_e32:
3908   case AMDGPU::V_SUBREV_U16_e64:
3909   case AMDGPU::V_SUBREV_U16_e32_vi:
3910   case AMDGPU::V_SUBREV_U16_e64_vi:
3911 
3912   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3913   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3914   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3915 
3916   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3917   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3918 
3919   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3920   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3921 
3922   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3923   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3924 
3925   case AMDGPU::V_LSHRREV_B32_e32:
3926   case AMDGPU::V_LSHRREV_B32_e64:
3927   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3928   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3929   case AMDGPU::V_LSHRREV_B32_e32_vi:
3930   case AMDGPU::V_LSHRREV_B32_e64_vi:
3931   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3932   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3933 
3934   case AMDGPU::V_ASHRREV_I32_e32:
3935   case AMDGPU::V_ASHRREV_I32_e64:
3936   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3937   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3938   case AMDGPU::V_ASHRREV_I32_e32_vi:
3939   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3940   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3941   case AMDGPU::V_ASHRREV_I32_e64_vi:
3942 
3943   case AMDGPU::V_LSHLREV_B32_e32:
3944   case AMDGPU::V_LSHLREV_B32_e64:
3945   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3946   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3947   case AMDGPU::V_LSHLREV_B32_e32_vi:
3948   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3949   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3950   case AMDGPU::V_LSHLREV_B32_e64_vi:
3951 
3952   case AMDGPU::V_LSHLREV_B16_e32:
3953   case AMDGPU::V_LSHLREV_B16_e64:
3954   case AMDGPU::V_LSHLREV_B16_e32_vi:
3955   case AMDGPU::V_LSHLREV_B16_e64_vi:
3956   case AMDGPU::V_LSHLREV_B16_gfx10:
3957 
3958   case AMDGPU::V_LSHRREV_B16_e32:
3959   case AMDGPU::V_LSHRREV_B16_e64:
3960   case AMDGPU::V_LSHRREV_B16_e32_vi:
3961   case AMDGPU::V_LSHRREV_B16_e64_vi:
3962   case AMDGPU::V_LSHRREV_B16_gfx10:
3963 
3964   case AMDGPU::V_ASHRREV_I16_e32:
3965   case AMDGPU::V_ASHRREV_I16_e64:
3966   case AMDGPU::V_ASHRREV_I16_e32_vi:
3967   case AMDGPU::V_ASHRREV_I16_e64_vi:
3968   case AMDGPU::V_ASHRREV_I16_gfx10:
3969 
3970   case AMDGPU::V_LSHLREV_B64_e64:
3971   case AMDGPU::V_LSHLREV_B64_gfx10:
3972   case AMDGPU::V_LSHLREV_B64_vi:
3973 
3974   case AMDGPU::V_LSHRREV_B64_e64:
3975   case AMDGPU::V_LSHRREV_B64_gfx10:
3976   case AMDGPU::V_LSHRREV_B64_vi:
3977 
3978   case AMDGPU::V_ASHRREV_I64_e64:
3979   case AMDGPU::V_ASHRREV_I64_gfx10:
3980   case AMDGPU::V_ASHRREV_I64_vi:
3981 
3982   case AMDGPU::V_PK_LSHLREV_B16:
3983   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3984   case AMDGPU::V_PK_LSHLREV_B16_vi:
3985 
3986   case AMDGPU::V_PK_LSHRREV_B16:
3987   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3988   case AMDGPU::V_PK_LSHRREV_B16_vi:
3989   case AMDGPU::V_PK_ASHRREV_I16:
3990   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3991   case AMDGPU::V_PK_ASHRREV_I16_vi:
3992     return true;
3993   default:
3994     return false;
3995   }
3996 }
3997 
3998 std::optional<StringRef>
3999 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4000 
4001   using namespace SIInstrFlags;
4002   const unsigned Opcode = Inst.getOpcode();
4003   const MCInstrDesc &Desc = MII.get(Opcode);
4004 
4005   // lds_direct register is defined so that it can be used
4006   // with 9-bit operands only. Ignore encodings which do not accept these.
4007   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4008   if ((Desc.TSFlags & Enc) == 0)
4009     return std::nullopt;
4010 
4011   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4012     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4013     if (SrcIdx == -1)
4014       break;
4015     const auto &Src = Inst.getOperand(SrcIdx);
4016     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4017 
4018       if (isGFX90A() || isGFX11Plus())
4019         return StringRef("lds_direct is not supported on this GPU");
4020 
4021       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4022         return StringRef("lds_direct cannot be used with this instruction");
4023 
4024       if (SrcName != OpName::src0)
4025         return StringRef("lds_direct may be used as src0 only");
4026     }
4027   }
4028 
4029   return std::nullopt;
4030 }
4031 
4032 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4033   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4034     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4035     if (Op.isFlatOffset())
4036       return Op.getStartLoc();
4037   }
4038   return getLoc();
4039 }
4040 
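// Illustrative examples for the offset check below (the offset bit width
// varies by target, see getNumFlatOffsetBits):
//   flat_load_dword   v0, v[0:1] offset:-16       ; rejected: plain FLAT
//                                                 ; offsets must be non-negative
//   global_load_dword v0, v[0:1], off offset:-16  ; accepted: global/scratch
//                                                 ; allow signed offsets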
4041 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4042                                          const OperandVector &Operands) {
4043   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4044   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4045     return true;
4046 
4047   auto Opcode = Inst.getOpcode();
4048   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4049   assert(OpNum != -1);
4050 
4051   const auto &Op = Inst.getOperand(OpNum);
4052   if (!hasFlatOffsets() && Op.getImm() != 0) {
4053     Error(getFlatOffsetLoc(Operands),
4054           "flat offset modifier is not supported on this GPU");
4055     return false;
4056   }
4057 
4058   // For FLAT segment the offset must be positive;
4059   // MSB is ignored and forced to zero.
4060   unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4061   bool AllowNegative =
4062       TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch);
4063   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4064     Error(getFlatOffsetLoc(Operands),
4065           Twine("expected a ") +
4066               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4067                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4068     return false;
4069   }
4070 
4071   return true;
4072 }
4073 
4074 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4075   // Start with second operand because SMEM Offset cannot be dst or src0.
4076   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4077     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4078     if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4079       return Op.getStartLoc();
4080   }
4081   return getLoc();
4082 }
4083 
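// Illustrative (values only): on VI the SMEM offset is a 20-bit unsigned
// value, while newer targets accept a 21-bit signed offset (buffer forms stay
// unsigned), e.g.
//   s_load_dword s0, s[0:1], 0xfffff    ; fits in 20 bits -> accepted
//   s_load_dword s0, s[0:1], 0x200000   ; out of range -> rejected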
4084 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4085                                          const OperandVector &Operands) {
4086   if (isCI() || isSI())
4087     return true;
4088 
4089   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4090   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4091     return true;
4092 
4093   auto Opcode = Inst.getOpcode();
4094   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4095   if (OpNum == -1)
4096     return true;
4097 
4098   const auto &Op = Inst.getOperand(OpNum);
4099   if (!Op.isImm())
4100     return true;
4101 
4102   uint64_t Offset = Op.getImm();
4103   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4104   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4105       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4106     return true;
4107 
4108   Error(getSMEMOffsetLoc(Operands),
4109         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4110                                "expected a 21-bit signed offset");
4111 
4112   return false;
4113 }
4114 
4115 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4116   unsigned Opcode = Inst.getOpcode();
4117   const MCInstrDesc &Desc = MII.get(Opcode);
4118   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4119     return true;
4120 
4121   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4122   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4123 
4124   const int OpIndices[] = { Src0Idx, Src1Idx };
4125 
4126   unsigned NumExprs = 0;
4127   unsigned NumLiterals = 0;
4128   uint32_t LiteralValue;
4129 
4130   for (int OpIdx : OpIndices) {
4131     if (OpIdx == -1) break;
4132 
4133     const MCOperand &MO = Inst.getOperand(OpIdx);
4134     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4135     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4136       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4137         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4138         if (NumLiterals == 0 || LiteralValue != Value) {
4139           LiteralValue = Value;
4140           ++NumLiterals;
4141         }
4142       } else if (MO.isExpr()) {
4143         ++NumExprs;
4144       }
4145     }
4146   }
4147 
4148   return NumLiterals + NumExprs <= 1;
4149 }
4150 
4151 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4152   const unsigned Opc = Inst.getOpcode();
4153   if (isPermlane16(Opc)) {
4154     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4155     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4156 
4157     if (OpSel & ~3)
4158       return false;
4159   }
4160 
4161   uint64_t TSFlags = MII.get(Opc).TSFlags;
4162 
4163   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4164     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4165     if (OpSelIdx != -1) {
4166       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4167         return false;
4168     }
4169     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4170     if (OpSelHiIdx != -1) {
4171       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4172         return false;
4173     }
4174   }
4175 
4176   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4177   if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4178       (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4179     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4180     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4181     if (OpSel & 3)
4182       return false;
4183   }
4184 
4185   return true;
4186 }
4187 
4188 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4189                                   const OperandVector &Operands) {
4190   const unsigned Opc = Inst.getOpcode();
4191   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4192   if (DppCtrlIdx < 0)
4193     return true;
4194   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4195 
4196   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4197     // DPP64 is supported for row_newbcast only.
4198     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4199     if (Src0Idx >= 0 &&
4200         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4201       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4202       Error(S, "64 bit dpp only supports row_newbcast");
4203       return false;
4204     }
4205   }
4206 
4207   return true;
4208 }
4209 
4210 // Check if VCC register matches wavefront size
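// For example (illustrative): in wave64 mode "vcc" names the 64-bit register
// pair and is accepted here, while in wave32 mode the same operand must be
// written as "vcc_lo".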
4211 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4212   auto FB = getFeatureBits();
4213   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4214     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4215 }
4216 
4217 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4218 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4219                                          const OperandVector &Operands) {
4220   unsigned Opcode = Inst.getOpcode();
4221   const MCInstrDesc &Desc = MII.get(Opcode);
4222   bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4223   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4224       !HasMandatoryLiteral && !isVOPD(Opcode))
4225     return true;
4226 
4227   OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4228 
4229   unsigned NumExprs = 0;
4230   unsigned NumLiterals = 0;
4231   uint32_t LiteralValue;
4232 
4233   for (int OpIdx : OpIndices) {
4234     if (OpIdx == -1)
4235       continue;
4236 
4237     const MCOperand &MO = Inst.getOperand(OpIdx);
4238     if (!MO.isImm() && !MO.isExpr())
4239       continue;
4240     if (!isSISrcOperand(Desc, OpIdx))
4241       continue;
4242 
4243     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4244       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4245       if (NumLiterals == 0 || LiteralValue != Value) {
4246         LiteralValue = Value;
4247         ++NumLiterals;
4248       }
4249     } else if (MO.isExpr()) {
4250       ++NumExprs;
4251     }
4252   }
4253   NumLiterals += NumExprs;
4254 
4255   if (!NumLiterals)
4256     return true;
4257 
4258   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4259     Error(getLitLoc(Operands), "literal operands are not supported");
4260     return false;
4261   }
4262 
4263   if (NumLiterals > 1) {
4264     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4265     return false;
4266   }
4267 
4268   return true;
4269 }
4270 
4271 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4272 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4273                          const MCRegisterInfo *MRI) {
4274   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4275   if (OpIdx < 0)
4276     return -1;
4277 
4278   const MCOperand &Op = Inst.getOperand(OpIdx);
4279   if (!Op.isReg())
4280     return -1;
4281 
4282   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4283   auto Reg = Sub ? Sub : Op.getReg();
4284   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4285   return AGPR32.contains(Reg) ? 1 : 0;
4286 }
4287 
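// Illustrative behaviour of the check below: on gfx90a, FLAT/MUBUF/MTBUF/MIMG/
// DS loads and stores may use AGPRs for data, but dst and data must then agree,
// e.g.
//   global_load_dword a0, v[0:1], off   ; accepted on gfx90a
//   ds_write2_b32 v0, a1, v2 offset1:1  ; mixed AGPR/VGPR data -> rejected
// On targets without gfx90a insts, AGPR data operands are rejected entirely.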
4288 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4289   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4290   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4291                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4292                   SIInstrFlags::DS)) == 0)
4293     return true;
4294 
4295   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4296                                                       : AMDGPU::OpName::vdata;
4297 
4298   const MCRegisterInfo *MRI = getMRI();
4299   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4300   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4301 
4302   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4303     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4304     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4305       return false;
4306   }
4307 
4308   auto FB = getFeatureBits();
4309   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4310     if (DataAreg < 0 || DstAreg < 0)
4311       return true;
4312     return DstAreg == DataAreg;
4313   }
4314 
4315   return DstAreg < 1 && DataAreg < 1;
4316 }
4317 
4318 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4319   auto FB = getFeatureBits();
4320   if (!FB[AMDGPU::FeatureGFX90AInsts])
4321     return true;
4322 
4323   const MCRegisterInfo *MRI = getMRI();
4324   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4325   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4326   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4327     const MCOperand &Op = Inst.getOperand(I);
4328     if (!Op.isReg())
4329       continue;
4330 
4331     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4332     if (!Sub)
4333       continue;
4334 
4335     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4336       return false;
4337     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4338       return false;
4339   }
4340 
4341   return true;
4342 }
4343 
4344 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4345   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4346     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4347     if (Op.isBLGP())
4348       return Op.getStartLoc();
4349   }
4350   return SMLoc();
4351 }
4352 
4353 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4354                                    const OperandVector &Operands) {
4355   unsigned Opc = Inst.getOpcode();
4356   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4357   if (BlgpIdx == -1)
4358     return true;
4359   SMLoc BLGPLoc = getBLGPLoc(Operands);
4360   if (!BLGPLoc.isValid())
4361     return true;
4362   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4363   auto FB = getFeatureBits();
4364   bool UsesNeg = false;
4365   if (FB[AMDGPU::FeatureGFX940Insts]) {
4366     switch (Opc) {
4367     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4368     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4369     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4370     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4371       UsesNeg = true;
4372     }
4373   }
4374 
4375   if (IsNeg == UsesNeg)
4376     return true;
4377 
4378   Error(BLGPLoc,
4379         UsesNeg ? "invalid modifier: blgp is not supported"
4380                 : "invalid modifier: neg is not supported");
4381 
4382   return false;
4383 }
4384 
4385 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4386                                       const OperandVector &Operands) {
4387   if (!isGFX11Plus())
4388     return true;
4389 
4390   unsigned Opc = Inst.getOpcode();
4391   if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4392       Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4393       Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4394       Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4395     return true;
4396 
4397   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4398   assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4399   auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4400   if (Reg == AMDGPU::SGPR_NULL)
4401     return true;
4402 
4403   SMLoc RegLoc = getRegLoc(Reg, Operands);
4404   Error(RegLoc, "src0 must be null");
4405   return false;
4406 }
4407 
4408 // gfx90a has an undocumented limitation:
4409 // DS_GWS opcodes must use even aligned registers.
4410 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4411                                   const OperandVector &Operands) {
4412   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4413     return true;
4414 
4415   int Opc = Inst.getOpcode();
4416   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4417       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4418     return true;
4419 
4420   const MCRegisterInfo *MRI = getMRI();
4421   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4422   int Data0Pos =
4423       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4424   assert(Data0Pos != -1);
4425   auto Reg = Inst.getOperand(Data0Pos).getReg();
4426   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4427   if (RegIdx & 1) {
4428     SMLoc RegLoc = getRegLoc(Reg, Operands);
4429     Error(RegLoc, "vgpr must be even aligned");
4430     return false;
4431   }
4432 
4433   return true;
4434 }
4435 
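// Illustrative examples for the cache-policy checks below:
//   s_load_dword s0, s[0:1], 0x0 glc dlc  ; OK where SMEM accepts glc/dlc
//   flat_atomic_add v0, v[0:1], v2 glc    ; returning atomic: glc (sc0 on
//                                         ; gfx940) is required
//   flat_atomic_add v[0:1], v2 glc        ; non-returning atomic: glc must
//                                         ; be absent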
4436 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4437                                             const OperandVector &Operands,
4438                                             const SMLoc &IDLoc) {
4439   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4440                                            AMDGPU::OpName::cpol);
4441   if (CPolPos == -1)
4442     return true;
4443 
4444   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4445 
4446   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4447   if (TSFlags & SIInstrFlags::SMRD) {
4448     if (CPol && (isSI() || isCI())) {
4449       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4450       Error(S, "cache policy is not supported for SMRD instructions");
4451       return false;
4452     }
4453     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4454       Error(IDLoc, "invalid cache policy for SMEM instruction");
4455       return false;
4456     }
4457   }
4458 
4459   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4460     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4461     StringRef CStr(S.getPointer());
4462     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4463     Error(S, "scc is not supported on this GPU");
4464     return false;
4465   }
4466 
4467   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4468     return true;
4469 
4470   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4471     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4472       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4473                               : "instruction must use glc");
4474       return false;
4475     }
4476   } else {
4477     if (CPol & CPol::GLC) {
4478       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4479       StringRef CStr(S.getPointer());
4480       S = SMLoc::getFromPointer(
4481           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4482       Error(S, isGFX940() ? "instruction must not use sc0"
4483                           : "instruction must not use glc");
4484       return false;
4485     }
4486   }
4487 
4488   return true;
4489 }
4490 
4491 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4492   if (!isGFX11Plus())
4493     return true;
4494   for (auto &Operand : Operands) {
4495     if (!Operand->isReg())
4496       continue;
4497     unsigned Reg = Operand->getReg();
4498     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4499       Error(getRegLoc(Reg, Operands),
4500             "execz and vccz are not supported on this GPU");
4501       return false;
4502     }
4503   }
4504   return true;
4505 }
4506 
4507 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4508                                   const OperandVector &Operands) {
4509   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4510   if (Desc.mayStore() &&
4511       (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4512     SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4513     if (Loc != getInstLoc(Operands)) {
4514       Error(Loc, "TFE modifier has no meaning for store instructions");
4515       return false;
4516     }
4517   }
4518 
4519   return true;
4520 }
4521 
4522 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4523                                           const SMLoc &IDLoc,
4524                                           const OperandVector &Operands) {
4525   if (auto ErrMsg = validateLdsDirect(Inst)) {
4526     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4527     return false;
4528   }
4529   if (!validateSOPLiteral(Inst)) {
4530     Error(getLitLoc(Operands),
4531       "only one unique literal operand is allowed");
4532     return false;
4533   }
4534   if (!validateVOPLiteral(Inst, Operands)) {
4535     return false;
4536   }
4537   if (!validateConstantBusLimitations(Inst, Operands)) {
4538     return false;
4539   }
4540   if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4541     return false;
4542   }
4543   if (!validateIntClampSupported(Inst)) {
4544     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4545       "integer clamping is not supported on this GPU");
4546     return false;
4547   }
4548   if (!validateOpSel(Inst)) {
4549     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4550       "invalid op_sel operand");
4551     return false;
4552   }
4553   if (!validateDPP(Inst, Operands)) {
4554     return false;
4555   }
4556   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4557   if (!validateMIMGD16(Inst)) {
4558     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4559       "d16 modifier is not supported on this GPU");
4560     return false;
4561   }
4562   if (!validateMIMGMSAA(Inst)) {
4563     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4564           "invalid dim; must be MSAA type");
4565     return false;
4566   }
4567   if (!validateMIMGDataSize(Inst, IDLoc)) {
4568     return false;
4569   }
4570   if (!validateMIMGAddrSize(Inst, IDLoc))
4571     return false;
4572   if (!validateMIMGAtomicDMask(Inst)) {
4573     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4574       "invalid atomic image dmask");
4575     return false;
4576   }
4577   if (!validateMIMGGatherDMask(Inst)) {
4578     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4579       "invalid image_gather dmask: only one bit must be set");
4580     return false;
4581   }
4582   if (!validateMovrels(Inst, Operands)) {
4583     return false;
4584   }
4585   if (!validateFlatOffset(Inst, Operands)) {
4586     return false;
4587   }
4588   if (!validateSMEMOffset(Inst, Operands)) {
4589     return false;
4590   }
4591   if (!validateMAIAccWrite(Inst, Operands)) {
4592     return false;
4593   }
4594   if (!validateMAISrc2(Inst, Operands)) {
4595     return false;
4596   }
4597   if (!validateMFMA(Inst, Operands)) {
4598     return false;
4599   }
4600   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4601     return false;
4602   }
4603 
4604   if (!validateAGPRLdSt(Inst)) {
4605     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4606     ? "invalid register class: data and dst should be all VGPR or AGPR"
4607     : "invalid register class: agpr loads and stores not supported on this GPU"
4608     );
4609     return false;
4610   }
4611   if (!validateVGPRAlign(Inst)) {
4612     Error(IDLoc,
4613       "invalid register class: vgpr tuples must be 64 bit aligned");
4614     return false;
4615   }
4616   if (!validateGWS(Inst, Operands)) {
4617     return false;
4618   }
4619 
4620   if (!validateBLGP(Inst, Operands)) {
4621     return false;
4622   }
4623 
4624   if (!validateDivScale(Inst)) {
4625     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4626     return false;
4627   }
4628   if (!validateWaitCnt(Inst, Operands)) {
4629     return false;
4630   }
4631   if (!validateExeczVcczOperands(Operands)) {
4632     return false;
4633   }
4634   if (!validateTFE(Inst, Operands)) {
4635     return false;
4636   }
4637 
4638   return true;
4639 }
4640 
4641 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4642                                             const FeatureBitset &FBS,
4643                                             unsigned VariantID = 0);
4644 
4645 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4646                                 const FeatureBitset &AvailableFeatures,
4647                                 unsigned VariantID);
4648 
4649 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4650                                        const FeatureBitset &FBS) {
4651   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4652 }
4653 
4654 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4655                                        const FeatureBitset &FBS,
4656                                        ArrayRef<unsigned> Variants) {
4657   for (auto Variant : Variants) {
4658     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4659       return true;
4660   }
4661 
4662   return false;
4663 }
4664 
4665 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4666                                                   const SMLoc &IDLoc) {
4667   FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4668 
4669   // Check if requested instruction variant is supported.
4670   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4671     return false;
4672 
4673   // This instruction is not supported.
4674   // Clear any other pending errors because they are no longer relevant.
4675   getParser().clearPendingErrors();
4676 
4677   // Requested instruction variant is not supported.
4678   // Check if any other variants are supported.
4679   StringRef VariantName = getMatchedVariantName();
4680   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4681     return Error(IDLoc,
4682                  Twine(VariantName,
4683                        " variant of this instruction is not supported"));
4684   }
4685 
4686   // Check if this instruction may be used with a different wavesize.
4687   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4688       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4689 
4690     FeatureBitset FeaturesWS32 = getFeatureBits();
4691     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4692         .flip(AMDGPU::FeatureWavefrontSize32);
4693     FeatureBitset AvailableFeaturesWS32 =
4694         ComputeAvailableFeatures(FeaturesWS32);
4695 
4696     if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4697       return Error(IDLoc, "instruction requires wavesize=32");
4698   }
4699 
4700   // Finally check if this instruction is supported on any other GPU.
4701   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4702     return Error(IDLoc, "instruction not supported on this GPU");
4703   }
4704 
4705   // Instruction not supported on any GPU. Probably a typo.
4706   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4707   return Error(IDLoc, "invalid instruction" + Suggestion);
4708 }
4709 
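     // Returns true if the reported invalid operand is the token immediately
     // following the "::" separator, i.e. the start of the VOPDY half of a
     // VOPD instruction.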
4710 static bool isInvalidVOPDY(const OperandVector &Operands,
4711                            uint64_t InvalidOprIdx) {
4712   assert(InvalidOprIdx < Operands.size());
4713   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4714   if (Op.isToken() && InvalidOprIdx > 1) {
4715     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4716     return PrevOp.isToken() && PrevOp.getToken() == "::";
4717   }
4718   return false;
4719 }
4720 
4721 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4722                                               OperandVector &Operands,
4723                                               MCStreamer &Out,
4724                                               uint64_t &ErrorInfo,
4725                                               bool MatchingInlineAsm) {
4726   MCInst Inst;
4727   unsigned Result = Match_Success;
4728   for (auto Variant : getMatchedVariants()) {
4729     uint64_t EI;
4730     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4731                                   Variant);
4732     // Match statuses are ordered from least to most specific; keep the most
4733     // specific status seen so far as the result:
4734     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4735     if ((R == Match_Success) ||
4736         (R == Match_PreferE32) ||
4737         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4738         (R == Match_InvalidOperand && Result != Match_MissingFeature
4739                                    && Result != Match_PreferE32) ||
4740         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4741                                    && Result != Match_MissingFeature
4742                                    && Result != Match_PreferE32)) {
4743       Result = R;
4744       ErrorInfo = EI;
4745     }
4746     if (R == Match_Success)
4747       break;
4748   }
4749 
4750   if (Result == Match_Success) {
4751     if (!validateInstruction(Inst, IDLoc, Operands)) {
4752       return true;
4753     }
4754     Inst.setLoc(IDLoc);
4755     Out.emitInstruction(Inst, getSTI());
4756     return false;
4757   }
4758 
4759   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4760   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4761     return true;
4762   }
4763 
4764   switch (Result) {
4765   default: break;
4766   case Match_MissingFeature:
4767     // It has been verified that the specified instruction
4768     // mnemonic is valid. A match was found but it requires
4769     // features which are not supported on this GPU.
4770     return Error(IDLoc, "operands are not valid for this GPU or mode");
4771 
4772   case Match_InvalidOperand: {
4773     SMLoc ErrorLoc = IDLoc;
4774     if (ErrorInfo != ~0ULL) {
4775       if (ErrorInfo >= Operands.size()) {
4776         return Error(IDLoc, "too few operands for instruction");
4777       }
4778       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4779       if (ErrorLoc == SMLoc())
4780         ErrorLoc = IDLoc;
4781 
4782       if (isInvalidVOPDY(Operands, ErrorInfo))
4783         return Error(ErrorLoc, "invalid VOPDY instruction");
4784     }
4785     return Error(ErrorLoc, "invalid operand for instruction");
4786   }
4787 
4788   case Match_PreferE32:
4789     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4790                         "should be encoded as e32");
4791   case Match_MnemonicFail:
4792     llvm_unreachable("Invalid instructions should have been handled already");
4793   }
4794   llvm_unreachable("Implement any new match types added!");
4795 }
4796 
4797 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4798   int64_t Tmp = -1;
4799   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4800     return true;
4801   }
4802   if (getParser().parseAbsoluteExpression(Tmp)) {
4803     return true;
4804   }
4805   Ret = static_cast<uint32_t>(Tmp);
4806   return false;
4807 }
4808 
4809 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4810                                                uint32_t &Minor) {
4811   if (ParseAsAbsoluteExpression(Major))
4812     return TokError("invalid major version");
4813 
4814   if (!trySkipToken(AsmToken::Comma))
4815     return TokError("minor version number required, comma expected");
4816 
4817   if (ParseAsAbsoluteExpression(Minor))
4818     return TokError("invalid minor version");
4819 
4820   return false;
4821 }
4822 
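     // Parses the .amdgcn_target directive and checks that the quoted target id
     // matches the one the target streamer was configured with, e.g.
     //   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"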
4823 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4824   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4825     return TokError("directive only supported for amdgcn architecture");
4826 
4827   std::string TargetIDDirective;
4828   SMLoc TargetStart = getTok().getLoc();
4829   if (getParser().parseEscapedString(TargetIDDirective))
4830     return true;
4831 
4832   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4833   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4834     return getParser().Error(TargetRange.Start,
4835         (Twine(".amdgcn_target directive's target id ") +
4836          Twine(TargetIDDirective) +
4837          Twine(" does not match the specified target id ") +
4838          Twine(getTargetStreamer().getTargetID()->toString())).str());
4839 
4840   return false;
4841 }
4842 
4843 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4844   return Error(Range.Start, "value out of range", Range);
4845 }
4846 
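     // Converts the raw VGPR/SGPR counts supplied via .amdhsa_ directives into
     // the granulated "block" values encoded in compute_pgm_rsrc1, adding the
     // extra SGPRs implied by VCC / flat scratch / XNACK where required.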
4847 bool AMDGPUAsmParser::calculateGPRBlocks(
4848     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4849     bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
4850     unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
4851     SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4852   // TODO(scott.linder): These calculations are duplicated from
4853   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4854   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4855 
4856   unsigned NumVGPRs = NextFreeVGPR;
4857   unsigned NumSGPRs = NextFreeSGPR;
4858 
4859   if (Version.Major >= 10)
4860     NumSGPRs = 0;
4861   else {
4862     unsigned MaxAddressableNumSGPRs =
4863         IsaInfo::getAddressableNumSGPRs(&getSTI());
4864 
4865     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4866         NumSGPRs > MaxAddressableNumSGPRs)
4867       return OutOfRangeError(SGPRRange);
4868 
4869     NumSGPRs +=
4870         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4871 
4872     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4873         NumSGPRs > MaxAddressableNumSGPRs)
4874       return OutOfRangeError(SGPRRange);
4875 
4876     if (Features.test(FeatureSGPRInitBug))
4877       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4878   }
4879 
4880   VGPRBlocks =
4881       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4882   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4883 
4884   return false;
4885 }
4886 
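     // Parses an .amdhsa_kernel block. A minimal example:
     //   .amdhsa_kernel my_kernel
     //     .amdhsa_next_free_vgpr 8
     //     .amdhsa_next_free_sgpr 16
     //   .end_amdhsa_kernel
     // Only the two *_next_free_* directives are mandatory (plus
     // .amdhsa_accum_offset on gfx90a); all other fields default to the values
     // from getDefaultAmdhsaKernelDescriptor().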
4887 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4888   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4889     return TokError("directive only supported for amdgcn architecture");
4890 
4891   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4892     return TokError("directive only supported for amdhsa OS");
4893 
4894   StringRef KernelName;
4895   if (getParser().parseIdentifier(KernelName))
4896     return true;
4897 
4898   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4899 
4900   StringSet<> Seen;
4901 
4902   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4903 
4904   SMRange VGPRRange;
4905   uint64_t NextFreeVGPR = 0;
4906   uint64_t AccumOffset = 0;
4907   uint64_t SharedVGPRCount = 0;
4908   SMRange SGPRRange;
4909   uint64_t NextFreeSGPR = 0;
4910 
4911   // Count the number of user SGPRs implied by the enabled feature bits.
4912   unsigned ImpliedUserSGPRCount = 0;
4913 
4914   // Track if the asm explicitly contains the directive for the user SGPR
4915   // count.
4916   std::optional<unsigned> ExplicitUserSGPRCount;
4917   bool ReserveVCC = true;
4918   bool ReserveFlatScr = true;
4919   std::optional<bool> EnableWavefrontSize32;
4920 
4921   while (true) {
4922     while (trySkipToken(AsmToken::EndOfStatement));
4923 
4924     StringRef ID;
4925     SMRange IDRange = getTok().getLocRange();
4926     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4927       return true;
4928 
4929     if (ID == ".end_amdhsa_kernel")
4930       break;
4931 
4932     if (!Seen.insert(ID).second)
4933       return TokError(".amdhsa_ directives cannot be repeated");
4934 
4935     SMLoc ValStart = getLoc();
4936     int64_t IVal;
4937     if (getParser().parseAbsoluteExpression(IVal))
4938       return true;
4939     SMLoc ValEnd = getLoc();
4940     SMRange ValRange = SMRange(ValStart, ValEnd);
4941 
4942     if (IVal < 0)
4943       return OutOfRangeError(ValRange);
4944 
4945     uint64_t Val = IVal;
4946 
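     // Checks that VALUE fits into the ENTRY bitfield and stores it into FIELD;
     // used by the .amdhsa_ directive handlers below.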
4947 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4948   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4949     return OutOfRangeError(RANGE);                                             \
4950   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4951 
4952     if (ID == ".amdhsa_group_segment_fixed_size") {
4953       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4954         return OutOfRangeError(ValRange);
4955       KD.group_segment_fixed_size = Val;
4956     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4957       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4958         return OutOfRangeError(ValRange);
4959       KD.private_segment_fixed_size = Val;
4960     } else if (ID == ".amdhsa_kernarg_size") {
4961       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4962         return OutOfRangeError(ValRange);
4963       KD.kernarg_size = Val;
4964     } else if (ID == ".amdhsa_user_sgpr_count") {
4965       ExplicitUserSGPRCount = Val;
4966     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4967       if (hasArchitectedFlatScratch())
4968         return Error(IDRange.Start,
4969                      "directive is not supported with architected flat scratch",
4970                      IDRange);
4971       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4972                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4973                        Val, ValRange);
4974       if (Val)
4975         ImpliedUserSGPRCount += 4;
4976     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4977       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4978                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4979                        ValRange);
4980       if (Val)
4981         ImpliedUserSGPRCount += 2;
4982     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4983       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4984                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4985                        ValRange);
4986       if (Val)
4987         ImpliedUserSGPRCount += 2;
4988     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4989       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4990                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4991                        Val, ValRange);
4992       if (Val)
4993         ImpliedUserSGPRCount += 2;
4994     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4995       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4996                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4997                        ValRange);
4998       if (Val)
4999         ImpliedUserSGPRCount += 2;
5000     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5001       if (hasArchitectedFlatScratch())
5002         return Error(IDRange.Start,
5003                      "directive is not supported with architected flat scratch",
5004                      IDRange);
5005       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5006                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5007                        ValRange);
5008       if (Val)
5009         ImpliedUserSGPRCount += 2;
5010     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5011       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5012                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5013                        Val, ValRange);
5014       if (Val)
5015         ImpliedUserSGPRCount += 1;
5016     } else if (ID == ".amdhsa_wavefront_size32") {
5017       if (IVersion.Major < 10)
5018         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5019       EnableWavefrontSize32 = Val;
5020       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5021                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5022                        Val, ValRange);
5023     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5024       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5025                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5026     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5027       if (hasArchitectedFlatScratch())
5028         return Error(IDRange.Start,
5029                      "directive is not supported with architected flat scratch",
5030                      IDRange);
5031       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5032                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5033     } else if (ID == ".amdhsa_enable_private_segment") {
5034       if (!hasArchitectedFlatScratch())
5035         return Error(
5036             IDRange.Start,
5037             "directive is not supported without architected flat scratch",
5038             IDRange);
5039       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5040                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5041     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5042       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5043                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5044                        ValRange);
5045     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5046       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5047                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5048                        ValRange);
5049     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5050       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5051                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5052                        ValRange);
5053     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5054       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5055                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5056                        ValRange);
5057     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5058       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5059                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5060                        ValRange);
5061     } else if (ID == ".amdhsa_next_free_vgpr") {
5062       VGPRRange = ValRange;
5063       NextFreeVGPR = Val;
5064     } else if (ID == ".amdhsa_next_free_sgpr") {
5065       SGPRRange = ValRange;
5066       NextFreeSGPR = Val;
5067     } else if (ID == ".amdhsa_accum_offset") {
5068       if (!isGFX90A())
5069         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5070       AccumOffset = Val;
5071     } else if (ID == ".amdhsa_reserve_vcc") {
5072       if (!isUInt<1>(Val))
5073         return OutOfRangeError(ValRange);
5074       ReserveVCC = Val;
5075     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5076       if (IVersion.Major < 7)
5077         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5078       if (hasArchitectedFlatScratch())
5079         return Error(IDRange.Start,
5080                      "directive is not supported with architected flat scratch",
5081                      IDRange);
5082       if (!isUInt<1>(Val))
5083         return OutOfRangeError(ValRange);
5084       ReserveFlatScr = Val;
5085     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5086       if (IVersion.Major < 8)
5087         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5088       if (!isUInt<1>(Val))
5089         return OutOfRangeError(ValRange);
5090       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5091         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5092                                  IDRange);
5093     } else if (ID == ".amdhsa_float_round_mode_32") {
5094       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5095                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5096     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5097       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5098                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5099     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5100       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5101                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5102     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5103       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5104                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5105                        ValRange);
5106     } else if (ID == ".amdhsa_dx10_clamp") {
5107       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5108                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5109     } else if (ID == ".amdhsa_ieee_mode") {
5110       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5111                        Val, ValRange);
5112     } else if (ID == ".amdhsa_fp16_overflow") {
5113       if (IVersion.Major < 9)
5114         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5115       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5116                        ValRange);
5117     } else if (ID == ".amdhsa_tg_split") {
5118       if (!isGFX90A())
5119         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5120       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5121                        ValRange);
5122     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5123       if (IVersion.Major < 10)
5124         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5125       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5126                        ValRange);
5127     } else if (ID == ".amdhsa_memory_ordered") {
5128       if (IVersion.Major < 10)
5129         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5130       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5131                        ValRange);
5132     } else if (ID == ".amdhsa_forward_progress") {
5133       if (IVersion.Major < 10)
5134         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5135       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5136                        ValRange);
5137     } else if (ID == ".amdhsa_shared_vgpr_count") {
5138       if (IVersion.Major < 10)
5139         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5140       SharedVGPRCount = Val;
5141       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5142                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5143                        ValRange);
5144     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5145       PARSE_BITS_ENTRY(
5146           KD.compute_pgm_rsrc2,
5147           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5148           ValRange);
5149     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5150       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5151                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5152                        Val, ValRange);
5153     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5154       PARSE_BITS_ENTRY(
5155           KD.compute_pgm_rsrc2,
5156           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5157           ValRange);
5158     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5159       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5160                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5161                        Val, ValRange);
5162     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5163       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5164                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5165                        Val, ValRange);
5166     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5167       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5168                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5169                        Val, ValRange);
5170     } else if (ID == ".amdhsa_exception_int_div_zero") {
5171       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5172                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5173                        Val, ValRange);
5174     } else {
5175       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5176     }
5177 
5178 #undef PARSE_BITS_ENTRY
5179   }
5180 
5181   if (!Seen.contains(".amdhsa_next_free_vgpr"))
5182     return TokError(".amdhsa_next_free_vgpr directive is required");
5183 
5184   if (!Seen.contains(".amdhsa_next_free_sgpr"))
5185     return TokError(".amdhsa_next_free_sgpr directive is required");
5186 
5187   unsigned VGPRBlocks;
5188   unsigned SGPRBlocks;
5189   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5190                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5191                          EnableWavefrontSize32, NextFreeVGPR,
5192                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5193                          SGPRBlocks))
5194     return true;
5195 
5196   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5197           VGPRBlocks))
5198     return OutOfRangeError(VGPRRange);
5199   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5200                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5201 
5202   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5203           SGPRBlocks))
5204     return OutOfRangeError(SGPRRange);
5205   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5206                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5207                   SGPRBlocks);
5208 
5209   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5210     return TokError(".amdhsa_user_sgpr_count smaller than implied by "
5211                     "enabled user SGPRs");
5212 
5213   unsigned UserSGPRCount =
5214       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5215 
5216   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5217     return TokError("too many user SGPRs enabled");
5218   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5219                   UserSGPRCount);
5220 
5221   if (isGFX90A()) {
5222     if (!Seen.contains(".amdhsa_accum_offset"))
5223       return TokError(".amdhsa_accum_offset directive is required");
5224     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5225       return TokError("accum_offset should be in range [4..256] in "
5226                       "increments of 4");
5227     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5228       return TokError("accum_offset exceeds total VGPR allocation");
5229     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5230                     (AccumOffset / 4 - 1));
5231   }
5232 
5233   if (IVersion.Major >= 10) {
5234     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY.
5235     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5236       return TokError("shared_vgpr_count directive not valid on "
5237                       "wavefront size 32");
5238     }
5239     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5240       return TokError("shared_vgpr_count*2 + "
5241                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5242                       "exceed 63");
5243     }
5244   }
5245 
5246   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5247       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5248       ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
5249   return false;
5250 }
5251 
5252 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5253   uint32_t Major;
5254   uint32_t Minor;
5255 
5256   if (ParseDirectiveMajorMinor(Major, Minor))
5257     return true;
5258 
5259   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5260   return false;
5261 }
5262 
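     // .hsa_code_object_isa [<major>, <minor>, <stepping>, "<vendor>", "<arch>"]
     // With no operands, the values are derived from the target's ISA version.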
5263 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5264   uint32_t Major;
5265   uint32_t Minor;
5266   uint32_t Stepping;
5267   StringRef VendorName;
5268   StringRef ArchName;
5269 
5270   // If this directive has no arguments, then use the ISA version for the
5271   // targeted GPU.
5272   if (isToken(AsmToken::EndOfStatement)) {
5273     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5274     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5275                                                         ISA.Stepping,
5276                                                         "AMD", "AMDGPU");
5277     return false;
5278   }
5279 
5280   if (ParseDirectiveMajorMinor(Major, Minor))
5281     return true;
5282 
5283   if (!trySkipToken(AsmToken::Comma))
5284     return TokError("stepping version number required, comma expected");
5285 
5286   if (ParseAsAbsoluteExpression(Stepping))
5287     return TokError("invalid stepping version");
5288 
5289   if (!trySkipToken(AsmToken::Comma))
5290     return TokError("vendor name required, comma expected");
5291 
5292   if (!parseString(VendorName, "invalid vendor name"))
5293     return true;
5294 
5295   if (!trySkipToken(AsmToken::Comma))
5296     return TokError("arch name required, comma expected");
5297 
5298   if (!parseString(ArchName, "invalid arch name"))
5299     return true;
5300 
5301   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5302                                                       VendorName, ArchName);
5303   return false;
5304 }
5305 
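     // Parses a single "key = value" entry of an .amd_kernel_code_t block and
     // applies the wavefront-size and gfx10-specific consistency checks that the
     // generic field parser cannot express.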
5306 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5307                                                amd_kernel_code_t &Header) {
5308   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5309   // assembly for backwards compatibility.
5310   if (ID == "max_scratch_backing_memory_byte_size") {
5311     Parser.eatToEndOfStatement();
5312     return false;
5313   }
5314 
5315   SmallString<40> ErrStr;
5316   raw_svector_ostream Err(ErrStr);
5317   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5318     return TokError(Err.str());
5319   }
5320   Lex();
5321 
5322   if (ID == "enable_wavefront_size32") {
5323     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5324       if (!isGFX10Plus())
5325         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5326       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5327         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5328     } else {
5329       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5330         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5331     }
5332   }
5333 
5334   if (ID == "wavefront_size") {
5335     if (Header.wavefront_size == 5) {
5336       if (!isGFX10Plus())
5337         return TokError("wavefront_size=5 is only allowed on GFX10+");
5338       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5339         return TokError("wavefront_size=5 requires +WavefrontSize32");
5340     } else if (Header.wavefront_size == 6) {
5341       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5342         return TokError("wavefront_size=6 requires +WavefrontSize64");
5343     }
5344   }
5345 
5346   if (ID == "enable_wgp_mode") {
5347     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5348         !isGFX10Plus())
5349       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5350   }
5351 
5352   if (ID == "enable_mem_ordered") {
5353     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5354         !isGFX10Plus())
5355       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5356   }
5357 
5358   if (ID == "enable_fwd_progress") {
5359     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5360         !isGFX10Plus())
5361       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5362   }
5363 
5364   return false;
5365 }
5366 
5367 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5368   amd_kernel_code_t Header;
5369   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5370 
5371   while (true) {
5372     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5373     // will set the current token to EndOfStatement.
5374     while (trySkipToken(AsmToken::EndOfStatement));
5375 
5376     StringRef ID;
5377     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5378       return true;
5379 
5380     if (ID == ".end_amd_kernel_code_t")
5381       break;
5382 
5383     if (ParseAMDKernelCodeTValue(ID, Header))
5384       return true;
5385   }
5386 
5387   getTargetStreamer().EmitAMDKernelCodeT(Header);
5388 
5389   return false;
5390 }
5391 
5392 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5393   StringRef KernelName;
5394   if (!parseId(KernelName, "expected symbol name"))
5395     return true;
5396 
5397   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5398                                            ELF::STT_AMDGPU_HSA_KERNEL);
5399 
5400   KernelScope.initialize(getContext());
5401   return false;
5402 }
5403 
5404 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5405   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5406     return Error(getLoc(),
5407                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5408                  "architectures");
5409   }
5410 
5411   auto TargetIDDirective = getLexer().getTok().getStringContents();
5412   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5413     return Error(getParser().getTok().getLoc(), "target id must match options");
5414 
5415   getTargetStreamer().EmitISAVersion();
5416   Lex();
5417 
5418   return false;
5419 }
5420 
5421 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5422   const char *AssemblerDirectiveBegin;
5423   const char *AssemblerDirectiveEnd;
5424   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5425       isHsaAbiVersion3AndAbove(&getSTI())
5426           ? std::pair(HSAMD::V3::AssemblerDirectiveBegin,
5427                       HSAMD::V3::AssemblerDirectiveEnd)
5428           : std::pair(HSAMD::AssemblerDirectiveBegin,
5429                       HSAMD::AssemblerDirectiveEnd);
5430 
5431   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5432     return Error(getLoc(),
5433                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5434                  "not available on non-amdhsa OSes")).str());
5435   }
5436 
5437   std::string HSAMetadataString;
5438   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5439                           HSAMetadataString))
5440     return true;
5441 
5442   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5443     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5444       return Error(getLoc(), "invalid HSA metadata");
5445   } else {
5446     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5447       return Error(getLoc(), "invalid HSA metadata");
5448   }
5449 
5450   return false;
5451 }
5452 
5453 /// Common code to parse out a block of text (typically YAML) between start and
5454 /// end directives.
5455 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5456                                           const char *AssemblerDirectiveEnd,
5457                                           std::string &CollectString) {
5458 
5459   raw_string_ostream CollectStream(CollectString);
5460 
5461   getLexer().setSkipSpace(false);
5462 
5463   bool FoundEnd = false;
5464   while (!isToken(AsmToken::Eof)) {
5465     while (isToken(AsmToken::Space)) {
5466       CollectStream << getTokenStr();
5467       Lex();
5468     }
5469 
5470     if (trySkipId(AssemblerDirectiveEnd)) {
5471       FoundEnd = true;
5472       break;
5473     }
5474 
5475     CollectStream << Parser.parseStringToEndOfStatement()
5476                   << getContext().getAsmInfo()->getSeparatorString();
5477 
5478     Parser.eatToEndOfStatement();
5479   }
5480 
5481   getLexer().setSkipSpace(true);
5482 
5483   if (isToken(AsmToken::Eof) && !FoundEnd) {
5484     return TokError(Twine("expected directive ") +
5485                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5486   }
5487 
5488   CollectStream.flush();
5489   return false;
5490 }
5491 
5492 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5493 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5494   std::string String;
5495   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5496                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5497     return true;
5498 
5499   auto PALMetadata = getTargetStreamer().getPALMetadata();
5500   if (!PALMetadata->setFromString(String))
5501     return Error(getLoc(), "invalid PAL metadata");
5502   return false;
5503 }
5504 
5505 /// Parse the assembler directive for old linear-format PAL metadata.
5506 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5507   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5508     return Error(getLoc(),
5509                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5510                  "not available on non-amdpal OSes")).str());
5511   }
5512 
5513   auto PALMetadata = getTargetStreamer().getPALMetadata();
5514   PALMetadata->setLegacy();
5515   for (;;) {
5516     uint32_t Key, Value;
5517     if (ParseAsAbsoluteExpression(Key)) {
5518       return TokError(Twine("invalid value in ") +
5519                       Twine(PALMD::AssemblerDirective));
5520     }
5521     if (!trySkipToken(AsmToken::Comma)) {
5522       return TokError(Twine("expected an even number of values in ") +
5523                       Twine(PALMD::AssemblerDirective));
5524     }
5525     if (ParseAsAbsoluteExpression(Value)) {
5526       return TokError(Twine("invalid value in ") +
5527                       Twine(PALMD::AssemblerDirective));
5528     }
5529     PALMetadata->setRegister(Key, Value);
5530     if (!trySkipToken(AsmToken::Comma))
5531       break;
5532   }
5533   return false;
5534 }
5535 
5536 /// ParseDirectiveAMDGPULDS
5537 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
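     ///  e.g. .amdgpu_lds lds_symbol, 1024, 16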
5538 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5539   if (getParser().checkForValidSection())
5540     return true;
5541 
5542   StringRef Name;
5543   SMLoc NameLoc = getLoc();
5544   if (getParser().parseIdentifier(Name))
5545     return TokError("expected identifier in directive");
5546 
5547   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5548   if (getParser().parseComma())
5549     return true;
5550 
5551   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5552 
5553   int64_t Size;
5554   SMLoc SizeLoc = getLoc();
5555   if (getParser().parseAbsoluteExpression(Size))
5556     return true;
5557   if (Size < 0)
5558     return Error(SizeLoc, "size must be non-negative");
5559   if (Size > LocalMemorySize)
5560     return Error(SizeLoc, "size is too large");
5561 
5562   int64_t Alignment = 4;
5563   if (trySkipToken(AsmToken::Comma)) {
5564     SMLoc AlignLoc = getLoc();
5565     if (getParser().parseAbsoluteExpression(Alignment))
5566       return true;
5567     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5568       return Error(AlignLoc, "alignment must be a power of two");
5569 
5570     // Alignment larger than the size of LDS is possible in theory, as long
5571     // as the linker manages to place the symbol at address 0, but we do want
5572     // to make sure the alignment fits nicely into a 32-bit integer.
5573     if (Alignment >= 1u << 31)
5574       return Error(AlignLoc, "alignment is too large");
5575   }
5576 
5577   if (parseEOL())
5578     return true;
5579 
5580   Symbol->redefineIfPossible();
5581   if (!Symbol->isUndefined())
5582     return Error(NameLoc, "invalid symbol redefinition");
5583 
5584   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5585   return false;
5586 }
5587 
5588 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5589   StringRef IDVal = DirectiveID.getString();
5590 
5591   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5592     if (IDVal == ".amdhsa_kernel")
5593      return ParseDirectiveAMDHSAKernel();
5594 
5595     // TODO: Restructure/combine with PAL metadata directive.
5596     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5597       return ParseDirectiveHSAMetadata();
5598   } else {
5599     if (IDVal == ".hsa_code_object_version")
5600       return ParseDirectiveHSACodeObjectVersion();
5601 
5602     if (IDVal == ".hsa_code_object_isa")
5603       return ParseDirectiveHSACodeObjectISA();
5604 
5605     if (IDVal == ".amd_kernel_code_t")
5606       return ParseDirectiveAMDKernelCodeT();
5607 
5608     if (IDVal == ".amdgpu_hsa_kernel")
5609       return ParseDirectiveAMDGPUHsaKernel();
5610 
5611     if (IDVal == ".amd_amdgpu_isa")
5612       return ParseDirectiveISAVersion();
5613 
5614     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5615       return ParseDirectiveHSAMetadata();
5616   }
5617 
5618   if (IDVal == ".amdgcn_target")
5619     return ParseDirectiveAMDGCNTarget();
5620 
5621   if (IDVal == ".amdgpu_lds")
5622     return ParseDirectiveAMDGPULDS();
5623 
5624   if (IDVal == PALMD::AssemblerDirectiveBegin)
5625     return ParseDirectivePALMetadataBegin();
5626 
5627   if (IDVal == PALMD::AssemblerDirective)
5628     return ParseDirectivePALMetadata();
5629 
5630   return true;
5631 }
5632 
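     // Returns true if RegNo names a register that exists on the current
     // subtarget, e.g. ttmp12-ttmp15 only on gfx9+, flat_scratch not on SI or
     // gfx10+.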
5633 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5634                                            unsigned RegNo) {
5635 
5636   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5637     return isGFX9Plus();
5638 
5639   // GFX10+ has 2 more SGPRs 104 and 105.
5640   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5641     return hasSGPR104_SGPR105();
5642 
5643   switch (RegNo) {
5644   case AMDGPU::SRC_SHARED_BASE_LO:
5645   case AMDGPU::SRC_SHARED_BASE:
5646   case AMDGPU::SRC_SHARED_LIMIT_LO:
5647   case AMDGPU::SRC_SHARED_LIMIT:
5648   case AMDGPU::SRC_PRIVATE_BASE_LO:
5649   case AMDGPU::SRC_PRIVATE_BASE:
5650   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5651   case AMDGPU::SRC_PRIVATE_LIMIT:
5652     return isGFX9Plus();
5653   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5654     return isGFX9Plus() && !isGFX11Plus();
5655   case AMDGPU::TBA:
5656   case AMDGPU::TBA_LO:
5657   case AMDGPU::TBA_HI:
5658   case AMDGPU::TMA:
5659   case AMDGPU::TMA_LO:
5660   case AMDGPU::TMA_HI:
5661     return !isGFX9Plus();
5662   case AMDGPU::XNACK_MASK:
5663   case AMDGPU::XNACK_MASK_LO:
5664   case AMDGPU::XNACK_MASK_HI:
5665     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5666   case AMDGPU::SGPR_NULL:
5667     return isGFX10Plus();
5668   default:
5669     break;
5670   }
5671 
5672   if (isCI())
5673     return true;
5674 
5675   if (isSI() || isGFX10Plus()) {
5676     // No flat_scr on SI.
5677     // On GFX10Plus flat scratch is not a valid register operand and can only be
5678     // accessed with s_setreg/s_getreg.
5679     switch (RegNo) {
5680     case AMDGPU::FLAT_SCR:
5681     case AMDGPU::FLAT_SCR_LO:
5682     case AMDGPU::FLAT_SCR_HI:
5683       return false;
5684     default:
5685       return true;
5686     }
5687   }
5688 
5689   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5690   // SI/CI have.
5691   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5692     return hasSGPR102_SGPR103();
5693 
5694   return true;
5695 }
5696 
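     // Parses a single instruction operand. In NSA mode (gfx10+ MIMG), a
     // bracketed register list such as "[v4, v5, v6]" is also accepted and is
     // re-wrapped in explicit "[" / "]" token operands when it holds more than
     // one register.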
5697 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
5698                                           StringRef Mnemonic,
5699                                           OperandMode Mode) {
5700   ParseStatus Res = parseVOPD(Operands);
5701   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
5702     return Res;
5703 
5704   // Try to parse with a custom parser
5705   Res = MatchOperandParserImpl(Operands, Mnemonic);
5706 
5707   // If we successfully parsed the operand or if there was an error parsing,
5708   // we are done.
5709   //
5710   // If we are parsing after we reach EndOfStatement then this means we
5711   // are appending default values to the Operands list.  This is only done
5712   // by custom parser, so we shouldn't continue on to the generic parsing.
5713   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
5714     return Res;
5715 
5716   SMLoc RBraceLoc;
5717   SMLoc LBraceLoc = getLoc();
5718   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5719     unsigned Prefix = Operands.size();
5720 
5721     for (;;) {
5722       auto Loc = getLoc();
5723       Res = parseReg(Operands);
5724       if (Res.isNoMatch())
5725         Error(Loc, "expected a register");
5726       if (!Res.isSuccess())
5727         return ParseStatus::Failure;
5728 
5729       RBraceLoc = getLoc();
5730       if (trySkipToken(AsmToken::RBrac))
5731         break;
5732 
5733       if (!skipToken(AsmToken::Comma,
5734                      "expected a comma or a closing square bracket"))
5735         return ParseStatus::Failure;
5736     }
5737 
5738     if (Operands.size() - Prefix > 1) {
5739       Operands.insert(Operands.begin() + Prefix,
5740                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5741       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5742     }
5743 
5744     return ParseStatus::Success;
5745   }
5746 
5747   return parseRegOrImm(Operands);
5748 }
5749 
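     // Strips a forced-encoding suffix from the mnemonic and records it, e.g.
     // "v_add_f32_e64" is matched as "v_add_f32" with a forced 64-bit encoding.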
5750 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5751   // Clear any forced encodings from the previous instruction.
5752   setForcedEncodingSize(0);
5753   setForcedDPP(false);
5754   setForcedSDWA(false);
5755 
5756   if (Name.endswith("_e64_dpp")) {
5757     setForcedDPP(true);
5758     setForcedEncodingSize(64);
5759     return Name.substr(0, Name.size() - 8);
5760   } else if (Name.endswith("_e64")) {
5761     setForcedEncodingSize(64);
5762     return Name.substr(0, Name.size() - 4);
5763   } else if (Name.endswith("_e32")) {
5764     setForcedEncodingSize(32);
5765     return Name.substr(0, Name.size() - 4);
5766   } else if (Name.endswith("_dpp")) {
5767     setForcedDPP(true);
5768     return Name.substr(0, Name.size() - 4);
5769   } else if (Name.endswith("_sdwa")) {
5770     setForcedSDWA(true);
5771     return Name.substr(0, Name.size() - 5);
5772   }
5773   return Name;
5774 }
5775 
5776 static void applyMnemonicAliases(StringRef &Mnemonic,
5777                                  const FeatureBitset &Features,
5778                                  unsigned VariantID);
5779 
5780 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5781                                        StringRef Name,
5782                                        SMLoc NameLoc, OperandVector &Operands) {
5783   // Add the instruction mnemonic
5784   Name = parseMnemonicSuffix(Name);
5785 
5786   // If the target architecture uses MnemonicAlias, call it here to parse
5787   // operands correctly.
5788   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5789 
5790   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5791 
5792   bool IsMIMG = Name.startswith("image_");
5793 
5794   while (!trySkipToken(AsmToken::EndOfStatement)) {
5795     OperandMode Mode = OperandMode_Default;
5796     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5797       Mode = OperandMode_NSA;
5798     ParseStatus Res = parseOperand(Operands, Name, Mode);
5799 
5800     if (!Res.isSuccess()) {
5801       checkUnsupportedInstruction(Name, NameLoc);
5802       if (!Parser.hasPendingError()) {
5803         // FIXME: use real operand location rather than the current location.
5804         StringRef Msg = Res.isFailure() ? "failed parsing operand."
5805                                         : "not a valid operand.";
5806         Error(getLoc(), Msg);
5807       }
5808       while (!trySkipToken(AsmToken::EndOfStatement)) {
5809         lex();
5810       }
5811       return true;
5812     }
5813 
5814     // Eat the comma or space if there is one.
5815     trySkipToken(AsmToken::Comma);
5816   }
5817 
5818   return false;
5819 }
5820 
5821 //===----------------------------------------------------------------------===//
5822 // Utility functions
5823 //===----------------------------------------------------------------------===//
5824 
5825 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
5826                                           OperandVector &Operands) {
5827   SMLoc S = getLoc();
5828   if (!trySkipId(Name))
5829     return ParseStatus::NoMatch;
5830 
5831   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
5832   return ParseStatus::Success;
5833 }
5834 
5835 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
5836                                                 int64_t &IntVal) {
5837 
5838   if (!trySkipId(Prefix, AsmToken::Colon))
5839     return ParseStatus::NoMatch;
5840 
5841   return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
5842 }
5843 
5844 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
5845     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
5846     std::function<bool(int64_t &)> ConvertResult) {
5847   SMLoc S = getLoc();
5848   int64_t Value = 0;
5849 
5850   ParseStatus Res = parseIntWithPrefix(Prefix, Value);
5851   if (!Res.isSuccess())
5852     return Res;
5853 
5854   if (ConvertResult && !ConvertResult(Value)) {
5855     Error(S, "invalid " + StringRef(Prefix) + " value.");
5856   }
5857 
5858   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5859   return ParseStatus::Success;
5860 }
5861 
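     // Parses "<Prefix>:[x,...]" where each element is 0 or 1 (at most four of
     // them, e.g. "op_sel:[0,1]") and packs the bits into a single immediate.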
5862 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
5863     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
5864     bool (*ConvertResult)(int64_t &)) {
5865   SMLoc S = getLoc();
5866   if (!trySkipId(Prefix, AsmToken::Colon))
5867     return ParseStatus::NoMatch;
5868 
5869   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5870     return ParseStatus::Failure;
5871 
5872   unsigned Val = 0;
5873   const unsigned MaxSize = 4;
5874 
5875   // FIXME: How to verify the number of elements matches the number of src
5876   // operands?
5877   for (int I = 0; ; ++I) {
5878     int64_t Op;
5879     SMLoc Loc = getLoc();
5880     if (!parseExpr(Op))
5881       return ParseStatus::Failure;
5882 
5883     if (Op != 0 && Op != 1)
5884       return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5885 
5886     Val |= (Op << I);
5887 
5888     if (trySkipToken(AsmToken::RBrac))
5889       break;
5890 
5891     if (I + 1 == MaxSize)
5892       return Error(getLoc(), "expected a closing square bracket");
5893 
5894     if (!skipToken(AsmToken::Comma, "expected a comma"))
5895       return ParseStatus::Failure;
5896   }
5897 
5898   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5899   return ParseStatus::Success;
5900 }
5901 
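     // Parses a named single-bit flag written either as "<Name>" (bit = 1) or
     // "no<Name>" (bit = 0), e.g. "r128" / "nor128".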
5902 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
5903                                            OperandVector &Operands,
5904                                            AMDGPUOperand::ImmTy ImmTy) {
5905   int64_t Bit;
5906   SMLoc S = getLoc();
5907 
5908   if (trySkipId(Name)) {
5909     Bit = 1;
5910   } else if (trySkipId("no", Name)) {
5911     Bit = 0;
5912   } else {
5913     return ParseStatus::NoMatch;
5914   }
5915 
5916   if (Name == "r128" && !hasMIMG_R128())
5917     return Error(S, "r128 modifier is not supported on this GPU");
5918   if (Name == "a16" && !hasA16())
5919     return Error(S, "a16 modifier is not supported on this GPU");
5920 
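       // GFX9 instruction definitions expose this bit as the combined r128/a16
       // operand, so retag a16 accordingly.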
5921   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5922     ImmTy = AMDGPUOperand::ImmTyR128A16;
5923 
5924   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5925   return ParseStatus::Success;
5926 }
5927 
5928 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
5929                                       bool &Disabling) const {
5930   Disabling = Id.consume_front("no");
5931 
5932   if (isGFX940() && !Mnemo.startswith("s_")) {
5933     return StringSwitch<unsigned>(Id)
5934         .Case("nt", AMDGPU::CPol::NT)
5935         .Case("sc0", AMDGPU::CPol::SC0)
5936         .Case("sc1", AMDGPU::CPol::SC1)
5937         .Default(0);
5938   }
5939 
5940   return StringSwitch<unsigned>(Id)
5941       .Case("dlc", AMDGPU::CPol::DLC)
5942       .Case("glc", AMDGPU::CPol::GLC)
5943       .Case("scc", AMDGPU::CPol::SCC)
5944       .Case("slc", AMDGPU::CPol::SLC)
5945       .Default(0);
5946 }
5947 
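     // Parses a sequence of cache-policy modifiers (e.g. "glc slc dlc", or their
     // "no"-prefixed forms) into a single ImmTyCPol immediate operand.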
5948 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5949   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5950   SMLoc OpLoc = getLoc();
5951   unsigned Enabled = 0, Seen = 0;
5952   for (;;) {
5953     SMLoc S = getLoc();
5954     bool Disabling;
5955     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
5956     if (!CPol)
5957       break;
5958 
5959     lex();
5960 
5961     if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
5962       return Error(S, "dlc modifier is not supported on this GPU");
5963 
5964     if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
5965       return Error(S, "scc modifier is not supported on this GPU");
5966 
5967     if (Seen & CPol)
5968       return Error(S, "duplicate cache policy modifier");
5969 
5970     if (!Disabling)
5971       Enabled |= CPol;
5972 
5973     Seen |= CPol;
5974   }
5975 
5976   if (!Seen)
5977     return ParseStatus::NoMatch;
5978 
5979   Operands.push_back(
5980       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
5981   return ParseStatus::Success;
5982 }
5983 
5984 static void addOptionalImmOperand(
5985   MCInst& Inst, const OperandVector& Operands,
5986   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5987   AMDGPUOperand::ImmTy ImmT,
5988   int64_t Default = 0) {
5989   auto i = OptionalIdx.find(ImmT);
5990   if (i != OptionalIdx.end()) {
5991     unsigned Idx = i->second;
5992     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5993   } else {
5994     Inst.addOperand(MCOperand::createImm(Default));
5995   }
5996 }
5997 
5998 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5999                                                    StringRef &Value,
6000                                                    SMLoc &StringLoc) {
6001   if (!trySkipId(Prefix, AsmToken::Colon))
6002     return ParseStatus::NoMatch;
6003 
6004   StringLoc = getLoc();
6005   return parseId(Value, "expected an identifier") ? ParseStatus::Success
6006                                                   : ParseStatus::Failure;
6007 }
6008 
6009 //===----------------------------------------------------------------------===//
6010 // MTBUF format
6011 //===----------------------------------------------------------------------===//
6012 
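     // Parses "<Pref>:<value>" into Fmt. Returns false only on a hard error
     // (failed parse or out-of-range value); returns true both on success and
     // when the prefix is simply absent.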
6013 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6014                                   int64_t MaxVal,
6015                                   int64_t &Fmt) {
6016   int64_t Val;
6017   SMLoc Loc = getLoc();
6018 
6019   auto Res = parseIntWithPrefix(Pref, Val);
6020   if (Res.isFailure())
6021     return false;
6022   if (Res.isNoMatch())
6023     return true;
6024 
6025   if (Val < 0 || Val > MaxVal) {
6026     Error(Loc, Twine("out of range ", StringRef(Pref)));
6027     return false;
6028   }
6029 
6030   Fmt = Val;
6031   return true;
6032 }
6033 
6034 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6035 // values to live in a joint format operand in the MCInst encoding.
6036 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6037   using namespace llvm::AMDGPU::MTBUFFormat;
6038 
6039   int64_t Dfmt = DFMT_UNDEF;
6040   int64_t Nfmt = NFMT_UNDEF;
6041 
6042   // dfmt and nfmt can appear in either order, and each is optional.
6043   for (int I = 0; I < 2; ++I) {
6044     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6045       return ParseStatus::Failure;
6046 
6047     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6048       return ParseStatus::Failure;
6049 
6050     // Skip optional comma between dfmt/nfmt
6051     // but guard against 2 commas following each other.
6052     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6053         !peekToken().is(AsmToken::Comma)) {
6054       trySkipToken(AsmToken::Comma);
6055     }
6056   }
6057 
6058   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6059     return ParseStatus::NoMatch;
6060 
6061   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6062   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6063 
6064   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6065   return ParseStatus::Success;
6066 }
6067 
6068 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6069   using namespace llvm::AMDGPU::MTBUFFormat;
6070 
6071   int64_t Fmt = UFMT_UNDEF;
6072 
6073   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6074     return ParseStatus::Failure;
6075 
6076   if (Fmt == UFMT_UNDEF)
6077     return ParseStatus::NoMatch;
6078 
6079   Format = Fmt;
6080   return ParseStatus::Success;
6081 }
6082 
6083 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6084                                     int64_t &Nfmt,
6085                                     StringRef FormatStr,
6086                                     SMLoc Loc) {
6087   using namespace llvm::AMDGPU::MTBUFFormat;
6088   int64_t Format;
6089 
6090   Format = getDfmt(FormatStr);
6091   if (Format != DFMT_UNDEF) {
6092     Dfmt = Format;
6093     return true;
6094   }
6095 
6096   Format = getNfmt(FormatStr, getSTI());
6097   if (Format != NFMT_UNDEF) {
6098     Nfmt = Format;
6099     return true;
6100   }
6101 
6102   Error(Loc, "unsupported format");
6103   return false;
6104 }
6105 
6106 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6107                                                       SMLoc FormatLoc,
6108                                                       int64_t &Format) {
6109   using namespace llvm::AMDGPU::MTBUFFormat;
6110 
6111   int64_t Dfmt = DFMT_UNDEF;
6112   int64_t Nfmt = NFMT_UNDEF;
6113   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6114     return ParseStatus::Failure;
6115 
6116   if (trySkipToken(AsmToken::Comma)) {
6117     StringRef Str;
6118     SMLoc Loc = getLoc();
6119     if (!parseId(Str, "expected a format string") ||
6120         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6121       return ParseStatus::Failure;
6122     if (Dfmt == DFMT_UNDEF)
6123       return Error(Loc, "duplicate numeric format");
6124     if (Nfmt == NFMT_UNDEF)
6125       return Error(Loc, "duplicate data format");
6126   }
6127 
6128   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6129   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6130 
6131   if (isGFX10Plus()) {
6132     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6133     if (Ufmt == UFMT_UNDEF)
6134       return Error(FormatLoc, "unsupported format");
6135     Format = Ufmt;
6136   } else {
6137     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6138   }
6139 
6140   return ParseStatus::Success;
6141 }
6142 
6143 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6144                                                         SMLoc Loc,
6145                                                         int64_t &Format) {
6146   using namespace llvm::AMDGPU::MTBUFFormat;
6147 
6148   auto Id = getUnifiedFormat(FormatStr, getSTI());
6149   if (Id == UFMT_UNDEF)
6150     return ParseStatus::NoMatch;
6151 
6152   if (!isGFX10Plus())
6153     return Error(Loc, "unified format is not supported on this GPU");
6154 
6155   Format = Id;
6156   return ParseStatus::Success;
6157 }
6158 
6159 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6160   using namespace llvm::AMDGPU::MTBUFFormat;
6161   SMLoc Loc = getLoc();
6162 
6163   if (!parseExpr(Format))
6164     return ParseStatus::Failure;
6165   if (!isValidFormatEncoding(Format, getSTI()))
6166     return Error(Loc, "out of range format");
6167 
6168   return ParseStatus::Success;
6169 }
6170 
6171 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6172   using namespace llvm::AMDGPU::MTBUFFormat;
6173 
6174   if (!trySkipId("format", AsmToken::Colon))
6175     return ParseStatus::NoMatch;
6176 
6177   if (trySkipToken(AsmToken::LBrac)) {
6178     StringRef FormatStr;
6179     SMLoc Loc = getLoc();
6180     if (!parseId(FormatStr, "expected a format string"))
6181       return ParseStatus::Failure;
6182 
6183     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6184     if (Res.isNoMatch())
6185       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6186     if (!Res.isSuccess())
6187       return Res;
6188 
6189     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6190       return ParseStatus::Failure;
6191 
6192     return ParseStatus::Success;
6193   }
6194 
6195   return parseNumericFormat(Format);
6196 }
6197 
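// Parse the MTBUF format operand. The format may be given before soffset
// using the legacy dfmt/nfmt (or GFX10+ "format") syntax, or after soffset
// using the symbolic or numeric "format:" syntax; in the latter case the
// already-pushed default format operand is updated in place.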
6198 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6199   using namespace llvm::AMDGPU::MTBUFFormat;
6200 
6201   int64_t Format = getDefaultFormatEncoding(getSTI());
6202   ParseStatus Res;
6203   SMLoc Loc = getLoc();
6204 
6205   // Parse legacy format syntax.
6206   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6207   if (Res.isFailure())
6208     return Res;
6209 
6210   bool FormatFound = Res.isSuccess();
6211 
6212   Operands.push_back(
6213     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6214 
6215   if (FormatFound)
6216     trySkipToken(AsmToken::Comma);
6217 
6218   if (isToken(AsmToken::EndOfStatement)) {
6219     // We are expecting an soffset operand,
6220     // but let the matcher handle the error.
6221     return ParseStatus::Success;
6222   }
6223 
6224   // Parse soffset.
6225   Res = parseRegOrImm(Operands);
6226   if (!Res.isSuccess())
6227     return Res;
6228 
6229   trySkipToken(AsmToken::Comma);
6230 
6231   if (!FormatFound) {
6232     Res = parseSymbolicOrNumericFormat(Format);
6233     if (Res.isFailure())
6234       return Res;
6235     if (Res.isSuccess()) {
6236       auto Size = Operands.size();
6237       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6238       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6239       Op.setImm(Format);
6240     }
6241     return ParseStatus::Success;
6242   }
6243 
6244   if (isId("format") && peekToken().is(AsmToken::Colon))
6245     return Error(getLoc(), "duplicate format");
6246   return ParseStatus::Success;
6247 }
6248 
6249 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6250   ParseStatus Res =
6251       parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6252   if (Res.isNoMatch()) {
6253     Res = parseIntWithPrefix("inst_offset", Operands,
6254                              AMDGPUOperand::ImmTyInstOffset);
6255   }
6256   return Res;
6257 }
6258 
6259 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6260   ParseStatus Res =
6261       parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6262   if (Res.isNoMatch())
6263     Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6264   return Res;
6265 }
6266 
6267 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6268   ParseStatus Res =
6269       parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6270   if (Res.isNoMatch()) {
6271     Res =
6272         parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6273   }
6274   return Res;
6275 }
6276 
6277 //===----------------------------------------------------------------------===//
6278 // Exp
6279 //===----------------------------------------------------------------------===//
6280 
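// Convert parsed exp operands into an MCInst, computing the "en" mask from
// the sources that are enabled (i.e. not "off") and compacting the sources
// when the compr modifier is present.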
6281 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6282   OptionalImmIndexMap OptionalIdx;
6283 
6284   unsigned OperandIdx[4];
6285   unsigned EnMask = 0;
6286   int SrcIdx = 0;
6287 
6288   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6289     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6290 
6291     // Add the register arguments
6292     if (Op.isReg()) {
6293       assert(SrcIdx < 4);
6294       OperandIdx[SrcIdx] = Inst.size();
6295       Op.addRegOperands(Inst, 1);
6296       ++SrcIdx;
6297       continue;
6298     }
6299 
6300     if (Op.isOff()) {
6301       assert(SrcIdx < 4);
6302       OperandIdx[SrcIdx] = Inst.size();
6303       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6304       ++SrcIdx;
6305       continue;
6306     }
6307 
6308     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6309       Op.addImmOperands(Inst, 1);
6310       continue;
6311     }
6312 
6313     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6314       continue;
6315 
6316     // Handle optional arguments
6317     OptionalIdx[Op.getImmTy()] = i;
6318   }
6319 
6320   assert(SrcIdx == 4);
6321 
6322   bool Compr = false;
6323   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6324     Compr = true;
6325     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6326     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6327     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6328   }
6329 
6330   for (auto i = 0; i < SrcIdx; ++i) {
6331     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6332       EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
6333     }
6334   }
6335 
6336   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6337   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6338 
6339   Inst.addOperand(MCOperand::createImm(EnMask));
6340 }
6341 
6342 //===----------------------------------------------------------------------===//
6343 // s_waitcnt
6344 //===----------------------------------------------------------------------===//
6345 
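// Encode a single counter value into the combined waitcnt mask. Returns true
// on failure, i.e. when the value does not round-trip through encode/decode
// and saturation was not requested.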
6346 static bool
6347 encodeCnt(
6348   const AMDGPU::IsaVersion ISA,
6349   int64_t &IntVal,
6350   int64_t CntVal,
6351   bool Saturate,
6352   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6353   unsigned (*decode)(const IsaVersion &Version, unsigned))
6354 {
6355   bool Failed = false;
6356 
6357   IntVal = encode(ISA, IntVal, CntVal);
6358   if (CntVal != decode(ISA, IntVal)) {
6359     if (Saturate) {
6360       IntVal = encode(ISA, IntVal, -1);
6361     } else {
6362       Failed = true;
6363     }
6364   }
6365   return Failed;
6366 }
6367 
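// Parse one "<name>(<value>)" item of an s_waitcnt operand, where <name> is
// vmcnt, expcnt or lgkmcnt (optionally with a "_sat" suffix that clamps an
// oversized value), and merge the result into IntVal.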
6368 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6369 
6370   SMLoc CntLoc = getLoc();
6371   StringRef CntName = getTokenStr();
6372 
6373   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6374       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6375     return false;
6376 
6377   int64_t CntVal;
6378   SMLoc ValLoc = getLoc();
6379   if (!parseExpr(CntVal))
6380     return false;
6381 
6382   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6383 
6384   bool Failed = true;
6385   bool Sat = CntName.endswith("_sat");
6386 
6387   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6388     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6389   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6390     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6391   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6392     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6393   } else {
6394     Error(CntLoc, "invalid counter name " + CntName);
6395     return false;
6396   }
6397 
6398   if (Failed) {
6399     Error(ValLoc, "too large value for " + CntName);
6400     return false;
6401   }
6402 
6403   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6404     return false;
6405 
6406   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6407     if (isToken(AsmToken::EndOfStatement)) {
6408       Error(getLoc(), "expected a counter name");
6409       return false;
6410     }
6411   }
6412 
6413   return true;
6414 }
6415 
6416 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
6417   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6418   int64_t Waitcnt = getWaitcntBitMask(ISA);
6419   SMLoc S = getLoc();
6420 
6421   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6422     while (!isToken(AsmToken::EndOfStatement)) {
6423       if (!parseCnt(Waitcnt))
6424         return ParseStatus::Failure;
6425     }
6426   } else {
6427     if (!parseExpr(Waitcnt))
6428       return ParseStatus::Failure;
6429   }
6430 
6431   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6432   return ParseStatus::Success;
6433 }
6434 
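// Parse one "<field>(<value>)" item of an s_delay_alu operand, where <field>
// is instid0, instskip or instid1, and OR the encoded value into Delay at the
// field's bit position.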
6435 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6436   SMLoc FieldLoc = getLoc();
6437   StringRef FieldName = getTokenStr();
6438   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6439       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6440     return false;
6441 
6442   SMLoc ValueLoc = getLoc();
6443   StringRef ValueName = getTokenStr();
6444   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6445       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6446     return false;
6447 
6448   unsigned Shift;
6449   if (FieldName == "instid0") {
6450     Shift = 0;
6451   } else if (FieldName == "instskip") {
6452     Shift = 4;
6453   } else if (FieldName == "instid1") {
6454     Shift = 7;
6455   } else {
6456     Error(FieldLoc, "invalid field name " + FieldName);
6457     return false;
6458   }
6459 
6460   int Value;
6461   if (Shift == 4) {
6462     // Parse values for instskip.
6463     Value = StringSwitch<int>(ValueName)
6464                 .Case("SAME", 0)
6465                 .Case("NEXT", 1)
6466                 .Case("SKIP_1", 2)
6467                 .Case("SKIP_2", 3)
6468                 .Case("SKIP_3", 4)
6469                 .Case("SKIP_4", 5)
6470                 .Default(-1);
6471   } else {
6472     // Parse values for instid0 and instid1.
6473     Value = StringSwitch<int>(ValueName)
6474                 .Case("NO_DEP", 0)
6475                 .Case("VALU_DEP_1", 1)
6476                 .Case("VALU_DEP_2", 2)
6477                 .Case("VALU_DEP_3", 3)
6478                 .Case("VALU_DEP_4", 4)
6479                 .Case("TRANS32_DEP_1", 5)
6480                 .Case("TRANS32_DEP_2", 6)
6481                 .Case("TRANS32_DEP_3", 7)
6482                 .Case("FMA_ACCUM_CYCLE_1", 8)
6483                 .Case("SALU_CYCLE_1", 9)
6484                 .Case("SALU_CYCLE_2", 10)
6485                 .Case("SALU_CYCLE_3", 11)
6486                 .Default(-1);
6487   }
6488   if (Value < 0) {
6489     Error(ValueLoc, "invalid value name " + ValueName);
6490     return false;
6491   }
6492 
6493   Delay |= Value << Shift;
6494   return true;
6495 }
6496 
6497 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
6498   int64_t Delay = 0;
6499   SMLoc S = getLoc();
6500 
6501   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6502     do {
6503       if (!parseDelay(Delay))
6504         return ParseStatus::Failure;
6505     } while (trySkipToken(AsmToken::Pipe));
6506   } else {
6507     if (!parseExpr(Delay))
6508       return ParseStatus::Failure;
6509   }
6510 
6511   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6512   return ParseStatus::Success;
6513 }
6514 
6515 bool
6516 AMDGPUOperand::isSWaitCnt() const {
6517   return isImm();
6518 }
6519 
6520 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
6521 
6522 //===----------------------------------------------------------------------===//
6523 // DepCtr
6524 //===----------------------------------------------------------------------===//
6525 
6526 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6527                                   StringRef DepCtrName) {
6528   switch (ErrorId) {
6529   case OPR_ID_UNKNOWN:
6530     Error(Loc, Twine("invalid counter name ", DepCtrName));
6531     return;
6532   case OPR_ID_UNSUPPORTED:
6533     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6534     return;
6535   case OPR_ID_DUPLICATE:
6536     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6537     return;
6538   case OPR_VAL_INVALID:
6539     Error(Loc, Twine("invalid value for ", DepCtrName));
6540     return;
6541   default:
6542     assert(false);
6543   }
6544 }
6545 
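// Parse one "<name>(<value>)" item of a depctr operand and merge the encoded
// value into DepCtr. UsedOprMask tracks which fields have already been
// specified so that duplicates can be diagnosed.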
6546 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6547 
6548   using namespace llvm::AMDGPU::DepCtr;
6549 
6550   SMLoc DepCtrLoc = getLoc();
6551   StringRef DepCtrName = getTokenStr();
6552 
6553   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6554       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6555     return false;
6556 
6557   int64_t ExprVal;
6558   if (!parseExpr(ExprVal))
6559     return false;
6560 
6561   unsigned PrevOprMask = UsedOprMask;
6562   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6563 
6564   if (CntVal < 0) {
6565     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6566     return false;
6567   }
6568 
6569   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6570     return false;
6571 
6572   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6573     if (isToken(AsmToken::EndOfStatement)) {
6574       Error(getLoc(), "expected a counter name");
6575       return false;
6576     }
6577   }
6578 
6579   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6580   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6581   return true;
6582 }
6583 
6584 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
6585   using namespace llvm::AMDGPU::DepCtr;
6586 
6587   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6588   SMLoc Loc = getLoc();
6589 
6590   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6591     unsigned UsedOprMask = 0;
6592     while (!isToken(AsmToken::EndOfStatement)) {
6593       if (!parseDepCtr(DepCtr, UsedOprMask))
6594         return ParseStatus::Failure;
6595     }
6596   } else {
6597     if (!parseExpr(DepCtr))
6598       return ParseStatus::Failure;
6599   }
6600 
6601   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6602   return ParseStatus::Success;
6603 }
6604 
6605 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6606 
6607 //===----------------------------------------------------------------------===//
6608 // hwreg
6609 //===----------------------------------------------------------------------===//
6610 
6611 bool
6612 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6613                                 OperandInfoTy &Offset,
6614                                 OperandInfoTy &Width) {
6615   using namespace llvm::AMDGPU::Hwreg;
6616 
6617   // The register may be specified by name or using a numeric code
6618   HwReg.Loc = getLoc();
6619   if (isToken(AsmToken::Identifier) &&
6620       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6621     HwReg.IsSymbolic = true;
6622     lex(); // skip register name
6623   } else if (!parseExpr(HwReg.Id, "a register name")) {
6624     return false;
6625   }
6626 
6627   if (trySkipToken(AsmToken::RParen))
6628     return true;
6629 
6630   // parse optional params
6631   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6632     return false;
6633 
6634   Offset.Loc = getLoc();
6635   if (!parseExpr(Offset.Id))
6636     return false;
6637 
6638   if (!skipToken(AsmToken::Comma, "expected a comma"))
6639     return false;
6640 
6641   Width.Loc = getLoc();
6642   return parseExpr(Width.Id) &&
6643          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6644 }
6645 
6646 bool
6647 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6648                                const OperandInfoTy &Offset,
6649                                const OperandInfoTy &Width) {
6650 
6651   using namespace llvm::AMDGPU::Hwreg;
6652 
6653   if (HwReg.IsSymbolic) {
6654     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6655       Error(HwReg.Loc,
6656             "specified hardware register is not supported on this GPU");
6657       return false;
6658     }
6659   } else {
6660     if (!isValidHwreg(HwReg.Id)) {
6661       Error(HwReg.Loc,
6662             "invalid code of hardware register: only 6-bit values are legal");
6663       return false;
6664     }
6665   }
6666   if (!isValidHwregOffset(Offset.Id)) {
6667     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6668     return false;
6669   }
6670   if (!isValidHwregWidth(Width.Id)) {
6671     Error(Width.Loc,
6672           "invalid bitfield width: only values from 1 to 32 are legal");
6673     return false;
6674   }
6675   return true;
6676 }
6677 
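// Parse a hwreg operand: either "hwreg(<reg>[, <offset>, <width>])" with the
// register given by name or numeric code, or a raw immediate expression
// limited to 16 bits.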
6678 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6679   using namespace llvm::AMDGPU::Hwreg;
6680 
6681   int64_t ImmVal = 0;
6682   SMLoc Loc = getLoc();
6683 
6684   if (trySkipId("hwreg", AsmToken::LParen)) {
6685     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6686     OperandInfoTy Offset(OFFSET_DEFAULT_);
6687     OperandInfoTy Width(WIDTH_DEFAULT_);
6688     if (parseHwregBody(HwReg, Offset, Width) &&
6689         validateHwreg(HwReg, Offset, Width)) {
6690       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6691     } else {
6692       return ParseStatus::Failure;
6693     }
6694   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6695     if (ImmVal < 0 || !isUInt<16>(ImmVal))
6696       return Error(Loc, "invalid immediate: only 16-bit values are legal");
6697   } else {
6698     return ParseStatus::Failure;
6699   }
6700 
6701   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6702   return ParseStatus::Success;
6703 }
6704 
6705 bool AMDGPUOperand::isHwreg() const {
6706   return isImmTy(ImmTyHwreg);
6707 }
6708 
6709 //===----------------------------------------------------------------------===//
6710 // sendmsg
6711 //===----------------------------------------------------------------------===//
6712 
6713 bool
6714 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6715                                   OperandInfoTy &Op,
6716                                   OperandInfoTy &Stream) {
6717   using namespace llvm::AMDGPU::SendMsg;
6718 
6719   Msg.Loc = getLoc();
6720   if (isToken(AsmToken::Identifier) &&
6721       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6722     Msg.IsSymbolic = true;
6723     lex(); // skip message name
6724   } else if (!parseExpr(Msg.Id, "a message name")) {
6725     return false;
6726   }
6727 
6728   if (trySkipToken(AsmToken::Comma)) {
6729     Op.IsDefined = true;
6730     Op.Loc = getLoc();
6731     if (isToken(AsmToken::Identifier) &&
6732         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6733       lex(); // skip operation name
6734     } else if (!parseExpr(Op.Id, "an operation name")) {
6735       return false;
6736     }
6737 
6738     if (trySkipToken(AsmToken::Comma)) {
6739       Stream.IsDefined = true;
6740       Stream.Loc = getLoc();
6741       if (!parseExpr(Stream.Id))
6742         return false;
6743     }
6744   }
6745 
6746   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6747 }
6748 
6749 bool
6750 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6751                                  const OperandInfoTy &Op,
6752                                  const OperandInfoTy &Stream) {
6753   using namespace llvm::AMDGPU::SendMsg;
6754 
6755   // Validation strictness depends on whether the message is specified
6756   // in a symbolic or in a numeric form. In the latter case
6757   // only the possibility of encoding is checked.
6758   bool Strict = Msg.IsSymbolic;
6759 
6760   if (Strict) {
6761     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6762       Error(Msg.Loc, "specified message id is not supported on this GPU");
6763       return false;
6764     }
6765   } else {
6766     if (!isValidMsgId(Msg.Id, getSTI())) {
6767       Error(Msg.Loc, "invalid message id");
6768       return false;
6769     }
6770   }
6771   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6772     if (Op.IsDefined) {
6773       Error(Op.Loc, "message does not support operations");
6774     } else {
6775       Error(Msg.Loc, "missing message operation");
6776     }
6777     return false;
6778   }
6779   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6780     Error(Op.Loc, "invalid operation id");
6781     return false;
6782   }
6783   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6784       Stream.IsDefined) {
6785     Error(Stream.Loc, "message operation does not support streams");
6786     return false;
6787   }
6788   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6789     Error(Stream.Loc, "invalid message stream id");
6790     return false;
6791   }
6792   return true;
6793 }
6794 
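// Parse a sendmsg operand: either "sendmsg(<msg>[, <op>[, <stream>]])" or a
// raw immediate expression limited to 16 bits.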
6795 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
6796   using namespace llvm::AMDGPU::SendMsg;
6797 
6798   int64_t ImmVal = 0;
6799   SMLoc Loc = getLoc();
6800 
6801   if (trySkipId("sendmsg", AsmToken::LParen)) {
6802     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6803     OperandInfoTy Op(OP_NONE_);
6804     OperandInfoTy Stream(STREAM_ID_NONE_);
6805     if (parseSendMsgBody(Msg, Op, Stream) &&
6806         validateSendMsg(Msg, Op, Stream)) {
6807       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6808     } else {
6809       return ParseStatus::Failure;
6810     }
6811   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6812     if (ImmVal < 0 || !isUInt<16>(ImmVal))
6813       return Error(Loc, "invalid immediate: only 16-bit values are legal");
6814   } else {
6815     return ParseStatus::Failure;
6816   }
6817 
6818   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6819   return ParseStatus::Success;
6820 }
6821 
6822 bool AMDGPUOperand::isSendMsg() const {
6823   return isImmTy(ImmTySendMsg);
6824 }
6825 
6826 //===----------------------------------------------------------------------===//
6827 // v_interp
6828 //===----------------------------------------------------------------------===//
6829 
6830 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6831   StringRef Str;
6832   SMLoc S = getLoc();
6833 
6834   if (!parseId(Str))
6835     return ParseStatus::NoMatch;
6836 
6837   int Slot = StringSwitch<int>(Str)
6838     .Case("p10", 0)
6839     .Case("p20", 1)
6840     .Case("p0", 2)
6841     .Default(-1);
6842 
6843   if (Slot == -1)
6844     return Error(S, "invalid interpolation slot");
6845 
6846   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6847                                               AMDGPUOperand::ImmTyInterpSlot));
6848   return ParseStatus::Success;
6849 }
6850 
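// Parse an interpolation attribute of the form "attr<N>.<chan>", where <chan>
// is one of x, y, z or w.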
6851 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6852   StringRef Str;
6853   SMLoc S = getLoc();
6854 
6855   if (!parseId(Str))
6856     return ParseStatus::NoMatch;
6857 
6858   if (!Str.startswith("attr"))
6859     return Error(S, "invalid interpolation attribute");
6860 
6861   StringRef Chan = Str.take_back(2);
6862   int AttrChan = StringSwitch<int>(Chan)
6863     .Case(".x", 0)
6864     .Case(".y", 1)
6865     .Case(".z", 2)
6866     .Case(".w", 3)
6867     .Default(-1);
6868   if (AttrChan == -1)
6869     return Error(S, "invalid or missing interpolation attribute channel");
6870 
6871   Str = Str.drop_back(2).drop_front(4);
6872 
6873   uint8_t Attr;
6874   if (Str.getAsInteger(10, Attr))
6875     return Error(S, "invalid or missing interpolation attribute number");
6876 
6877   if (Attr > 32)
6878     return Error(S, "out of bounds interpolation attribute number");
6879 
6880   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6881 
6882   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6883                                               AMDGPUOperand::ImmTyInterpAttr));
6884   Operands.push_back(AMDGPUOperand::CreateImm(
6885       this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
6886   return ParseStatus::Success;
6887 }
6888 
6889 //===----------------------------------------------------------------------===//
6890 // exp
6891 //===----------------------------------------------------------------------===//
6892 
6893 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6894   using namespace llvm::AMDGPU::Exp;
6895 
6896   StringRef Str;
6897   SMLoc S = getLoc();
6898 
6899   if (!parseId(Str))
6900     return ParseStatus::NoMatch;
6901 
6902   unsigned Id = getTgtId(Str);
6903   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
6904     return Error(S, (Id == ET_INVALID)
6905                         ? "invalid exp target"
6906                         : "exp target is not supported on this GPU");
6907 
6908   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6909                                               AMDGPUOperand::ImmTyExpTgt));
6910   return ParseStatus::Success;
6911 }
6912 
6913 //===----------------------------------------------------------------------===//
6914 // parser helpers
6915 //===----------------------------------------------------------------------===//
6916 
6917 bool
6918 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6919   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6920 }
6921 
6922 bool
6923 AMDGPUAsmParser::isId(const StringRef Id) const {
6924   return isId(getToken(), Id);
6925 }
6926 
6927 bool
6928 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6929   return getTokenKind() == Kind;
6930 }
6931 
6932 StringRef AMDGPUAsmParser::getId() const {
6933   return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
6934 }
6935 
6936 bool
6937 AMDGPUAsmParser::trySkipId(const StringRef Id) {
6938   if (isId(Id)) {
6939     lex();
6940     return true;
6941   }
6942   return false;
6943 }
6944 
6945 bool
6946 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
6947   if (isToken(AsmToken::Identifier)) {
6948     StringRef Tok = getTokenStr();
6949     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
6950       lex();
6951       return true;
6952     }
6953   }
6954   return false;
6955 }
6956 
6957 bool
6958 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
6959   if (isId(Id) && peekToken().is(Kind)) {
6960     lex();
6961     lex();
6962     return true;
6963   }
6964   return false;
6965 }
6966 
6967 bool
6968 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6969   if (isToken(Kind)) {
6970     lex();
6971     return true;
6972   }
6973   return false;
6974 }
6975 
6976 bool
6977 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6978                            const StringRef ErrMsg) {
6979   if (!trySkipToken(Kind)) {
6980     Error(getLoc(), ErrMsg);
6981     return false;
6982   }
6983   return true;
6984 }
6985 
6986 bool
6987 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6988   SMLoc S = getLoc();
6989 
6990   const MCExpr *Expr;
6991   if (Parser.parseExpression(Expr))
6992     return false;
6993 
6994   if (Expr->evaluateAsAbsolute(Imm))
6995     return true;
6996 
6997   if (Expected.empty()) {
6998     Error(S, "expected absolute expression");
6999   } else {
7000     Error(S, Twine("expected ", Expected) +
7001              Twine(" or an absolute expression"));
7002   }
7003   return false;
7004 }
7005 
7006 bool
7007 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7008   SMLoc S = getLoc();
7009 
7010   const MCExpr *Expr;
7011   if (Parser.parseExpression(Expr))
7012     return false;
7013 
7014   int64_t IntVal;
7015   if (Expr->evaluateAsAbsolute(IntVal)) {
7016     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7017   } else {
7018     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7019   }
7020   return true;
7021 }
7022 
7023 bool
7024 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7025   if (isToken(AsmToken::String)) {
7026     Val = getToken().getStringContents();
7027     lex();
7028     return true;
7029   } else {
7030     Error(getLoc(), ErrMsg);
7031     return false;
7032   }
7033 }
7034 
7035 bool
7036 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7037   if (isToken(AsmToken::Identifier)) {
7038     Val = getTokenStr();
7039     lex();
7040     return true;
7041   } else {
7042     if (!ErrMsg.empty())
7043       Error(getLoc(), ErrMsg);
7044     return false;
7045   }
7046 }
7047 
7048 AsmToken
7049 AMDGPUAsmParser::getToken() const {
7050   return Parser.getTok();
7051 }
7052 
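// Peek at the next token without consuming it. At the end of a statement the
// current token is returned instead, so lookahead never crosses a statement
// boundary.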
7053 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7054   return isToken(AsmToken::EndOfStatement)
7055              ? getToken()
7056              : getLexer().peekTok(ShouldSkipSpace);
7057 }
7058 
7059 void
7060 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7061   auto TokCount = getLexer().peekTokens(Tokens);
7062 
7063   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7064     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7065 }
7066 
7067 AsmToken::TokenKind
7068 AMDGPUAsmParser::getTokenKind() const {
7069   return getLexer().getKind();
7070 }
7071 
7072 SMLoc
7073 AMDGPUAsmParser::getLoc() const {
7074   return getToken().getLoc();
7075 }
7076 
7077 StringRef
7078 AMDGPUAsmParser::getTokenStr() const {
7079   return getToken().getString();
7080 }
7081 
7082 void
7083 AMDGPUAsmParser::lex() {
7084   Parser.Lex();
7085 }
7086 
7087 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7088   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7089 }
7090 
7091 SMLoc
7092 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7093                                const OperandVector &Operands) const {
7094   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7095     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7096     if (Test(Op))
7097       return Op.getStartLoc();
7098   }
7099   return getInstLoc(Operands);
7100 }
7101 
7102 SMLoc
7103 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7104                            const OperandVector &Operands) const {
7105   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7106   return getOperandLoc(Test, Operands);
7107 }
7108 
7109 SMLoc
7110 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7111                            const OperandVector &Operands) const {
7112   auto Test = [=](const AMDGPUOperand& Op) {
7113     return Op.isRegKind() && Op.getReg() == Reg;
7114   };
7115   return getOperandLoc(Test, Operands);
7116 }
7117 
7118 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7119                                  bool SearchMandatoryLiterals) const {
7120   auto Test = [](const AMDGPUOperand& Op) {
7121     return Op.IsImmKindLiteral() || Op.isExpr();
7122   };
7123   SMLoc Loc = getOperandLoc(Test, Operands);
7124   if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7125     Loc = getMandatoryLitLoc(Operands);
7126   return Loc;
7127 }
7128 
7129 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7130   auto Test = [](const AMDGPUOperand &Op) {
7131     return Op.IsImmKindMandatoryLiteral();
7132   };
7133   return getOperandLoc(Test, Operands);
7134 }
7135 
7136 SMLoc
7137 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7138   auto Test = [](const AMDGPUOperand& Op) {
7139     return Op.isImmKindConst();
7140   };
7141   return getOperandLoc(Test, Operands);
7142 }
7143 
7144 //===----------------------------------------------------------------------===//
7145 // swizzle
7146 //===----------------------------------------------------------------------===//
7147 
7148 LLVM_READNONE
7149 static unsigned
7150 encodeBitmaskPerm(const unsigned AndMask,
7151                   const unsigned OrMask,
7152                   const unsigned XorMask) {
7153   using namespace llvm::AMDGPU::Swizzle;
7154 
7155   return BITMASK_PERM_ENC |
7156          (AndMask << BITMASK_AND_SHIFT) |
7157          (OrMask  << BITMASK_OR_SHIFT)  |
7158          (XorMask << BITMASK_XOR_SHIFT);
7159 }
7160 
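// Parse a single comma-prefixed swizzle macro operand and check that it lies
// in [MinVal, MaxVal]. Loc is set to the operand's location for diagnostics.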
7161 bool
7162 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7163                                      const unsigned MinVal,
7164                                      const unsigned MaxVal,
7165                                      const StringRef ErrMsg,
7166                                      SMLoc &Loc) {
7167   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7168     return false;
7169   }
7170   Loc = getLoc();
7171   if (!parseExpr(Op)) {
7172     return false;
7173   }
7174   if (Op < MinVal || Op > MaxVal) {
7175     Error(Loc, ErrMsg);
7176     return false;
7177   }
7178 
7179   return true;
7180 }
7181 
7182 bool
7183 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7184                                       const unsigned MinVal,
7185                                       const unsigned MaxVal,
7186                                       const StringRef ErrMsg) {
7187   SMLoc Loc;
7188   for (unsigned i = 0; i < OpNum; ++i) {
7189     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7190       return false;
7191   }
7192 
7193   return true;
7194 }
7195 
7196 bool
7197 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7198   using namespace llvm::AMDGPU::Swizzle;
7199 
7200   int64_t Lane[LANE_NUM];
7201   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7202                            "expected a 2-bit lane id")) {
7203     Imm = QUAD_PERM_ENC;
7204     for (unsigned I = 0; I < LANE_NUM; ++I) {
7205       Imm |= Lane[I] << (LANE_SHIFT * I);
7206     }
7207     return true;
7208   }
7209   return false;
7210 }
7211 
7212 bool
7213 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7214   using namespace llvm::AMDGPU::Swizzle;
7215 
7216   SMLoc Loc;
7217   int64_t GroupSize;
7218   int64_t LaneIdx;
7219 
7220   if (!parseSwizzleOperand(GroupSize,
7221                            2, 32,
7222                            "group size must be in the interval [2,32]",
7223                            Loc)) {
7224     return false;
7225   }
7226   if (!isPowerOf2_64(GroupSize)) {
7227     Error(Loc, "group size must be a power of two");
7228     return false;
7229   }
7230   if (parseSwizzleOperand(LaneIdx,
7231                           0, GroupSize - 1,
7232                           "lane id must be in the interval [0,group size - 1]",
7233                           Loc)) {
7234     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7235     return true;
7236   }
7237   return false;
7238 }
7239 
7240 bool
7241 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7242   using namespace llvm::AMDGPU::Swizzle;
7243 
7244   SMLoc Loc;
7245   int64_t GroupSize;
7246 
7247   if (!parseSwizzleOperand(GroupSize,
7248                            2, 32,
7249                            "group size must be in the interval [2,32]",
7250                            Loc)) {
7251     return false;
7252   }
7253   if (!isPowerOf2_64(GroupSize)) {
7254     Error(Loc, "group size must be a power of two");
7255     return false;
7256   }
7257 
7258   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7259   return true;
7260 }
7261 
7262 bool
7263 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7264   using namespace llvm::AMDGPU::Swizzle;
7265 
7266   SMLoc Loc;
7267   int64_t GroupSize;
7268 
7269   if (!parseSwizzleOperand(GroupSize,
7270                            1, 16,
7271                            "group size must be in the interval [1,16]",
7272                            Loc)) {
7273     return false;
7274   }
7275   if (!isPowerOf2_64(GroupSize)) {
7276     Error(Loc, "group size must be a power of two");
7277     return false;
7278   }
7279 
7280   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7281   return true;
7282 }
7283 
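// Parse the BITMASK_PERM swizzle mode. Each character of the 5-character
// control string maps one lane id bit (leftmost character = most significant
// bit): '0' forces it to 0, '1' forces it to 1, 'p' preserves it and 'i'
// inverts it.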
7284 bool
7285 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7286   using namespace llvm::AMDGPU::Swizzle;
7287 
7288   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7289     return false;
7290   }
7291 
7292   StringRef Ctl;
7293   SMLoc StrLoc = getLoc();
7294   if (!parseString(Ctl)) {
7295     return false;
7296   }
7297   if (Ctl.size() != BITMASK_WIDTH) {
7298     Error(StrLoc, "expected a 5-character mask");
7299     return false;
7300   }
7301 
7302   unsigned AndMask = 0;
7303   unsigned OrMask = 0;
7304   unsigned XorMask = 0;
7305 
7306   for (size_t i = 0; i < Ctl.size(); ++i) {
7307     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7308     switch(Ctl[i]) {
7309     default:
7310       Error(StrLoc, "invalid mask");
7311       return false;
7312     case '0':
7313       break;
7314     case '1':
7315       OrMask |= Mask;
7316       break;
7317     case 'p':
7318       AndMask |= Mask;
7319       break;
7320     case 'i':
7321       AndMask |= Mask;
7322       XorMask |= Mask;
7323       break;
7324     }
7325   }
7326 
7327   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7328   return true;
7329 }
7330 
7331 bool
7332 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7333 
7334   SMLoc OffsetLoc = getLoc();
7335 
7336   if (!parseExpr(Imm, "a swizzle macro")) {
7337     return false;
7338   }
7339   if (!isUInt<16>(Imm)) {
7340     Error(OffsetLoc, "expected a 16-bit offset");
7341     return false;
7342   }
7343   return true;
7344 }
7345 
7346 bool
7347 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7348   using namespace llvm::AMDGPU::Swizzle;
7349 
7350   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7351 
7352     SMLoc ModeLoc = getLoc();
7353     bool Ok = false;
7354 
7355     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7356       Ok = parseSwizzleQuadPerm(Imm);
7357     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7358       Ok = parseSwizzleBitmaskPerm(Imm);
7359     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7360       Ok = parseSwizzleBroadcast(Imm);
7361     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7362       Ok = parseSwizzleSwap(Imm);
7363     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7364       Ok = parseSwizzleReverse(Imm);
7365     } else {
7366       Error(ModeLoc, "expected a swizzle mode");
7367     }
7368 
7369     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7370   }
7371 
7372   return false;
7373 }
7374 
7375 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
7376   SMLoc S = getLoc();
7377   int64_t Imm = 0;
7378 
7379   if (trySkipId("offset")) {
7380 
7381     bool Ok = false;
7382     if (skipToken(AsmToken::Colon, "expected a colon")) {
7383       if (trySkipId("swizzle")) {
7384         Ok = parseSwizzleMacro(Imm);
7385       } else {
7386         Ok = parseSwizzleOffset(Imm);
7387       }
7388     }
7389 
7390     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7391 
7392     return Ok ? ParseStatus::Success : ParseStatus::Failure;
7393   }
7394   return ParseStatus::NoMatch;
7395 }
7396 
7397 bool
7398 AMDGPUOperand::isSwizzle() const {
7399   return isImmTy(ImmTySwizzle);
7400 }
7401 
7402 //===----------------------------------------------------------------------===//
7403 // VGPR Index Mode
7404 //===----------------------------------------------------------------------===//
7405 
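// Parse the body of a "gpr_idx(...)" macro: a comma-separated list of VGPR
// index modes combined into a bitmask. Returns OFF for an empty list and
// UNDEF on error.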
7406 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7407 
7408   using namespace llvm::AMDGPU::VGPRIndexMode;
7409 
7410   if (trySkipToken(AsmToken::RParen)) {
7411     return OFF;
7412   }
7413 
7414   int64_t Imm = 0;
7415 
7416   while (true) {
7417     unsigned Mode = 0;
7418     SMLoc S = getLoc();
7419 
7420     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7421       if (trySkipId(IdSymbolic[ModeId])) {
7422         Mode = 1 << ModeId;
7423         break;
7424       }
7425     }
7426 
7427     if (Mode == 0) {
7428       Error(S, (Imm == 0)?
7429                "expected a VGPR index mode or a closing parenthesis" :
7430                "expected a VGPR index mode");
7431       return UNDEF;
7432     }
7433 
7434     if (Imm & Mode) {
7435       Error(S, "duplicate VGPR index mode");
7436       return UNDEF;
7437     }
7438     Imm |= Mode;
7439 
7440     if (trySkipToken(AsmToken::RParen))
7441       break;
7442     if (!skipToken(AsmToken::Comma,
7443                    "expected a comma or a closing parenthesis"))
7444       return UNDEF;
7445   }
7446 
7447   return Imm;
7448 }
7449 
7450 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7451 
7452   using namespace llvm::AMDGPU::VGPRIndexMode;
7453 
7454   int64_t Imm = 0;
7455   SMLoc S = getLoc();
7456 
7457   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7458     Imm = parseGPRIdxMacro();
7459     if (Imm == UNDEF)
7460       return ParseStatus::Failure;
7461   } else {
7462     if (getParser().parseAbsoluteExpression(Imm))
7463       return ParseStatus::Failure;
7464     if (Imm < 0 || !isUInt<4>(Imm))
7465       return Error(S, "invalid immediate: only 4-bit values are legal");
7466   }
7467 
7468   Operands.push_back(
7469       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7470   return ParseStatus::Success;
7471 }
7472 
7473 bool AMDGPUOperand::isGPRIdxMode() const {
7474   return isImmTy(ImmTyGprIdxMode);
7475 }
7476 
7477 //===----------------------------------------------------------------------===//
7478 // sopp branch targets
7479 //===----------------------------------------------------------------------===//
7480 
7481 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
7482 
7483   // Make sure we are not parsing something
7484   // that looks like a label or an expression but is not.
7485   // This will improve error messages.
7486   if (isRegister() || isModifier())
7487     return ParseStatus::NoMatch;
7488 
7489   if (!parseExpr(Operands))
7490     return ParseStatus::Failure;
7491 
7492   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7493   assert(Opr.isImm() || Opr.isExpr());
7494   SMLoc Loc = Opr.getStartLoc();
7495 
7496   // Currently we do not support arbitrary expressions as branch targets.
7497   // Only labels and absolute expressions are accepted.
7498   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7499     Error(Loc, "expected an absolute expression or a label");
7500   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7501     Error(Loc, "expected a 16-bit signed jump offset");
7502   }
7503 
7504   return ParseStatus::Success;
7505 }
7506 
7507 //===----------------------------------------------------------------------===//
7508 // Boolean holding registers
7509 //===----------------------------------------------------------------------===//
7510 
7511 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7512   return parseReg(Operands);
7513 }
7514 
7515 //===----------------------------------------------------------------------===//
7516 // mubuf
7517 //===----------------------------------------------------------------------===//
7518 
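// Convert parsed MUBUF operands into an MCInst. For atomics, the no-return
// opcode is selected unless GLC is set, and the data register is added a
// second time to form the tied source for the atomic return destination.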
7519 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7520                                    const OperandVector &Operands,
7521                                    bool IsAtomic) {
7522   OptionalImmIndexMap OptionalIdx;
7523   unsigned FirstOperandIdx = 1;
7524   bool IsAtomicReturn = false;
7525 
7526   if (IsAtomic) {
7527     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7528       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7529       if (!Op.isCPol())
7530         continue;
7531       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7532       break;
7533     }
7534 
7535     if (!IsAtomicReturn) {
7536       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7537       if (NewOpc != -1)
7538         Inst.setOpcode(NewOpc);
7539     }
7540 
7541     IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7542                      SIInstrFlags::IsAtomicRet;
7543   }
7544 
7545   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7546     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7547 
7548     // Add the register arguments
7549     if (Op.isReg()) {
7550       Op.addRegOperands(Inst, 1);
7551       // Insert a tied src for atomic return dst.
7552       // This cannot be postponed as subsequent calls to
7553       // addImmOperands rely on the correct number of MC operands.
7554       if (IsAtomicReturn && i == FirstOperandIdx)
7555         Op.addRegOperands(Inst, 1);
7556       continue;
7557     }
7558 
7559     // Handle the case where soffset is an immediate
7560     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7561       Op.addImmOperands(Inst, 1);
7562       continue;
7563     }
7564 
7565     // Handle tokens like 'offen' which are sometimes hard-coded into the
7566     // asm string.  There are no MCInst operands for these.
7567     if (Op.isToken()) {
7568       continue;
7569     }
7570     assert(Op.isImm());
7571 
7572     // Handle optional arguments
7573     OptionalIdx[Op.getImmTy()] = i;
7574   }
7575 
7576   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7577   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7578 }
7579 
7580 //===----------------------------------------------------------------------===//
7581 // SMEM
7582 //===----------------------------------------------------------------------===//
7583 
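// Convert parsed SMEM atomic operands into an MCInst, selecting the no-return
// opcode unless GLC is set and adding the tied source for the atomic return
// destination.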
7584 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7585   OptionalImmIndexMap OptionalIdx;
7586   bool IsAtomicReturn = false;
7587 
7588   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7589     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7590     if (!Op.isCPol())
7591       continue;
7592     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7593     break;
7594   }
7595 
7596   if (!IsAtomicReturn) {
7597     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7598     if (NewOpc != -1)
7599       Inst.setOpcode(NewOpc);
7600   }
7601 
7602   IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
7603                    SIInstrFlags::IsAtomicRet;
7604 
7605   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7606     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7607 
7608     // Add the register arguments
7609     if (Op.isReg()) {
7610       Op.addRegOperands(Inst, 1);
7611       if (IsAtomicReturn && i == 1)
7612         Op.addRegOperands(Inst, 1);
7613       continue;
7614     }
7615 
7616     // Handle the case where soffset is an immediate
7617     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7618       Op.addImmOperands(Inst, 1);
7619       continue;
7620     }
7621 
7622     // Handle tokens like 'offen' which are sometimes hard-coded into the
7623     // asm string.  There are no MCInst operands for these.
7624     if (Op.isToken()) {
7625       continue;
7626     }
7627     assert(Op.isImm());
7628 
7629     // Handle optional arguments
7630     OptionalIdx[Op.getImmTy()] = i;
7631   }
7632 
7633   if ((int)Inst.getNumOperands() <=
7634       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7635     addOptionalImmOperand(Inst, Operands, OptionalIdx,
7636                           AMDGPUOperand::ImmTySMEMOffsetMod);
7637   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7638 }
7639 
7640 //===----------------------------------------------------------------------===//
7641 // smrd
7642 //===----------------------------------------------------------------------===//
7643 
7644 bool AMDGPUOperand::isSMRDOffset8() const {
7645   return isImmLiteral() && isUInt<8>(getImm());
7646 }
7647 
7648 bool AMDGPUOperand::isSMEMOffset() const {
7649   // Offset range is checked later by validator.
7650   return isImmLiteral();
7651 }
7652 
7653 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7654   // 32-bit literals are only supported on CI and we only want to use them
7655   // when the offset is > 8 bits.
7656   return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7657 }
7658 
7659 //===----------------------------------------------------------------------===//
7660 // vop3
7661 //===----------------------------------------------------------------------===//
7662 
7663 static bool ConvertOmodMul(int64_t &Mul) {
7664   if (Mul != 1 && Mul != 2 && Mul != 4)
7665     return false;
7666 
7667   Mul >>= 1;
7668   return true;
7669 }
7670 
7671 static bool ConvertOmodDiv(int64_t &Div) {
7672   if (Div == 1) {
7673     Div = 0;
7674     return true;
7675   }
7676 
7677   if (Div == 2) {
7678     Div = 3;
7679     return true;
7680   }
7681 
7682   return false;
7683 }
7684 
7685 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7686 // This is intentional and ensures compatibility with sp3.
7687 // See bug 35397 for details.
7688 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
7689   if (BoundCtrl == 0 || BoundCtrl == 1) {
7690     if (!isGFX11Plus())
7691       BoundCtrl = 1;
7692     return true;
7693   }
7694   return false;
7695 }
7696 
7697 void AMDGPUAsmParser::onBeginOfFile() {
7698   if (!getParser().getStreamer().getTargetStreamer() ||
7699       getSTI().getTargetTriple().getArch() == Triple::r600)
7700     return;
7701 
7702   if (!getTargetStreamer().getTargetID())
7703     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
7704         // TODO: Should try to check code object version from directive???
7705         AMDGPU::getAmdhsaCodeObjectVersion());
7706 
7707   if (isHsaAbiVersion3AndAbove(&getSTI()))
7708     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7709 }
7710 
7711 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
7712   StringRef Name = getTokenStr();
7713   if (Name == "mul") {
7714     return parseIntWithPrefix("mul", Operands,
7715                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7716   }
7717 
7718   if (Name == "div") {
7719     return parseIntWithPrefix("div", Operands,
7720                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
7721   }
7722 
7723   return ParseStatus::NoMatch;
7724 }
7725 
7726 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
7727 // the number of src operands present, then copies that bit into src0_modifiers.
7728 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
7729   int Opc = Inst.getOpcode();
7730   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7731   if (OpSelIdx == -1)
7732     return;
7733 
7734   int SrcNum;
7735   const int Ops[] = { AMDGPU::OpName::src0,
7736                       AMDGPU::OpName::src1,
7737                       AMDGPU::OpName::src2 };
7738   for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
7739        ++SrcNum)
7740     ;
7741   assert(SrcNum > 0);
7742 
7743   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7744 
7745   if ((OpSel & (1 << SrcNum)) != 0) {
7746     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
7747     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7748     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
7749   }
7750 }
7751 
7752 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
7753                                    const OperandVector &Operands) {
7754   cvtVOP3P(Inst, Operands);
7755   cvtVOP3DstOpSelOnly(Inst);
7756 }
7757 
7758 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
7759                                    OptionalImmIndexMap &OptionalIdx) {
7760   cvtVOP3P(Inst, Operands, OptionalIdx);
7761   cvtVOP3DstOpSelOnly(Inst);
7762 }
7763 
7764 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
7765   return
7766       // 1. This operand is input modifiers
7767       Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
7768       // 2. This is not last operand
7769       && Desc.NumOperands > (OpNum + 1)
7770       // 3. Next operand is register class
7771       && Desc.operands()[OpNum + 1].RegClass != -1
7772       // 4. Next register is not tied to any other operand
7773       && Desc.getOperandConstraint(OpNum + 1,
7774                                    MCOI::OperandConstraint::TIED_TO) == -1;
7775 }
7776 
7777 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
7778 {
7779   OptionalImmIndexMap OptionalIdx;
7780   unsigned Opc = Inst.getOpcode();
7781 
7782   unsigned I = 1;
7783   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7784   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7785     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7786   }
7787 
7788   for (unsigned E = Operands.size(); I != E; ++I) {
7789     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7790     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7791       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7792     } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
7793                Op.isInterpAttrChan()) {
7794       Inst.addOperand(MCOperand::createImm(Op.getImm()));
7795     } else if (Op.isImmModifier()) {
7796       OptionalIdx[Op.getImmTy()] = I;
7797     } else {
7798       llvm_unreachable("unhandled operand type");
7799     }
7800   }
7801 
7802   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
7803     addOptionalImmOperand(Inst, Operands, OptionalIdx,
7804                           AMDGPUOperand::ImmTyHigh);
7805 
7806   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
7807     addOptionalImmOperand(Inst, Operands, OptionalIdx,
7808                           AMDGPUOperand::ImmTyClampSI);
7809 
7810   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
7811     addOptionalImmOperand(Inst, Operands, OptionalIdx,
7812                           AMDGPUOperand::ImmTyOModSI);
7813 }
7814 
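// Convert parsed VINTERP operands into an MCInst, then fold the op_sel bits
// into the per-source modifier operands (the dst bit goes into
// src0_modifiers as DST_OP_SEL).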
7815 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
7816 {
7817   OptionalImmIndexMap OptionalIdx;
7818   unsigned Opc = Inst.getOpcode();
7819 
7820   unsigned I = 1;
7821   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7822   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7823     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7824   }
7825 
7826   for (unsigned E = Operands.size(); I != E; ++I) {
7827     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7828     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7829       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7830     } else if (Op.isImmModifier()) {
7831       OptionalIdx[Op.getImmTy()] = I;
7832     } else {
7833       llvm_unreachable("unhandled operand type");
7834     }
7835   }
7836 
7837   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
7838 
7839   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7840   if (OpSelIdx != -1)
7841     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
7842 
7843   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
7844 
7845   if (OpSelIdx == -1)
7846     return;
7847 
7848   const int Ops[] = { AMDGPU::OpName::src0,
7849                       AMDGPU::OpName::src1,
7850                       AMDGPU::OpName::src2 };
7851   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7852                          AMDGPU::OpName::src1_modifiers,
7853                          AMDGPU::OpName::src2_modifiers };
7854 
7855   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7856 
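       // Propagate the parsed op_sel bits into the per-source *_modifiers
       // operands. Bit 3 of op_sel is the destination bit; since there is no
       // dst_modifiers operand it is recorded as DST_OP_SEL on src0_modifiers.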
7857   for (int J = 0; J < 3; ++J) {
7858     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7859     if (OpIdx == -1)
7860       break;
7861 
7862     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7863     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
7864 
7865     if ((OpSel & (1 << J)) != 0)
7866       ModVal |= SISrcMods::OP_SEL_0;
7867     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
7868         (OpSel & (1 << 3)) != 0)
7869       ModVal |= SISrcMods::DST_OP_SEL;
7870 
7871     Inst.getOperand(ModIdx).setImm(ModVal);
7872   }
7873 }
7874 
7875 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
7876                               OptionalImmIndexMap &OptionalIdx) {
7877   unsigned Opc = Inst.getOpcode();
7878 
7879   unsigned I = 1;
7880   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7881   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7882     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7883   }
7884 
7885   for (unsigned E = Operands.size(); I != E; ++I) {
7886     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7887     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7888       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
7889     } else if (Op.isImmModifier()) {
7890       OptionalIdx[Op.getImmTy()] = I;
7891     } else if (Op.isRegOrImm()) {
7892       Op.addRegOrImmOperands(Inst, 1);
7893     } else {
7894       llvm_unreachable("unhandled operand type");
7895     }
7896   }
7897 
7898   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
7899     addOptionalImmOperand(Inst, Operands, OptionalIdx,
7900                           AMDGPUOperand::ImmTyClampSI);
7901 
7902   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
7903     addOptionalImmOperand(Inst, Operands, OptionalIdx,
7904                           AMDGPUOperand::ImmTyOModSI);
7905 
7906   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7907   // they have a src2 register operand that is tied to the dst operand.
7908   // We don't allow modifiers for this operand in the assembler, so
7909   // src2_modifiers should be 0.
7910   if (isMAC(Opc)) {
7911     auto it = Inst.begin();
7912     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7913     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7914     ++it;
7915     // Copy the operand to ensure it's not invalidated when Inst grows.
7916     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7917   }
7918 }
7919 
7920 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7921   OptionalImmIndexMap OptionalIdx;
7922   cvtVOP3(Inst, Operands, OptionalIdx);
7923 }
7924 
7925 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
7926                                OptionalImmIndexMap &OptIdx) {
7927   const int Opc = Inst.getOpcode();
7928   const MCInstrDesc &Desc = MII.get(Opc);
7929 
7930   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7931 
7932   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
7933       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
7934     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
7935     Inst.addOperand(Inst.getOperand(0));
7936   }
7937 
7938   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
7939     assert(!IsPacked);
7940     Inst.addOperand(Inst.getOperand(0));
7941   }
7942 
7943   // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
7944   // instruction, and then figure out where to actually put the modifiers.
7945 
7946   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7947   if (OpSelIdx != -1) {
7948     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7949   }
7950 
7951   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7952   if (OpSelHiIdx != -1) {
7953     int DefaultVal = IsPacked ? -1 : 0;
7954     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7955                           DefaultVal);
7956   }
7957 
7958   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7959   if (NegLoIdx != -1) {
7960     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7961     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7962   }
7963 
7964   const int Ops[] = { AMDGPU::OpName::src0,
7965                       AMDGPU::OpName::src1,
7966                       AMDGPU::OpName::src2 };
7967   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7968                          AMDGPU::OpName::src1_modifiers,
7969                          AMDGPU::OpName::src2_modifiers };
7970 
7971   unsigned OpSel = 0;
7972   unsigned OpSelHi = 0;
7973   unsigned NegLo = 0;
7974   unsigned NegHi = 0;
7975 
7976   if (OpSelIdx != -1)
7977     OpSel = Inst.getOperand(OpSelIdx).getImm();
7978 
7979   if (OpSelHiIdx != -1)
7980     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7981 
7982   if (NegLoIdx != -1) {
7983     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7984     NegLo = Inst.getOperand(NegLoIdx).getImm();
7985     NegHi = Inst.getOperand(NegHiIdx).getImm();
7986   }
7987 
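       // Distribute the collected op_sel/op_sel_hi/neg_lo/neg_hi bits into the
       // corresponding per-source *_modifiers operands, one bit per source.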
7988   for (int J = 0; J < 3; ++J) {
7989     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7990     if (OpIdx == -1)
7991       break;
7992 
7993     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7994 
7995     if (ModIdx == -1)
7996       continue;
7997 
7998     uint32_t ModVal = 0;
7999 
8000     if ((OpSel & (1 << J)) != 0)
8001       ModVal |= SISrcMods::OP_SEL_0;
8002 
8003     if ((OpSelHi & (1 << J)) != 0)
8004       ModVal |= SISrcMods::OP_SEL_1;
8005 
8006     if ((NegLo & (1 << J)) != 0)
8007       ModVal |= SISrcMods::NEG;
8008 
8009     if ((NegHi & (1 << J)) != 0)
8010       ModVal |= SISrcMods::NEG_HI;
8011 
8012     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8013   }
8014 }
8015 
8016 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8017   OptionalImmIndexMap OptIdx;
8018   cvtVOP3(Inst, Operands, OptIdx);
8019   cvtVOP3P(Inst, Operands, OptIdx);
8020 }
8021 
8022 //===----------------------------------------------------------------------===//
8023 // VOPD
8024 //===----------------------------------------------------------------------===//
8025 
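     // Parse the '::' separator of a dual-issue VOPD instruction together with
     // the mnemonic of its second (OpY) component, e.g. (illustrative gfx11
     // syntax): v_dual_mov_b32 v0, v1 :: v_dual_add_f32 v2, v3, v4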
8026 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8027   if (!hasVOPD(getSTI()))
8028     return ParseStatus::NoMatch;
8029 
8030   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8031     SMLoc S = getLoc();
8032     lex();
8033     lex();
8034     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8035     SMLoc OpYLoc = getLoc();
8036     StringRef OpYName;
8037     if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8038       Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8039       return ParseStatus::Success;
8040     }
8041     return Error(OpYLoc, "expected a VOPDY instruction after ::");
8042   }
8043   return ParseStatus::NoMatch;
8044 }
8045 
8046 // Create VOPD MCInst operands using parsed assembler operands.
8047 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8048   auto addOp = [&](uint16_t ParsedOprIdx) {
8049     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8050     if (Op.isReg()) {
8051       Op.addRegOperands(Inst, 1);
8052       return;
8053     }
8054     if (Op.isImm()) {
8055       Op.addImmOperands(Inst, 1);
8056       return;
8057     }
8058     llvm_unreachable("Unhandled operand type in cvtVOPD");
8059   };
8060 
8061   const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8062 
8063   // MCInst operands are ordered as follows:
8064   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8065 
8066   for (auto CompIdx : VOPD::COMPONENTS) {
8067     addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8068   }
8069 
8070   for (auto CompIdx : VOPD::COMPONENTS) {
8071     const auto &CInfo = InstInfo[CompIdx];
8072     auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8073     for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8074       addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8075     if (CInfo.hasSrc2Acc())
8076       addOp(CInfo.getIndexOfDstInParsedOperands());
8077   }
8078 }
8079 
8080 //===----------------------------------------------------------------------===//
8081 // dpp
8082 //===----------------------------------------------------------------------===//
8083 
8084 bool AMDGPUOperand::isDPP8() const {
8085   return isImmTy(ImmTyDPP8);
8086 }
8087 
8088 bool AMDGPUOperand::isDPPCtrl() const {
8089   using namespace AMDGPU::DPP;
8090 
8091   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8092   if (result) {
8093     int64_t Imm = getImm();
8094     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8095            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8096            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8097            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8098            (Imm == DppCtrl::WAVE_SHL1) ||
8099            (Imm == DppCtrl::WAVE_ROL1) ||
8100            (Imm == DppCtrl::WAVE_SHR1) ||
8101            (Imm == DppCtrl::WAVE_ROR1) ||
8102            (Imm == DppCtrl::ROW_MIRROR) ||
8103            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8104            (Imm == DppCtrl::BCAST15) ||
8105            (Imm == DppCtrl::BCAST31) ||
8106            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8107            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8108   }
8109   return false;
8110 }
8111 
8112 //===----------------------------------------------------------------------===//
8113 // mAI
8114 //===----------------------------------------------------------------------===//
8115 
8116 bool AMDGPUOperand::isBLGP() const {
8117   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8118 }
8119 
8120 bool AMDGPUOperand::isCBSZ() const {
8121   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8122 }
8123 
8124 bool AMDGPUOperand::isABID() const {
8125   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8126 }
8127 
8128 bool AMDGPUOperand::isS16Imm() const {
8129   return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8130 }
8131 
8132 bool AMDGPUOperand::isU16Imm() const {
8133   return isImmLiteral() && isUInt<16>(getImm());
8134 }
8135 
8136 //===----------------------------------------------------------------------===//
8137 // dim
8138 //===----------------------------------------------------------------------===//
8139 
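     // Parse the value of a "dim:" operand, e.g. dim:1D or dim:SQ_RSRC_IMG_1D
     // (illustrative); a leading integer such as the "1" in "1D" is tokenized
     // separately, so it is re-joined with the identifier suffix before the
     // lookup.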
8140 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8141   // We want to allow "dim:1D" etc.,
8142   // but the initial 1 is tokenized as an integer.
8143   std::string Token;
8144   if (isToken(AsmToken::Integer)) {
8145     SMLoc Loc = getToken().getEndLoc();
8146     Token = std::string(getTokenStr());
8147     lex();
8148     if (getLoc() != Loc)
8149       return false;
8150   }
8151 
8152   StringRef Suffix;
8153   if (!parseId(Suffix))
8154     return false;
8155   Token += Suffix;
8156 
8157   StringRef DimId = Token;
8158   if (DimId.startswith("SQ_RSRC_IMG_"))
8159     DimId = DimId.drop_front(12);
8160 
8161   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8162   if (!DimInfo)
8163     return false;
8164 
8165   Encoding = DimInfo->Encoding;
8166   return true;
8167 }
8168 
8169 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8170   if (!isGFX10Plus())
8171     return ParseStatus::NoMatch;
8172 
8173   SMLoc S = getLoc();
8174 
8175   if (!trySkipId("dim", AsmToken::Colon))
8176     return ParseStatus::NoMatch;
8177 
8178   unsigned Encoding;
8179   SMLoc Loc = getLoc();
8180   if (!parseDimId(Encoding))
8181     return Error(Loc, "invalid dim value");
8182 
8183   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8184                                               AMDGPUOperand::ImmTyDim));
8185   return ParseStatus::Success;
8186 }
8187 
8188 //===----------------------------------------------------------------------===//
8189 // dpp
8190 //===----------------------------------------------------------------------===//
8191 
8192 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8193   SMLoc S = getLoc();
8194 
8195   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8196     return ParseStatus::NoMatch;
8197 
8198   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
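       // Each of the eight selectors is a 3-bit value packed LSB-first into a
       // single immediate below; illustratively, dpp8:[0,1,2,3,4,5,6,7] is the
       // identity selection where each lane of a group of eight reads from
       // itself.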
8199 
8200   int64_t Sels[8];
8201 
8202   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8203     return ParseStatus::Failure;
8204 
8205   for (size_t i = 0; i < 8; ++i) {
8206     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8207       return ParseStatus::Failure;
8208 
8209     SMLoc Loc = getLoc();
8210     if (getParser().parseAbsoluteExpression(Sels[i]))
8211       return ParseStatus::Failure;
8212     if (Sels[i] < 0 || Sels[i] > 7)
8213       return Error(Loc, "expected a 3-bit value");
8214   }
8215 
8216   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8217     return ParseStatus::Failure;
8218 
8219   unsigned DPP8 = 0;
8220   for (size_t i = 0; i < 8; ++i)
8221     DPP8 |= (Sels[i] << (i * 3));
8222 
8223   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8224   return ParseStatus::Success;
8225 }
8226 
8227 bool
8228 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8229                                     const OperandVector &Operands) {
8230   if (Ctrl == "row_newbcast")
8231     return isGFX90A();
8232 
8233   if (Ctrl == "row_share" ||
8234       Ctrl == "row_xmask")
8235     return isGFX10Plus();
8236 
8237   if (Ctrl == "wave_shl" ||
8238       Ctrl == "wave_shr" ||
8239       Ctrl == "wave_rol" ||
8240       Ctrl == "wave_ror" ||
8241       Ctrl == "row_bcast")
8242     return isVI() || isGFX9();
8243 
8244   return Ctrl == "row_mirror" ||
8245          Ctrl == "row_half_mirror" ||
8246          Ctrl == "quad_perm" ||
8247          Ctrl == "row_shl" ||
8248          Ctrl == "row_shr" ||
8249          Ctrl == "row_ror";
8250 }
8251 
8252 int64_t
8253 AMDGPUAsmParser::parseDPPCtrlPerm() {
8254   // quad_perm:[%d,%d,%d,%d]
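       // Each of the four values is a 2-bit lane selector within a group of
       // four lanes and is packed two bits per slot below; e.g.
       // quad_perm:[0,1,2,3] is the identity permutation (illustrative).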
8255 
8256   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8257     return -1;
8258 
8259   int64_t Val = 0;
8260   for (int i = 0; i < 4; ++i) {
8261     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8262       return -1;
8263 
8264     int64_t Temp;
8265     SMLoc Loc = getLoc();
8266     if (getParser().parseAbsoluteExpression(Temp))
8267       return -1;
8268     if (Temp < 0 || Temp > 3) {
8269       Error(Loc, "expected a 2-bit value");
8270       return -1;
8271     }
8272 
8273     Val += (Temp << i * 2);
8274   }
8275 
8276   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8277     return -1;
8278 
8279   return Val;
8280 }
8281 
8282 int64_t
8283 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8284   using namespace AMDGPU::DPP;
8285 
8286   // sel:%d
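       // The parsed value is range-checked against the table below and then
       // OR'ed into the base control encoding (e.g. row_shl:1 becomes
       // ROW_SHL0 | 1); row_bcast is special-cased to BCAST15/BCAST31.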
8287 
8288   int64_t Val;
8289   SMLoc Loc = getLoc();
8290 
8291   if (getParser().parseAbsoluteExpression(Val))
8292     return -1;
8293 
8294   struct DppCtrlCheck {
8295     int64_t Ctrl;
8296     int Lo;
8297     int Hi;
8298   };
8299 
8300   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8301     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8302     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8303     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8304     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8305     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8306     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8307     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8308     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8309     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8310     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8311     .Default({-1, 0, 0});
8312 
8313   bool Valid;
8314   if (Check.Ctrl == -1) {
8315     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8316     Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8317   } else {
8318     Valid = Check.Lo <= Val && Val <= Check.Hi;
8319     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8320   }
8321 
8322   if (!Valid) {
8323     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8324     return -1;
8325   }
8326 
8327   return Val;
8328 }
8329 
8330 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8331   using namespace AMDGPU::DPP;
8332 
8333   if (!isToken(AsmToken::Identifier) ||
8334       !isSupportedDPPCtrl(getTokenStr(), Operands))
8335     return ParseStatus::NoMatch;
8336 
8337   SMLoc S = getLoc();
8338   int64_t Val = -1;
8339   StringRef Ctrl;
8340 
8341   parseId(Ctrl);
8342 
8343   if (Ctrl == "row_mirror") {
8344     Val = DppCtrl::ROW_MIRROR;
8345   } else if (Ctrl == "row_half_mirror") {
8346     Val = DppCtrl::ROW_HALF_MIRROR;
8347   } else {
8348     if (skipToken(AsmToken::Colon, "expected a colon")) {
8349       if (Ctrl == "quad_perm") {
8350         Val = parseDPPCtrlPerm();
8351       } else {
8352         Val = parseDPPCtrlSel(Ctrl);
8353       }
8354     }
8355   }
8356 
8357   if (Val == -1)
8358     return ParseStatus::Failure;
8359 
8360   Operands.push_back(
8361     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8362   return ParseStatus::Success;
8363 }
8364 
8365 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
8366                                  bool IsDPP8) {
8367   OptionalImmIndexMap OptionalIdx;
8368   unsigned Opc = Inst.getOpcode();
8369   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8370 
8371   // MAC instructions are special because they have an 'old'
8372   // operand which is not tied to dst (but assumed to be).
8373   // They also have a dummy, unused src2_modifiers operand.
8374   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
8375   int Src2ModIdx =
8376       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
8377   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
8378                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
8379 
8380   unsigned I = 1;
8381   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8382     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8383   }
8384 
8385   int Fi = 0;
8386   for (unsigned E = Operands.size(); I != E; ++I) {
8387 
8388     if (IsMAC) {
8389       int NumOperands = Inst.getNumOperands();
8390       if (OldIdx == NumOperands) {
8391         // Handle old operand
8392         constexpr int DST_IDX = 0;
8393         Inst.addOperand(Inst.getOperand(DST_IDX));
8394       } else if (Src2ModIdx == NumOperands) {
8395         // Add unused dummy src2_modifiers
8396         Inst.addOperand(MCOperand::createImm(0));
8397       }
8398     }
8399 
8400     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8401                                             MCOI::TIED_TO);
8402     if (TiedTo != -1) {
8403       assert((unsigned)TiedTo < Inst.getNumOperands());
8404       // handle tied old or src2 for MAC instructions
8405       Inst.addOperand(Inst.getOperand(TiedTo));
8406     }
8407     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8408     // Add the register arguments
8409     if (IsDPP8 && Op.isDppFI()) {
8410       Fi = Op.getImm();
8411     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8412       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8413     } else if (Op.isReg()) {
8414       Op.addRegOperands(Inst, 1);
8415     } else if (Op.isImm() &&
8416                Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
8417       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8418       Op.addImmOperands(Inst, 1);
8419     } else if (Op.isImm()) {
8420       OptionalIdx[Op.getImmTy()] = I;
8421     } else {
8422       llvm_unreachable("unhandled operand type");
8423     }
8424   }
8425   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8426     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8427 
8428   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8429     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8430 
8431   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8432     cvtVOP3P(Inst, Operands, OptionalIdx);
8433   else if (Desc.TSFlags & SIInstrFlags::VOP3)
8434     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8435   else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
8436     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8437   }
8438 
8439   if (IsDPP8) {
8440     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8441     using namespace llvm::AMDGPU::DPP;
8442     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8443   } else {
8444     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8445     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8446     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8447     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8448 
8449     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
8450       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8451                             AMDGPUOperand::ImmTyDppFI);
8452   }
8453 }
8454 
8455 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8456   OptionalImmIndexMap OptionalIdx;
8457 
8458   unsigned I = 1;
8459   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8460   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8461     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8462   }
8463 
8464   int Fi = 0;
8465   for (unsigned E = Operands.size(); I != E; ++I) {
8466     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8467                                             MCOI::TIED_TO);
8468     if (TiedTo != -1) {
8469       assert((unsigned)TiedTo < Inst.getNumOperands());
8470       // handle tied old or src2 for MAC instructions
8471       Inst.addOperand(Inst.getOperand(TiedTo));
8472     }
8473     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8474     // Add the register arguments
8475     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8476       // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
8477       // Skip it.
8478       continue;
8479     }
8480 
8481     if (IsDPP8) {
8482       if (Op.isDPP8()) {
8483         Op.addImmOperands(Inst, 1);
8484       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8485         Op.addRegWithFPInputModsOperands(Inst, 2);
8486       } else if (Op.isDppFI()) {
8487         Fi = Op.getImm();
8488       } else if (Op.isReg()) {
8489         Op.addRegOperands(Inst, 1);
8490       } else {
8491         llvm_unreachable("Invalid operand type");
8492       }
8493     } else {
8494       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8495         Op.addRegWithFPInputModsOperands(Inst, 2);
8496       } else if (Op.isReg()) {
8497         Op.addRegOperands(Inst, 1);
8498       } else if (Op.isDPPCtrl()) {
8499         Op.addImmOperands(Inst, 1);
8500       } else if (Op.isImm()) {
8501         // Handle optional arguments
8502         OptionalIdx[Op.getImmTy()] = I;
8503       } else {
8504         llvm_unreachable("Invalid operand type");
8505       }
8506     }
8507   }
8508 
8509   if (IsDPP8) {
8510     using namespace llvm::AMDGPU::DPP;
8511     Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
8512   } else {
8513     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8514     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8515     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8516     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
8517       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8518                             AMDGPUOperand::ImmTyDppFI);
8519     }
8520   }
8521 }
8522 
8523 //===----------------------------------------------------------------------===//
8524 // sdwa
8525 //===----------------------------------------------------------------------===//
8526 
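     // Parse an SDWA selection operand of the form <Prefix>:<sel>, where <sel>
     // is one of the BYTE_0..BYTE_3, WORD_0/WORD_1 or DWORD names matched below
     // (e.g. dst_sel:WORD_1, assuming the usual dst_sel/src0_sel/src1_sel
     // prefixes supplied by the callers).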
8527 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
8528                                           StringRef Prefix,
8529                                           AMDGPUOperand::ImmTy Type) {
8530   using namespace llvm::AMDGPU::SDWA;
8531 
8532   SMLoc S = getLoc();
8533   StringRef Value;
8534 
8535   SMLoc StringLoc;
8536   ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
8537   if (!Res.isSuccess())
8538     return Res;
8539 
8540   int64_t Int;
8541   Int = StringSwitch<int64_t>(Value)
8542         .Case("BYTE_0", SdwaSel::BYTE_0)
8543         .Case("BYTE_1", SdwaSel::BYTE_1)
8544         .Case("BYTE_2", SdwaSel::BYTE_2)
8545         .Case("BYTE_3", SdwaSel::BYTE_3)
8546         .Case("WORD_0", SdwaSel::WORD_0)
8547         .Case("WORD_1", SdwaSel::WORD_1)
8548         .Case("DWORD", SdwaSel::DWORD)
8549         .Default(0xffffffff);
8550 
8551   if (Int == 0xffffffff)
8552     return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8553 
8554   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8555   return ParseStatus::Success;
8556 }
8557 
8558 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8559   using namespace llvm::AMDGPU::SDWA;
8560 
8561   SMLoc S = getLoc();
8562   StringRef Value;
8563 
8564   SMLoc StringLoc;
8565   ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8566   if (!Res.isSuccess())
8567     return Res;
8568 
8569   int64_t Int;
8570   Int = StringSwitch<int64_t>(Value)
8571         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8572         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8573         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8574         .Default(0xffffffff);
8575 
8576   if (Int == 0xffffffff)
8577     return Error(StringLoc, "invalid dst_unused value");
8578 
8579   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
8580   return ParseStatus::Success;
8581 }
8582 
8583 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8584   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8585 }
8586 
8587 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8588   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8589 }
8590 
8591 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8592   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8593 }
8594 
8595 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8596   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8597 }
8598 
8599 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8600   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8601 }
8602 
8603 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8604                               uint64_t BasicInstType,
8605                               bool SkipDstVcc,
8606                               bool SkipSrcVcc) {
8607   using namespace llvm::AMDGPU::SDWA;
8608 
8609   OptionalImmIndexMap OptionalIdx;
8610   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8611   bool SkippedVcc = false;
8612 
8613   unsigned I = 1;
8614   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8615   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8616     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8617   }
8618 
8619   for (unsigned E = Operands.size(); I != E; ++I) {
8620     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8621     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8622         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8623       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8624       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8625       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8626       // Skip VCC only if we didn't skip it on previous iteration.
8627       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8628       if (BasicInstType == SIInstrFlags::VOP2 &&
8629           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8630            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8631         SkippedVcc = true;
8632         continue;
8633       } else if (BasicInstType == SIInstrFlags::VOPC &&
8634                  Inst.getNumOperands() == 0) {
8635         SkippedVcc = true;
8636         continue;
8637       }
8638     }
8639     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8640       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8641     } else if (Op.isImm()) {
8642       // Handle optional arguments
8643       OptionalIdx[Op.getImmTy()] = I;
8644     } else {
8645       llvm_unreachable("Invalid operand type");
8646     }
8647     SkippedVcc = false;
8648   }
8649 
8650   const unsigned Opc = Inst.getOpcode();
8651   if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
8652       Opc != AMDGPU::V_NOP_sdwa_vi) {
8653     // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
8654     switch (BasicInstType) {
8655     case SIInstrFlags::VOP1:
8656       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8657         addOptionalImmOperand(Inst, Operands, OptionalIdx,
8658                               AMDGPUOperand::ImmTyClampSI, 0);
8659 
8660       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8661         addOptionalImmOperand(Inst, Operands, OptionalIdx,
8662                               AMDGPUOperand::ImmTyOModSI, 0);
8663 
8664       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
8665         addOptionalImmOperand(Inst, Operands, OptionalIdx,
8666                               AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
8667 
8668       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
8669         addOptionalImmOperand(Inst, Operands, OptionalIdx,
8670                               AMDGPUOperand::ImmTySDWADstUnused,
8671                               DstUnused::UNUSED_PRESERVE);
8672 
8673       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
8674       break;
8675 
8676     case SIInstrFlags::VOP2:
8677       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8678 
8679       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
8680         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8681 
8682       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
8683       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
8684       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
8685       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
8686       break;
8687 
8688     case SIInstrFlags::VOPC:
8689       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
8690         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8691       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
8692       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
8693       break;
8694 
8695     default:
8696       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8697     }
8698   }
8699 
8700   // Special case v_mac_{f16, f32}:
8701   // they have a src2 register operand that is tied to the dst operand.
8702   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8703       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
8704     auto it = Inst.begin();
8705     std::advance(
8706       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8707     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8708   }
8709 }
8710 
8711 /// Force static initialization.
8712 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8713   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
8714   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8715 }
8716 
8717 #define GET_REGISTER_MATCHER
8718 #define GET_MATCHER_IMPLEMENTATION
8719 #define GET_MNEMONIC_SPELL_CHECKER
8720 #define GET_MNEMONIC_CHECKER
8721 #include "AMDGPUGenAsmMatcher.inc"
8722 
8723 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
8724                                                 unsigned MCK) {
8725   switch (MCK) {
8726   case MCK_addr64:
8727     return parseTokenOp("addr64", Operands);
8728   case MCK_done:
8729     return parseTokenOp("done", Operands);
8730   case MCK_idxen:
8731     return parseTokenOp("idxen", Operands);
8732   case MCK_lds:
8733     return parseTokenOp("lds", Operands);
8734   case MCK_offen:
8735     return parseTokenOp("offen", Operands);
8736   case MCK_off:
8737     return parseTokenOp("off", Operands);
8738   case MCK_row_95_en:
8739     return parseTokenOp("row_en", Operands);
8740   case MCK_gds:
8741     return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
8742   case MCK_tfe:
8743     return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
8744   }
8745   return tryCustomParseOperand(Operands, MCK);
8746 }
8747 
8748 // This function should be defined after the auto-generated include so that
8749 // the MatchClassKind enum is defined.
8750 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8751                                                      unsigned Kind) {
8752   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8753   // But MatchInstructionImpl() expects a token and fails to validate the
8754   // operand. This method checks whether we were given an immediate operand but
8755   // expected the corresponding token.
8756   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8757   switch (Kind) {
8758   case MCK_addr64:
8759     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8760   case MCK_gds:
8761     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8762   case MCK_lds:
8763     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8764   case MCK_idxen:
8765     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8766   case MCK_offen:
8767     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8768   case MCK_tfe:
8769     return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
8770   case MCK_SSrcB32:
8771     // When operands have expression values, they will return true for isToken,
8772     // because it is not possible to distinguish between a token and an
8773     // match an operand as a token when isToken returns true, and when the
8774     // name of the expression is not a valid token the match will fail,
8775     // name of the expression is not a valid token, the match will fail,
8776     // so we need to handle it here.
8777     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8778   case MCK_SSrcF32:
8779     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8780   case MCK_SOPPBrTarget:
8781     return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
8782   case MCK_VReg32OrOff:
8783     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8784   case MCK_InterpSlot:
8785     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8786   case MCK_InterpAttr:
8787     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8788   case MCK_InterpAttrChan:
8789     return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
8790   case MCK_SReg_64:
8791   case MCK_SReg_64_XEXEC:
8792     // Null is defined as a 32-bit register but
8793     // it should also be enabled with 64-bit operands.
8794     // The following code enables it for SReg_64 operands
8795     // used as source and destination. Remaining source
8796     // operands are handled in isInlinableImm.
8797     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8798   default:
8799     return Match_InvalidOperand;
8800   }
8801 }
8802 
8803 //===----------------------------------------------------------------------===//
8804 // endpgm
8805 //===----------------------------------------------------------------------===//
8806 
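     // Parse the optional immediate operand of s_endpgm; when it is omitted the
     // value defaults to 0, so e.g. "s_endpgm" and "s_endpgm 0" produce the same
     // operand here.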
8807 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
8808   SMLoc S = getLoc();
8809   int64_t Imm = 0;
8810 
8811   if (!parseExpr(Imm)) {
8812     // The operand is optional, if not present default to 0
8813     Imm = 0;
8814   }
8815 
8816   if (!isUInt<16>(Imm))
8817     return Error(S, "expected a 16-bit value");
8818 
8819   Operands.push_back(
8820       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8821   return ParseStatus::Success;
8822 }
8823 
8824 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8825 
8826 //===----------------------------------------------------------------------===//
8827 // LDSDIR
8828 //===----------------------------------------------------------------------===//
8829 
8830 bool AMDGPUOperand::isWaitVDST() const {
8831   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
8832 }
8833 
8834 //===----------------------------------------------------------------------===//
8835 // VINTERP
8836 //===----------------------------------------------------------------------===//
8837 
8838 bool AMDGPUOperand::isWaitEXP() const {
8839   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
8840 }
8841