1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/CodeGen/MachineValueType.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCExpr.h"
28 #include "llvm/MC/MCInst.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/MC/MCParser/MCAsmLexer.h"
31 #include "llvm/MC/MCParser/MCAsmParser.h"
32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDGPUMetadata.h"
37 #include "llvm/Support/AMDHSAKernelDescriptor.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/TargetParser/TargetParser.h"
41 #include <optional>
42 
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
46 
47 namespace {
48 
49 class AMDGPUAsmParser;
50 
51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52 
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
56 
57 class AMDGPUOperand : public MCParsedAsmOperand {
58   enum KindTy {
59     Token,
60     Immediate,
61     Register,
62     Expression
63   } Kind;
64 
65   SMLoc StartLoc, EndLoc;
66   const AMDGPUAsmParser *AsmParser;
67 
68 public:
69   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70       : Kind(Kind_), AsmParser(AsmParser_) {}
71 
72   using Ptr = std::unique_ptr<AMDGPUOperand>;
73 
74   struct Modifiers {
75     bool Abs = false;
76     bool Neg = false;
77     bool Sext = false;
78     bool Lit = false;
79 
80     bool hasFPModifiers() const { return Abs || Neg; }
81     bool hasIntModifiers() const { return Sext; }
82     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
83 
84     int64_t getFPModifiersOperand() const {
85       int64_t Operand = 0;
86       Operand |= Abs ? SISrcMods::ABS : 0u;
87       Operand |= Neg ? SISrcMods::NEG : 0u;
88       return Operand;
89     }
90 
91     int64_t getIntModifiersOperand() const {
92       int64_t Operand = 0;
93       Operand |= Sext ? SISrcMods::SEXT : 0u;
94       return Operand;
95     }
96 
97     int64_t getModifiersOperand() const {
98       assert(!(hasFPModifiers() && hasIntModifiers())
99            && "fp and int modifiers should not be used simultaneously");
100       if (hasFPModifiers()) {
101         return getFPModifiersOperand();
102       } else if (hasIntModifiers()) {
103         return getIntModifiersOperand();
104       } else {
105         return 0;
106       }
107     }
108 
109     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110   };
111 
112   enum ImmTy {
113     ImmTyNone,
114     ImmTyGDS,
115     ImmTyLDS,
116     ImmTyOffen,
117     ImmTyIdxen,
118     ImmTyAddr64,
119     ImmTyOffset,
120     ImmTyInstOffset,
121     ImmTyOffset0,
122     ImmTyOffset1,
123     ImmTySMEMOffsetMod,
124     ImmTyCPol,
125     ImmTyTFE,
126     ImmTyD16,
127     ImmTyClampSI,
128     ImmTyOModSI,
129     ImmTySDWADstSel,
130     ImmTySDWASrc0Sel,
131     ImmTySDWASrc1Sel,
132     ImmTySDWADstUnused,
133     ImmTyDMask,
134     ImmTyDim,
135     ImmTyUNorm,
136     ImmTyDA,
137     ImmTyR128A16,
138     ImmTyA16,
139     ImmTyLWE,
140     ImmTyExpTgt,
141     ImmTyExpCompr,
142     ImmTyExpVM,
143     ImmTyFORMAT,
144     ImmTyHwreg,
145     ImmTyOff,
146     ImmTySendMsg,
147     ImmTyInterpSlot,
148     ImmTyInterpAttr,
149     ImmTyInterpAttrChan,
150     ImmTyOpSel,
151     ImmTyOpSelHi,
152     ImmTyNegLo,
153     ImmTyNegHi,
154     ImmTyDPP8,
155     ImmTyDppCtrl,
156     ImmTyDppRowMask,
157     ImmTyDppBankMask,
158     ImmTyDppBoundCtrl,
159     ImmTyDppFI,
160     ImmTySwizzle,
161     ImmTyGprIdxMode,
162     ImmTyHigh,
163     ImmTyBLGP,
164     ImmTyCBSZ,
165     ImmTyABID,
166     ImmTyEndpgm,
167     ImmTyWaitVDST,
168     ImmTyWaitEXP,
169   };
170 
171   // Immediate operand kind.
172   // It helps to identify the location of an offending operand after an error.
173   // Note that regular literals and mandatory literals (KImm) must be handled
174   // differently. When looking for an offending operand, we should usually
175   // ignore mandatory literals because they are part of the instruction and
176   // cannot be changed. Report location of mandatory operands only for VOPD,
177   // when both OpX and OpY have a KImm and there are no other literals.
178   enum ImmKindTy {
179     ImmKindTyNone,
180     ImmKindTyLiteral,
181     ImmKindTyMandatoryLiteral,
182     ImmKindTyConst,
183   };
184 
185 private:
186   struct TokOp {
187     const char *Data;
188     unsigned Length;
189   };
190 
191   struct ImmOp {
192     int64_t Val;
193     ImmTy Type;
194     bool IsFPImm;
195     mutable ImmKindTy Kind;
196     Modifiers Mods;
197   };
198 
199   struct RegOp {
200     unsigned RegNo;
201     Modifiers Mods;
202   };
203 
204   union {
205     TokOp Tok;
206     ImmOp Imm;
207     RegOp Reg;
208     const MCExpr *Expr;
209   };
210 
211 public:
212   bool isToken() const override { return Kind == Token; }
213 
214   bool isSymbolRefExpr() const {
215     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
216   }
217 
218   bool isImm() const override {
219     return Kind == Immediate;
220   }
221 
222   void setImmKindNone() const {
223     assert(isImm());
224     Imm.Kind = ImmKindTyNone;
225   }
226 
227   void setImmKindLiteral() const {
228     assert(isImm());
229     Imm.Kind = ImmKindTyLiteral;
230   }
231 
232   void setImmKindMandatoryLiteral() const {
233     assert(isImm());
234     Imm.Kind = ImmKindTyMandatoryLiteral;
235   }
236 
237   void setImmKindConst() const {
238     assert(isImm());
239     Imm.Kind = ImmKindTyConst;
240   }
241 
242   bool IsImmKindLiteral() const {
243     return isImm() && Imm.Kind == ImmKindTyLiteral;
244   }
245 
246   bool IsImmKindMandatoryLiteral() const {
247     return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
248   }
249 
250   bool isImmKindConst() const {
251     return isImm() && Imm.Kind == ImmKindTyConst;
252   }
253 
254   bool isInlinableImm(MVT type) const;
255   bool isLiteralImm(MVT type) const;
256 
257   bool isRegKind() const {
258     return Kind == Register;
259   }
260 
261   bool isReg() const override {
262     return isRegKind() && !hasModifiers();
263   }
264 
265   bool isRegOrInline(unsigned RCID, MVT type) const {
266     return isRegClass(RCID) || isInlinableImm(type);
267   }
268 
269   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
270     return isRegOrInline(RCID, type) || isLiteralImm(type);
271   }
272 
273   bool isRegOrImmWithInt16InputMods() const {
274     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
275   }
276 
277   bool isRegOrImmWithIntT16InputMods() const {
278     return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
279   }
280 
281   bool isRegOrImmWithInt32InputMods() const {
282     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
283   }
284 
285   bool isRegOrInlineImmWithInt16InputMods() const {
286     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
287   }
288 
289   bool isRegOrInlineImmWithInt32InputMods() const {
290     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
291   }
292 
293   bool isRegOrImmWithInt64InputMods() const {
294     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
295   }
296 
297   bool isRegOrImmWithFP16InputMods() const {
298     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
299   }
300 
301   bool isRegOrImmWithFPT16InputMods() const {
302     return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
303   }
304 
305   bool isRegOrImmWithFP32InputMods() const {
306     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
307   }
308 
309   bool isRegOrImmWithFP64InputMods() const {
310     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
311   }
312 
313   bool isRegOrInlineImmWithFP16InputMods() const {
314     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
315   }
316 
317   bool isRegOrInlineImmWithFP32InputMods() const {
318     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
319   }
320 
321 
322   bool isVReg() const {
323     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
324            isRegClass(AMDGPU::VReg_64RegClassID) ||
325            isRegClass(AMDGPU::VReg_96RegClassID) ||
326            isRegClass(AMDGPU::VReg_128RegClassID) ||
327            isRegClass(AMDGPU::VReg_160RegClassID) ||
328            isRegClass(AMDGPU::VReg_192RegClassID) ||
329            isRegClass(AMDGPU::VReg_256RegClassID) ||
330            isRegClass(AMDGPU::VReg_512RegClassID) ||
331            isRegClass(AMDGPU::VReg_1024RegClassID);
332   }
333 
334   bool isVReg32() const {
335     return isRegClass(AMDGPU::VGPR_32RegClassID);
336   }
337 
338   bool isVReg32OrOff() const {
339     return isOff() || isVReg32();
340   }
341 
342   bool isNull() const {
343     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
344   }
345 
346   bool isVRegWithInputMods() const;
347   bool isT16VRegWithInputMods() const;
348 
349   bool isSDWAOperand(MVT type) const;
350   bool isSDWAFP16Operand() const;
351   bool isSDWAFP32Operand() const;
352   bool isSDWAInt16Operand() const;
353   bool isSDWAInt32Operand() const;
354 
355   bool isImmTy(ImmTy ImmT) const {
356     return isImm() && Imm.Type == ImmT;
357   }
358 
359   template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
360 
361   bool isImmLiteral() const { return isImmTy(ImmTyNone); }
362 
363   bool isImmModifier() const {
364     return isImm() && Imm.Type != ImmTyNone;
365   }
366 
367   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
368   bool isDMask() const { return isImmTy(ImmTyDMask); }
369   bool isDim() const { return isImmTy(ImmTyDim); }
370   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
371   bool isOff() const { return isImmTy(ImmTyOff); }
372   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
373   bool isOffen() const { return isImmTy(ImmTyOffen); }
374   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
375   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
376   bool isOffset() const { return isImmTy(ImmTyOffset); }
377   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
378   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
379   bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
380   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
381   bool isGDS() const { return isImmTy(ImmTyGDS); }
382   bool isLDS() const { return isImmTy(ImmTyLDS); }
383   bool isCPol() const { return isImmTy(ImmTyCPol); }
384   bool isTFE() const { return isImmTy(ImmTyTFE); }
385   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
386   bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
387   bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
388   bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
389   bool isDppFI() const { return isImmTy(ImmTyDppFI); }
390   bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
391   bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
392   bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
393   bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
394   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
395   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
396   bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
397   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
398   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
399   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
400   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
401 
402   bool isRegOrImm() const {
403     return isReg() || isImm();
404   }
405 
406   bool isRegClass(unsigned RCID) const;
407 
408   bool isInlineValue() const;
409 
410   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
411     return isRegOrInline(RCID, type) && !hasModifiers();
412   }
413 
414   bool isSCSrcB16() const {
415     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
416   }
417 
418   bool isSCSrcV2B16() const {
419     return isSCSrcB16();
420   }
421 
422   bool isSCSrcB32() const {
423     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
424   }
425 
426   bool isSCSrcB64() const {
427     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
428   }
429 
430   bool isBoolReg() const;
431 
432   bool isSCSrcF16() const {
433     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
434   }
435 
436   bool isSCSrcV2F16() const {
437     return isSCSrcF16();
438   }
439 
440   bool isSCSrcF32() const {
441     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
442   }
443 
444   bool isSCSrcF64() const {
445     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
446   }
447 
448   bool isSSrcB32() const {
449     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
450   }
451 
452   bool isSSrcB16() const {
453     return isSCSrcB16() || isLiteralImm(MVT::i16);
454   }
455 
456   bool isSSrcV2B16() const {
457     llvm_unreachable("cannot happen");
458     return isSSrcB16();
459   }
460 
461   bool isSSrcB64() const {
462     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
463     // See isVSrc64().
464     return isSCSrcB64() || isLiteralImm(MVT::i64);
465   }
466 
467   bool isSSrcF32() const {
468     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
469   }
470 
471   bool isSSrcF64() const {
472     return isSCSrcB64() || isLiteralImm(MVT::f64);
473   }
474 
475   bool isSSrcF16() const {
476     return isSCSrcB16() || isLiteralImm(MVT::f16);
477   }
478 
479   bool isSSrcV2F16() const {
480     llvm_unreachable("cannot happen");
481     return isSSrcF16();
482   }
483 
484   bool isSSrcV2FP32() const {
485     llvm_unreachable("cannot happen");
486     return isSSrcF32();
487   }
488 
489   bool isSCSrcV2FP32() const {
490     llvm_unreachable("cannot happen");
491     return isSCSrcF32();
492   }
493 
494   bool isSSrcV2INT32() const {
495     llvm_unreachable("cannot happen");
496     return isSSrcB32();
497   }
498 
499   bool isSCSrcV2INT32() const {
500     llvm_unreachable("cannot happen");
501     return isSCSrcB32();
502   }
503 
504   bool isSSrcOrLdsB32() const {
505     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
506            isLiteralImm(MVT::i32) || isExpr();
507   }
508 
509   bool isVCSrcB32() const {
510     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
511   }
512 
513   bool isVCSrcB64() const {
514     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
515   }
516 
517   bool isVCSrcTB16() const {
518     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
519   }
520 
521   bool isVCSrcTB16_Lo128() const {
522     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
523   }
524 
525   bool isVCSrcFake16B16_Lo128() const {
526     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
527   }
528 
529   bool isVCSrcB16() const {
530     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
531   }
532 
533   bool isVCSrcV2B16() const {
534     return isVCSrcB16();
535   }
536 
537   bool isVCSrcF32() const {
538     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
539   }
540 
541   bool isVCSrcF64() const {
542     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
543   }
544 
545   bool isVCSrcTF16() const {
546     return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
547   }
548 
549   bool isVCSrcTF16_Lo128() const {
550     return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
551   }
552 
553   bool isVCSrcFake16F16_Lo128() const {
554     return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
555   }
556 
557   bool isVCSrcF16() const {
558     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
559   }
560 
561   bool isVCSrcV2F16() const {
562     return isVCSrcF16();
563   }
564 
565   bool isVSrcB32() const {
566     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
567   }
568 
569   bool isVSrcB64() const {
570     return isVCSrcF64() || isLiteralImm(MVT::i64);
571   }
572 
573   bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
574 
575   bool isVSrcTB16_Lo128() const {
576     return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
577   }
578 
579   bool isVSrcFake16B16_Lo128() const {
580     return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
581   }
582 
583   bool isVSrcB16() const {
584     return isVCSrcB16() || isLiteralImm(MVT::i16);
585   }
586 
587   bool isVSrcV2B16() const {
588     return isVSrcB16() || isLiteralImm(MVT::v2i16);
589   }
590 
591   bool isVCSrcV2FP32() const {
592     return isVCSrcF64();
593   }
594 
595   bool isVSrcV2FP32() const {
596     return isVSrcF64() || isLiteralImm(MVT::v2f32);
597   }
598 
599   bool isVCSrcV2INT32() const {
600     return isVCSrcB64();
601   }
602 
603   bool isVSrcV2INT32() const {
604     return isVSrcB64() || isLiteralImm(MVT::v2i32);
605   }
606 
607   bool isVSrcF32() const {
608     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
609   }
610 
611   bool isVSrcF64() const {
612     return isVCSrcF64() || isLiteralImm(MVT::f64);
613   }
614 
615   bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
616 
617   bool isVSrcTF16_Lo128() const {
618     return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
619   }
620 
621   bool isVSrcFake16F16_Lo128() const {
622     return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
623   }
624 
625   bool isVSrcF16() const {
626     return isVCSrcF16() || isLiteralImm(MVT::f16);
627   }
628 
629   bool isVSrcV2F16() const {
630     return isVSrcF16() || isLiteralImm(MVT::v2f16);
631   }
632 
633   bool isVISrcB32() const {
634     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
635   }
636 
637   bool isVISrcB16() const {
638     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
639   }
640 
641   bool isVISrcV2B16() const {
642     return isVISrcB16();
643   }
644 
645   bool isVISrcF32() const {
646     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
647   }
648 
649   bool isVISrcF16() const {
650     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
651   }
652 
653   bool isVISrcV2F16() const {
654     return isVISrcF16() || isVISrcB32();
655   }
656 
657   bool isVISrc_64B64() const {
658     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
659   }
660 
661   bool isVISrc_64F64() const {
662     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
663   }
664 
665   bool isVISrc_64V2FP32() const {
666     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
667   }
668 
669   bool isVISrc_64V2INT32() const {
670     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
671   }
672 
673   bool isVISrc_256B64() const {
674     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
675   }
676 
677   bool isVISrc_256F64() const {
678     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
679   }
680 
681   bool isVISrc_128B16() const {
682     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
683   }
684 
685   bool isVISrc_128V2B16() const {
686     return isVISrc_128B16();
687   }
688 
689   bool isVISrc_128B32() const {
690     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
691   }
692 
693   bool isVISrc_128F32() const {
694     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
695   }
696 
697   bool isVISrc_256V2FP32() const {
698     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
699   }
700 
701   bool isVISrc_256V2INT32() const {
702     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
703   }
704 
705   bool isVISrc_512B32() const {
706     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
707   }
708 
709   bool isVISrc_512B16() const {
710     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
711   }
712 
713   bool isVISrc_512V2B16() const {
714     return isVISrc_512B16();
715   }
716 
717   bool isVISrc_512F32() const {
718     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
719   }
720 
721   bool isVISrc_512F16() const {
722     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
723   }
724 
725   bool isVISrc_512V2F16() const {
726     return isVISrc_512F16() || isVISrc_512B32();
727   }
728 
729   bool isVISrc_1024B32() const {
730     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
731   }
732 
733   bool isVISrc_1024B16() const {
734     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
735   }
736 
737   bool isVISrc_1024V2B16() const {
738     return isVISrc_1024B16();
739   }
740 
741   bool isVISrc_1024F32() const {
742     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
743   }
744 
745   bool isVISrc_1024F16() const {
746     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
747   }
748 
749   bool isVISrc_1024V2F16() const {
750     return isVISrc_1024F16() || isVISrc_1024B32();
751   }
752 
753   bool isAISrcB32() const {
754     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
755   }
756 
757   bool isAISrcB16() const {
758     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
759   }
760 
761   bool isAISrcV2B16() const {
762     return isAISrcB16();
763   }
764 
765   bool isAISrcF32() const {
766     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
767   }
768 
769   bool isAISrcF16() const {
770     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
771   }
772 
773   bool isAISrcV2F16() const {
774     return isAISrcF16() || isAISrcB32();
775   }
776 
777   bool isAISrc_64B64() const {
778     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
779   }
780 
781   bool isAISrc_64F64() const {
782     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
783   }
784 
785   bool isAISrc_128B32() const {
786     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
787   }
788 
789   bool isAISrc_128B16() const {
790     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
791   }
792 
793   bool isAISrc_128V2B16() const {
794     return isAISrc_128B16();
795   }
796 
797   bool isAISrc_128F32() const {
798     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
799   }
800 
801   bool isAISrc_128F16() const {
802     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
803   }
804 
805   bool isAISrc_128V2F16() const {
806     return isAISrc_128F16() || isAISrc_128B32();
807   }
808 
809   bool isVISrc_128F16() const {
810     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
811   }
812 
813   bool isVISrc_128V2F16() const {
814     return isVISrc_128F16() || isVISrc_128B32();
815   }
816 
817   bool isAISrc_256B64() const {
818     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
819   }
820 
821   bool isAISrc_256F64() const {
822     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
823   }
824 
825   bool isAISrc_512B32() const {
826     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
827   }
828 
829   bool isAISrc_512B16() const {
830     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
831   }
832 
833   bool isAISrc_512V2B16() const {
834     return isAISrc_512B16();
835   }
836 
837   bool isAISrc_512F32() const {
838     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
839   }
840 
841   bool isAISrc_512F16() const {
842     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
843   }
844 
845   bool isAISrc_512V2F16() const {
846     return isAISrc_512F16() || isAISrc_512B32();
847   }
848 
849   bool isAISrc_1024B32() const {
850     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
851   }
852 
853   bool isAISrc_1024B16() const {
854     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
855   }
856 
857   bool isAISrc_1024V2B16() const {
858     return isAISrc_1024B16();
859   }
860 
861   bool isAISrc_1024F32() const {
862     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
863   }
864 
865   bool isAISrc_1024F16() const {
866     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
867   }
868 
869   bool isAISrc_1024V2F16() const {
870     return isAISrc_1024F16() || isAISrc_1024B32();
871   }
872 
873   bool isKImmFP32() const {
874     return isLiteralImm(MVT::f32);
875   }
876 
877   bool isKImmFP16() const {
878     return isLiteralImm(MVT::f16);
879   }
880 
881   bool isMem() const override {
882     return false;
883   }
884 
885   bool isExpr() const {
886     return Kind == Expression;
887   }
888 
889   bool isSOPPBrTarget() const { return isExpr() || isImm(); }
890 
891   bool isSWaitCnt() const;
892   bool isDepCtr() const;
893   bool isSDelayALU() const;
894   bool isHwreg() const;
895   bool isSendMsg() const;
896   bool isSplitBarrier() const;
897   bool isSwizzle() const;
898   bool isSMRDOffset8() const;
899   bool isSMEMOffset() const;
900   bool isSMRDLiteralOffset() const;
901   bool isDPP8() const;
902   bool isDPPCtrl() const;
903   bool isBLGP() const;
904   bool isCBSZ() const;
905   bool isABID() const;
906   bool isGPRIdxMode() const;
907   bool isS16Imm() const;
908   bool isU16Imm() const;
909   bool isEndpgm() const;
910   bool isWaitVDST() const;
911   bool isWaitEXP() const;
912 
913   auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
914     return std::bind(P, *this);
915   }
916 
917   StringRef getToken() const {
918     assert(isToken());
919     return StringRef(Tok.Data, Tok.Length);
920   }
921 
922   int64_t getImm() const {
923     assert(isImm());
924     return Imm.Val;
925   }
926 
927   void setImm(int64_t Val) {
928     assert(isImm());
929     Imm.Val = Val;
930   }
931 
932   ImmTy getImmTy() const {
933     assert(isImm());
934     return Imm.Type;
935   }
936 
937   unsigned getReg() const override {
938     assert(isRegKind());
939     return Reg.RegNo;
940   }
941 
942   SMLoc getStartLoc() const override {
943     return StartLoc;
944   }
945 
946   SMLoc getEndLoc() const override {
947     return EndLoc;
948   }
949 
950   SMRange getLocRange() const {
951     return SMRange(StartLoc, EndLoc);
952   }
953 
954   Modifiers getModifiers() const {
955     assert(isRegKind() || isImmTy(ImmTyNone));
956     return isRegKind() ? Reg.Mods : Imm.Mods;
957   }
958 
959   void setModifiers(Modifiers Mods) {
960     assert(isRegKind() || isImmTy(ImmTyNone));
961     if (isRegKind())
962       Reg.Mods = Mods;
963     else
964       Imm.Mods = Mods;
965   }
966 
967   bool hasModifiers() const {
968     return getModifiers().hasModifiers();
969   }
970 
971   bool hasFPModifiers() const {
972     return getModifiers().hasFPModifiers();
973   }
974 
975   bool hasIntModifiers() const {
976     return getModifiers().hasIntModifiers();
977   }
978 
979   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
980 
981   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
982 
983   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
984 
985   void addRegOperands(MCInst &Inst, unsigned N) const;
986 
987   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
988     if (isRegKind())
989       addRegOperands(Inst, N);
990     else
991       addImmOperands(Inst, N);
992   }
993 
994   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
995     Modifiers Mods = getModifiers();
996     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
997     if (isRegKind()) {
998       addRegOperands(Inst, N);
999     } else {
1000       addImmOperands(Inst, N, false);
1001     }
1002   }
1003 
1004   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1005     assert(!hasIntModifiers());
1006     addRegOrImmWithInputModsOperands(Inst, N);
1007   }
1008 
1009   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1010     assert(!hasFPModifiers());
1011     addRegOrImmWithInputModsOperands(Inst, N);
1012   }
1013 
1014   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
1015     Modifiers Mods = getModifiers();
1016     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
1017     assert(isRegKind());
1018     addRegOperands(Inst, N);
1019   }
1020 
1021   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
1022     assert(!hasIntModifiers());
1023     addRegWithInputModsOperands(Inst, N);
1024   }
1025 
1026   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
1027     assert(!hasFPModifiers());
1028     addRegWithInputModsOperands(Inst, N);
1029   }
1030 
1031   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1032     switch (Type) {
1033     case ImmTyNone: OS << "None"; break;
1034     case ImmTyGDS: OS << "GDS"; break;
1035     case ImmTyLDS: OS << "LDS"; break;
1036     case ImmTyOffen: OS << "Offen"; break;
1037     case ImmTyIdxen: OS << "Idxen"; break;
1038     case ImmTyAddr64: OS << "Addr64"; break;
1039     case ImmTyOffset: OS << "Offset"; break;
1040     case ImmTyInstOffset: OS << "InstOffset"; break;
1041     case ImmTyOffset0: OS << "Offset0"; break;
1042     case ImmTyOffset1: OS << "Offset1"; break;
1043     case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
1044     case ImmTyCPol: OS << "CPol"; break;
1045     case ImmTyTFE: OS << "TFE"; break;
1046     case ImmTyD16: OS << "D16"; break;
1047     case ImmTyFORMAT: OS << "FORMAT"; break;
1048     case ImmTyClampSI: OS << "ClampSI"; break;
1049     case ImmTyOModSI: OS << "OModSI"; break;
1050     case ImmTyDPP8: OS << "DPP8"; break;
1051     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1052     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1053     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1054     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1055     case ImmTyDppFI: OS << "DppFI"; break;
1056     case ImmTySDWADstSel: OS << "SDWADstSel"; break;
1057     case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
1058     case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
1059     case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
1060     case ImmTyDMask: OS << "DMask"; break;
1061     case ImmTyDim: OS << "Dim"; break;
1062     case ImmTyUNorm: OS << "UNorm"; break;
1063     case ImmTyDA: OS << "DA"; break;
1064     case ImmTyR128A16: OS << "R128A16"; break;
1065     case ImmTyA16: OS << "A16"; break;
1066     case ImmTyLWE: OS << "LWE"; break;
1067     case ImmTyOff: OS << "Off"; break;
1068     case ImmTyExpTgt: OS << "ExpTgt"; break;
1069     case ImmTyExpCompr: OS << "ExpCompr"; break;
1070     case ImmTyExpVM: OS << "ExpVM"; break;
1071     case ImmTyHwreg: OS << "Hwreg"; break;
1072     case ImmTySendMsg: OS << "SendMsg"; break;
1073     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1074     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1075     case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
1076     case ImmTyOpSel: OS << "OpSel"; break;
1077     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1078     case ImmTyNegLo: OS << "NegLo"; break;
1079     case ImmTyNegHi: OS << "NegHi"; break;
1080     case ImmTySwizzle: OS << "Swizzle"; break;
1081     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1082     case ImmTyHigh: OS << "High"; break;
1083     case ImmTyBLGP: OS << "BLGP"; break;
1084     case ImmTyCBSZ: OS << "CBSZ"; break;
1085     case ImmTyABID: OS << "ABID"; break;
1086     case ImmTyEndpgm: OS << "Endpgm"; break;
1087     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1088     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1089     }
1090   }
1091 
1092   void print(raw_ostream &OS) const override {
1093     switch (Kind) {
1094     case Register:
1095       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1096       break;
1097     case Immediate:
1098       OS << '<' << getImm();
1099       if (getImmTy() != ImmTyNone) {
1100         OS << " type: "; printImmTy(OS, getImmTy());
1101       }
1102       OS << " mods: " << Imm.Mods << '>';
1103       break;
1104     case Token:
1105       OS << '\'' << getToken() << '\'';
1106       break;
1107     case Expression:
1108       OS << "<expr " << *Expr << '>';
1109       break;
1110     }
1111   }
1112 
1113   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1114                                       int64_t Val, SMLoc Loc,
1115                                       ImmTy Type = ImmTyNone,
1116                                       bool IsFPImm = false) {
1117     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1118     Op->Imm.Val = Val;
1119     Op->Imm.IsFPImm = IsFPImm;
1120     Op->Imm.Kind = ImmKindTyNone;
1121     Op->Imm.Type = Type;
1122     Op->Imm.Mods = Modifiers();
1123     Op->StartLoc = Loc;
1124     Op->EndLoc = Loc;
1125     return Op;
1126   }
1127 
1128   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1129                                         StringRef Str, SMLoc Loc,
1130                                         bool HasExplicitEncodingSize = true) {
1131     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1132     Res->Tok.Data = Str.data();
1133     Res->Tok.Length = Str.size();
1134     Res->StartLoc = Loc;
1135     Res->EndLoc = Loc;
1136     return Res;
1137   }
1138 
1139   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1140                                       unsigned RegNo, SMLoc S,
1141                                       SMLoc E) {
1142     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1143     Op->Reg.RegNo = RegNo;
1144     Op->Reg.Mods = Modifiers();
1145     Op->StartLoc = S;
1146     Op->EndLoc = E;
1147     return Op;
1148   }
1149 
1150   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1151                                        const class MCExpr *Expr, SMLoc S) {
1152     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1153     Op->Expr = Expr;
1154     Op->StartLoc = S;
1155     Op->EndLoc = S;
1156     return Op;
1157   }
1158 };
1159 
1160 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1161   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1162   return OS;
1163 }
1164 
1165 //===----------------------------------------------------------------------===//
1166 // AsmParser
1167 //===----------------------------------------------------------------------===//
1168 
1169 // Holds info related to the current kernel, e.g. count of SGPRs used.
1170 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1171 // .amdgpu_hsa_kernel or at EOF.
1172 class KernelScopeInfo {
1173   int SgprIndexUnusedMin = -1;
1174   int VgprIndexUnusedMin = -1;
1175   int AgprIndexUnusedMin = -1;
1176   MCContext *Ctx = nullptr;
1177   MCSubtargetInfo const *MSTI = nullptr;
1178 
1179   void usesSgprAt(int i) {
1180     if (i >= SgprIndexUnusedMin) {
1181       SgprIndexUnusedMin = ++i;
1182       if (Ctx) {
1183         MCSymbol* const Sym =
1184           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1185         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1186       }
1187     }
1188   }
1189 
1190   void usesVgprAt(int i) {
1191     if (i >= VgprIndexUnusedMin) {
1192       VgprIndexUnusedMin = ++i;
1193       if (Ctx) {
1194         MCSymbol* const Sym =
1195           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1196         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1197                                          VgprIndexUnusedMin);
1198         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1199       }
1200     }
1201   }
1202 
1203   void usesAgprAt(int i) {
1204     // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
1205     if (!hasMAIInsts(*MSTI))
1206       return;
1207 
1208     if (i >= AgprIndexUnusedMin) {
1209       AgprIndexUnusedMin = ++i;
1210       if (Ctx) {
1211         MCSymbol* const Sym =
1212           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1213         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1214 
1215         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1216         MCSymbol* const vSym =
1217           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1218         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1219                                          VgprIndexUnusedMin);
1220         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1221       }
1222     }
1223   }
1224 
1225 public:
1226   KernelScopeInfo() = default;
1227 
1228   void initialize(MCContext &Context) {
1229     Ctx = &Context;
1230     MSTI = Ctx->getSubtargetInfo();
1231 
1232     usesSgprAt(SgprIndexUnusedMin = -1);
1233     usesVgprAt(VgprIndexUnusedMin = -1);
1234     if (hasMAIInsts(*MSTI)) {
1235       usesAgprAt(AgprIndexUnusedMin = -1);
1236     }
1237   }
1238 
1239   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1240                     unsigned RegWidth) {
1241     switch (RegKind) {
1242     case IS_SGPR:
1243       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1244       break;
1245     case IS_AGPR:
1246       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1247       break;
1248     case IS_VGPR:
1249       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1250       break;
1251     default:
1252       break;
1253     }
1254   }
1255 };
1256 
1257 class AMDGPUAsmParser : public MCTargetAsmParser {
1258   MCAsmParser &Parser;
1259 
1260   unsigned ForcedEncodingSize = 0;
1261   bool ForcedDPP = false;
1262   bool ForcedSDWA = false;
1263   KernelScopeInfo KernelScope;
1264 
1265   /// @name Auto-generated Match Functions
1266   /// {
1267 
1268 #define GET_ASSEMBLER_HEADER
1269 #include "AMDGPUGenAsmMatcher.inc"
1270 
1271   /// }
1272 
1273 private:
1274   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1275   bool OutOfRangeError(SMRange Range);
1276   /// Calculate VGPR/SGPR blocks required for given target, reserved
1277   /// registers, and user-specified NextFreeXGPR values.
1278   ///
1279   /// \param Features [in] Target features, used for bug corrections.
1280   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1281   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1282   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1283   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1284   /// descriptor field, if valid.
1285   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1286   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1287   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1288   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1289   /// \param VGPRBlocks [out] Result VGPR block count.
1290   /// \param SGPRBlocks [out] Result SGPR block count.
1291   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1292                           bool FlatScrUsed, bool XNACKUsed,
1293                           std::optional<bool> EnableWavefrontSize32,
1294                           unsigned NextFreeVGPR, SMRange VGPRRange,
1295                           unsigned NextFreeSGPR, SMRange SGPRRange,
1296                           unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1297   bool ParseDirectiveAMDGCNTarget();
1298   bool ParseDirectiveAMDHSAKernel();
1299   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1300   bool ParseDirectiveHSACodeObjectVersion();
1301   bool ParseDirectiveHSACodeObjectISA();
1302   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1303   bool ParseDirectiveAMDKernelCodeT();
1304   // TODO: Possibly make subtargetHasRegister const.
1305   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1306   bool ParseDirectiveAMDGPUHsaKernel();
1307 
1308   bool ParseDirectiveISAVersion();
1309   bool ParseDirectiveHSAMetadata();
1310   bool ParseDirectivePALMetadataBegin();
1311   bool ParseDirectivePALMetadata();
1312   bool ParseDirectiveAMDGPULDS();
1313 
1314   /// Common code to parse out a block of text (typically YAML) between start and
1315   /// end directives.
1316   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1317                            const char *AssemblerDirectiveEnd,
1318                            std::string &CollectString);
1319 
1320   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1321                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1322   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1323                            unsigned &RegNum, unsigned &RegWidth,
1324                            bool RestoreOnFailure = false);
1325   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1326                            unsigned &RegNum, unsigned &RegWidth,
1327                            SmallVectorImpl<AsmToken> &Tokens);
1328   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1329                            unsigned &RegWidth,
1330                            SmallVectorImpl<AsmToken> &Tokens);
1331   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1332                            unsigned &RegWidth,
1333                            SmallVectorImpl<AsmToken> &Tokens);
1334   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1335                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1336   bool ParseRegRange(unsigned& Num, unsigned& Width);
1337   unsigned getRegularReg(RegisterKind RegKind,
1338                          unsigned RegNum,
1339                          unsigned RegWidth,
1340                          SMLoc Loc);
1341 
1342   bool isRegister();
1343   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1344   std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1345   void initializeGprCountSymbol(RegisterKind RegKind);
1346   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1347                              unsigned RegWidth);
1348   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1349                     bool IsAtomic);
1350 
1351 public:
1352   enum AMDGPUMatchResultTy {
1353     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1354   };
1355   enum OperandMode {
1356     OperandMode_Default,
1357     OperandMode_NSA,
1358   };
1359 
1360   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1361 
1362   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1363                const MCInstrInfo &MII,
1364                const MCTargetOptions &Options)
1365       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1366     MCAsmParserExtension::Initialize(Parser);
1367 
1368     if (getFeatureBits().none()) {
1369       // Set default features.
1370       copySTI().ToggleFeature("southern-islands");
1371     }
1372 
1373     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1374 
1375     {
1376       // TODO: make those pre-defined variables read-only.
1377       // Currently there is none suitable machinery in the core llvm-mc for this.
1378       // MCSymbol::isRedefinable is intended for another purpose, and
1379       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1380       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1381       MCContext &Ctx = getContext();
1382       if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1383         MCSymbol *Sym =
1384             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1385         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1386         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1387         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1388         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1389         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1390       } else {
1391         MCSymbol *Sym =
1392             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1393         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1394         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1395         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1396         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1397         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1398       }
1399       if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1400         initializeGprCountSymbol(IS_VGPR);
1401         initializeGprCountSymbol(IS_SGPR);
1402       } else
1403         KernelScope.initialize(getContext());
1404     }
1405   }
1406 
1407   bool hasMIMG_R128() const {
1408     return AMDGPU::hasMIMG_R128(getSTI());
1409   }
1410 
1411   bool hasPackedD16() const {
1412     return AMDGPU::hasPackedD16(getSTI());
1413   }
1414 
1415   bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1416 
1417   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1418 
1419   bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1420 
1421   bool isSI() const {
1422     return AMDGPU::isSI(getSTI());
1423   }
1424 
1425   bool isCI() const {
1426     return AMDGPU::isCI(getSTI());
1427   }
1428 
1429   bool isVI() const {
1430     return AMDGPU::isVI(getSTI());
1431   }
1432 
1433   bool isGFX9() const {
1434     return AMDGPU::isGFX9(getSTI());
1435   }
1436 
1437   // TODO: isGFX90A is also true for GFX940. We need to clean it.
1438   bool isGFX90A() const {
1439     return AMDGPU::isGFX90A(getSTI());
1440   }
1441 
1442   bool isGFX940() const {
1443     return AMDGPU::isGFX940(getSTI());
1444   }
1445 
1446   bool isGFX9Plus() const {
1447     return AMDGPU::isGFX9Plus(getSTI());
1448   }
1449 
1450   bool isGFX10() const {
1451     return AMDGPU::isGFX10(getSTI());
1452   }
1453 
1454   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1455 
1456   bool isGFX11() const {
1457     return AMDGPU::isGFX11(getSTI());
1458   }
1459 
1460   bool isGFX11Plus() const {
1461     return AMDGPU::isGFX11Plus(getSTI());
1462   }
1463 
1464   bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1465 
1466   bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1467 
1468   bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1469 
1470   bool isGFX10_BEncoding() const {
1471     return AMDGPU::isGFX10_BEncoding(getSTI());
1472   }
1473 
1474   bool hasInv2PiInlineImm() const {
1475     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1476   }
1477 
1478   bool hasFlatOffsets() const {
1479     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1480   }
1481 
1482   bool hasArchitectedFlatScratch() const {
1483     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1484   }
1485 
1486   bool hasSGPR102_SGPR103() const {
1487     return !isVI() && !isGFX9();
1488   }
1489 
1490   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1491 
1492   bool hasIntClamp() const {
1493     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1494   }
1495 
1496   bool hasPartialNSAEncoding() const {
1497     return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1498   }
1499 
1500   unsigned getNSAMaxSize(bool HasSampler = false) const {
1501     return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1502   }
1503 
1504   unsigned getMaxNumUserSGPRs() const {
1505     return AMDGPU::getMaxNumUserSGPRs(getSTI());
1506   }
1507 
1508   bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1509 
1510   AMDGPUTargetStreamer &getTargetStreamer() {
1511     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1512     return static_cast<AMDGPUTargetStreamer &>(TS);
1513   }
1514 
1515   const MCRegisterInfo *getMRI() const {
1516     // We need this const_cast because for some reason getContext() is not const
1517     // in MCAsmParser.
1518     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1519   }
1520 
1521   const MCInstrInfo *getMII() const {
1522     return &MII;
1523   }
1524 
1525   const FeatureBitset &getFeatureBits() const {
1526     return getSTI().getFeatureBits();
1527   }
1528 
1529   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1530   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1531   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1532 
1533   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1534   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1535   bool isForcedDPP() const { return ForcedDPP; }
1536   bool isForcedSDWA() const { return ForcedSDWA; }
1537   ArrayRef<unsigned> getMatchedVariants() const;
1538   StringRef getMatchedVariantName() const;
1539 
1540   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1541   bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1542                      bool RestoreOnFailure);
1543   bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1544   ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1545                                SMLoc &EndLoc) override;
1546   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1547   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1548                                       unsigned Kind) override;
1549   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1550                                OperandVector &Operands, MCStreamer &Out,
1551                                uint64_t &ErrorInfo,
1552                                bool MatchingInlineAsm) override;
1553   bool ParseDirective(AsmToken DirectiveID) override;
1554   ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1555                            OperandMode Mode = OperandMode_Default);
1556   StringRef parseMnemonicSuffix(StringRef Name);
1557   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1558                         SMLoc NameLoc, OperandVector &Operands) override;
1559   //bool ProcessInstruction(MCInst &Inst);
1560 
1561   ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1562 
1563   ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1564 
1565   ParseStatus
1566   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1567                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1568                      std::function<bool(int64_t &)> ConvertResult = nullptr);
1569 
1570   ParseStatus parseOperandArrayWithPrefix(
1571       const char *Prefix, OperandVector &Operands,
1572       AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1573       bool (*ConvertResult)(int64_t &) = nullptr);
1574 
1575   ParseStatus
1576   parseNamedBit(StringRef Name, OperandVector &Operands,
1577                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1578   unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1579   ParseStatus parseCPol(OperandVector &Operands);
1580   ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1581   ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1582   ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1583                                     SMLoc &StringLoc);
1584 
1585   bool isModifier();
1586   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1587   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1588   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1589   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1590   bool parseSP3NegModifier();
1591   ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1592                        bool HasLit = false);
1593   ParseStatus parseReg(OperandVector &Operands);
1594   ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1595                             bool HasLit = false);
1596   ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1597                                            bool AllowImm = true);
1598   ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1599                                             bool AllowImm = true);
1600   ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1601   ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1602   ParseStatus parseVReg32OrOff(OperandVector &Operands);
1603   ParseStatus parseDfmtNfmt(int64_t &Format);
1604   ParseStatus parseUfmt(int64_t &Format);
1605   ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1606                                        int64_t &Format);
1607   ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1608                                          int64_t &Format);
1609   ParseStatus parseFORMAT(OperandVector &Operands);
1610   ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1611   ParseStatus parseNumericFormat(int64_t &Format);
1612   ParseStatus parseFlatOffset(OperandVector &Operands);
1613   ParseStatus parseR128A16(OperandVector &Operands);
1614   ParseStatus parseBLGP(OperandVector &Operands);
1615   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1616   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1617 
1618   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1619 
1620   bool parseCnt(int64_t &IntVal);
1621   ParseStatus parseSWaitCnt(OperandVector &Operands);
1622 
1623   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1624   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1625   ParseStatus parseDepCtr(OperandVector &Operands);
1626 
1627   bool parseDelay(int64_t &Delay);
1628   ParseStatus parseSDelayALU(OperandVector &Operands);
1629 
1630   ParseStatus parseHwreg(OperandVector &Operands);
1631 
1632 private:
1633   struct OperandInfoTy {
1634     SMLoc Loc;
1635     int64_t Id;
1636     bool IsSymbolic = false;
1637     bool IsDefined = false;
1638 
1639     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1640   };
1641 
1642   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1643   bool validateSendMsg(const OperandInfoTy &Msg,
1644                        const OperandInfoTy &Op,
1645                        const OperandInfoTy &Stream);
1646 
1647   bool parseHwregBody(OperandInfoTy &HwReg,
1648                       OperandInfoTy &Offset,
1649                       OperandInfoTy &Width);
1650   bool validateHwreg(const OperandInfoTy &HwReg,
1651                      const OperandInfoTy &Offset,
1652                      const OperandInfoTy &Width);
1653 
1654   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1655   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1656   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1657 
1658   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1659                       const OperandVector &Operands) const;
1660   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1661   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1662   SMLoc getLitLoc(const OperandVector &Operands,
1663                   bool SearchMandatoryLiterals = false) const;
1664   SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1665   SMLoc getConstLoc(const OperandVector &Operands) const;
1666   SMLoc getInstLoc(const OperandVector &Operands) const;
1667 
1668   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1669   bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1670   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1671   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1672   bool validateSOPLiteral(const MCInst &Inst) const;
1673   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1674   bool validateVOPDRegBankConstraints(const MCInst &Inst,
1675                                       const OperandVector &Operands);
1676   bool validateIntClampSupported(const MCInst &Inst);
1677   bool validateMIMGAtomicDMask(const MCInst &Inst);
1678   bool validateMIMGGatherDMask(const MCInst &Inst);
1679   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1680   bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1681   bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1682   bool validateMIMGD16(const MCInst &Inst);
1683   bool validateMIMGMSAA(const MCInst &Inst);
1684   bool validateOpSel(const MCInst &Inst);
1685   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1686   bool validateVccOperand(unsigned Reg) const;
1687   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1688   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1689   bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1690   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1691   bool validateAGPRLdSt(const MCInst &Inst) const;
1692   bool validateVGPRAlign(const MCInst &Inst) const;
1693   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1694   bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1695   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1696   bool validateDivScale(const MCInst &Inst);
1697   bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1698   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1699                              const SMLoc &IDLoc);
1700   bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1701                               const unsigned CPol);
1702   bool validateExeczVcczOperands(const OperandVector &Operands);
1703   bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1704   std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1705   unsigned getConstantBusLimit(unsigned Opcode) const;
1706   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1707   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1708   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1709 
1710   bool isSupportedMnemo(StringRef Mnemo,
1711                         const FeatureBitset &FBS);
1712   bool isSupportedMnemo(StringRef Mnemo,
1713                         const FeatureBitset &FBS,
1714                         ArrayRef<unsigned> Variants);
1715   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1716 
1717   bool isId(const StringRef Id) const;
1718   bool isId(const AsmToken &Token, const StringRef Id) const;
1719   bool isToken(const AsmToken::TokenKind Kind) const;
1720   StringRef getId() const;
1721   bool trySkipId(const StringRef Id);
1722   bool trySkipId(const StringRef Pref, const StringRef Id);
1723   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1724   bool trySkipToken(const AsmToken::TokenKind Kind);
1725   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1726   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1727   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1728 
1729   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1730   AsmToken::TokenKind getTokenKind() const;
1731   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1732   bool parseExpr(OperandVector &Operands);
1733   StringRef getTokenStr() const;
1734   AsmToken peekToken(bool ShouldSkipSpace = true);
1735   AsmToken getToken() const;
1736   SMLoc getLoc() const;
1737   void lex();
1738 
1739 public:
1740   void onBeginOfFile() override;
1741 
1742   ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1743 
1744   ParseStatus parseExpTgt(OperandVector &Operands);
1745   ParseStatus parseSendMsg(OperandVector &Operands);
1746   ParseStatus parseInterpSlot(OperandVector &Operands);
1747   ParseStatus parseInterpAttr(OperandVector &Operands);
1748   ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1749   ParseStatus parseBoolReg(OperandVector &Operands);
1750 
1751   bool parseSwizzleOperand(int64_t &Op,
1752                            const unsigned MinVal,
1753                            const unsigned MaxVal,
1754                            const StringRef ErrMsg,
1755                            SMLoc &Loc);
1756   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1757                             const unsigned MinVal,
1758                             const unsigned MaxVal,
1759                             const StringRef ErrMsg);
1760   ParseStatus parseSwizzle(OperandVector &Operands);
1761   bool parseSwizzleOffset(int64_t &Imm);
1762   bool parseSwizzleMacro(int64_t &Imm);
1763   bool parseSwizzleQuadPerm(int64_t &Imm);
1764   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1765   bool parseSwizzleBroadcast(int64_t &Imm);
1766   bool parseSwizzleSwap(int64_t &Imm);
1767   bool parseSwizzleReverse(int64_t &Imm);
1768 
1769   ParseStatus parseGPRIdxMode(OperandVector &Operands);
1770   int64_t parseGPRIdxMacro();
1771 
1772   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1773   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1774 
1775   ParseStatus parseOModSI(OperandVector &Operands);
1776 
1777   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1778                OptionalImmIndexMap &OptionalIdx);
1779   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1780   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1781   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1782   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1783   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1784                     OptionalImmIndexMap &OptionalIdx);
1785   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1786                 OptionalImmIndexMap &OptionalIdx);
1787 
1788   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1789   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1790 
1791   bool parseDimId(unsigned &Encoding);
1792   ParseStatus parseDim(OperandVector &Operands);
1793   bool convertDppBoundCtrl(int64_t &BoundCtrl);
1794   ParseStatus parseDPP8(OperandVector &Operands);
1795   ParseStatus parseDPPCtrl(OperandVector &Operands);
1796   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1797   int64_t parseDPPCtrlSel(StringRef Ctrl);
1798   int64_t parseDPPCtrlPerm();
1799   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1800   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1801     cvtDPP(Inst, Operands, true);
1802   }
1803   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1804                   bool IsDPP8 = false);
1805   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1806     cvtVOP3DPP(Inst, Operands, true);
1807   }
1808 
1809   ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1810                            AMDGPUOperand::ImmTy Type);
1811   ParseStatus parseSDWADstUnused(OperandVector &Operands);
1812   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1813   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1814   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1815   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1816   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1817   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1818                uint64_t BasicInstType,
1819                bool SkipDstVcc = false,
1820                bool SkipSrcVcc = false);
1821 
1822   ParseStatus parseEndpgm(OperandVector &Operands);
1823 
1824   ParseStatus parseVOPD(OperandVector &Operands);
1825 };
1826 
1827 } // end anonymous namespace
1828 
1829 // May be called with integer type with equivalent bitwidth.
1830 static const fltSemantics *getFltSemantics(unsigned Size) {
1831   switch (Size) {
1832   case 4:
1833     return &APFloat::IEEEsingle();
1834   case 8:
1835     return &APFloat::IEEEdouble();
1836   case 2:
1837     return &APFloat::IEEEhalf();
1838   default:
1839     llvm_unreachable("unsupported fp type");
1840   }
1841 }
1842 
1843 static const fltSemantics *getFltSemantics(MVT VT) {
1844   return getFltSemantics(VT.getSizeInBits() / 8);
1845 }
1846 
1847 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1848   switch (OperandType) {
1849   case AMDGPU::OPERAND_REG_IMM_INT32:
1850   case AMDGPU::OPERAND_REG_IMM_FP32:
1851   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1852   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1853   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1854   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1855   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1856   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1857   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1858   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1859   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1860   case AMDGPU::OPERAND_KIMM32:
1861   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
1862     return &APFloat::IEEEsingle();
1863   case AMDGPU::OPERAND_REG_IMM_INT64:
1864   case AMDGPU::OPERAND_REG_IMM_FP64:
1865   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1866   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1867   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1868     return &APFloat::IEEEdouble();
1869   case AMDGPU::OPERAND_REG_IMM_INT16:
1870   case AMDGPU::OPERAND_REG_IMM_FP16:
1871   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1872   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1873   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1874   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1875   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1876   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1877   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1878   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1879   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1880   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1881   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1882   case AMDGPU::OPERAND_KIMM16:
1883     return &APFloat::IEEEhalf();
1884   default:
1885     llvm_unreachable("unsupported fp type");
1886   }
1887 }
1888 
1889 //===----------------------------------------------------------------------===//
1890 // Operand
1891 //===----------------------------------------------------------------------===//
1892 
1893 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1894   bool Lost;
1895 
1896   // Convert literal to single precision
1897   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1898                                                APFloat::rmNearestTiesToEven,
1899                                                &Lost);
1900   // We allow precision lost but not overflow or underflow
1901   if (Status != APFloat::opOK &&
1902       Lost &&
1903       ((Status & APFloat::opOverflow)  != 0 ||
1904        (Status & APFloat::opUnderflow) != 0)) {
1905     return false;
1906   }
1907 
1908   return true;
1909 }
1910 
1911 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1912   return isUIntN(Size, Val) || isIntN(Size, Val);
1913 }
1914 
1915 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1916   if (VT.getScalarType() == MVT::i16) {
1917     // FP immediate values are broken.
1918     return isInlinableIntLiteral(Val);
1919   }
1920 
1921   // f16/v2f16 operands work correctly for all values.
1922   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1923 }
1924 
1925 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1926 
1927   // This is a hack to enable named inline values like
1928   // shared_base with both 32-bit and 64-bit operands.
1929   // Note that these values are defined as
1930   // 32-bit operands only.
1931   if (isInlineValue()) {
1932     return true;
1933   }
1934 
1935   if (!isImmTy(ImmTyNone)) {
1936     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1937     return false;
1938   }
1939   // TODO: We should avoid using host float here. It would be better to
1940   // check the float bit values which is what a few other places do.
1941   // We've had bot failures before due to weird NaN support on mips hosts.
1942 
1943   APInt Literal(64, Imm.Val);
1944 
1945   if (Imm.IsFPImm) { // We got fp literal token
1946     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1947       return AMDGPU::isInlinableLiteral64(Imm.Val,
1948                                           AsmParser->hasInv2PiInlineImm());
1949     }
1950 
1951     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1952     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1953       return false;
1954 
1955     if (type.getScalarSizeInBits() == 16) {
1956       return isInlineableLiteralOp16(
1957         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1958         type, AsmParser->hasInv2PiInlineImm());
1959     }
1960 
1961     // Check if single precision literal is inlinable
1962     return AMDGPU::isInlinableLiteral32(
1963       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1964       AsmParser->hasInv2PiInlineImm());
1965   }
1966 
1967   // We got int literal token.
1968   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1969     return AMDGPU::isInlinableLiteral64(Imm.Val,
1970                                         AsmParser->hasInv2PiInlineImm());
1971   }
1972 
1973   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1974     return false;
1975   }
1976 
1977   if (type.getScalarSizeInBits() == 16) {
1978     return isInlineableLiteralOp16(
1979       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1980       type, AsmParser->hasInv2PiInlineImm());
1981   }
1982 
1983   return AMDGPU::isInlinableLiteral32(
1984     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1985     AsmParser->hasInv2PiInlineImm());
1986 }
1987 
1988 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1989   // Check that this immediate can be added as literal
1990   if (!isImmTy(ImmTyNone)) {
1991     return false;
1992   }
1993 
1994   if (!Imm.IsFPImm) {
1995     // We got int literal token.
1996 
1997     if (type == MVT::f64 && hasFPModifiers()) {
1998       // Cannot apply fp modifiers to int literals preserving the same semantics
1999       // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2000       // disable these cases.
2001       return false;
2002     }
2003 
2004     unsigned Size = type.getSizeInBits();
2005     if (Size == 64)
2006       Size = 32;
2007 
2008     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2009     // types.
2010     return isSafeTruncation(Imm.Val, Size);
2011   }
2012 
2013   // We got fp literal token
2014   if (type == MVT::f64) { // Expected 64-bit fp operand
2015     // We would set low 64-bits of literal to zeroes but we accept this literals
2016     return true;
2017   }
2018 
2019   if (type == MVT::i64) { // Expected 64-bit int operand
2020     // We don't allow fp literals in 64-bit integer instructions. It is
2021     // unclear how we should encode them.
2022     return false;
2023   }
2024 
2025   // We allow fp literals with f16x2 operands assuming that the specified
2026   // literal goes into the lower half and the upper half is zero. We also
2027   // require that the literal may be losslessly converted to f16.
2028   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2029                      (type == MVT::v2i16)? MVT::i16 :
2030                      (type == MVT::v2f32)? MVT::f32 : type;
2031 
2032   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2033   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2034 }
2035 
2036 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2037   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2038 }
2039 
2040 bool AMDGPUOperand::isVRegWithInputMods() const {
2041   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2042          // GFX90A allows DPP on 64-bit operands.
2043          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2044           AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2045 }
2046 
2047 bool AMDGPUOperand::isT16VRegWithInputMods() const {
2048   return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID);
2049 }
2050 
2051 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2052   if (AsmParser->isVI())
2053     return isVReg32();
2054   else if (AsmParser->isGFX9Plus())
2055     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2056   else
2057     return false;
2058 }
2059 
2060 bool AMDGPUOperand::isSDWAFP16Operand() const {
2061   return isSDWAOperand(MVT::f16);
2062 }
2063 
2064 bool AMDGPUOperand::isSDWAFP32Operand() const {
2065   return isSDWAOperand(MVT::f32);
2066 }
2067 
2068 bool AMDGPUOperand::isSDWAInt16Operand() const {
2069   return isSDWAOperand(MVT::i16);
2070 }
2071 
2072 bool AMDGPUOperand::isSDWAInt32Operand() const {
2073   return isSDWAOperand(MVT::i32);
2074 }
2075 
2076 bool AMDGPUOperand::isBoolReg() const {
2077   auto FB = AsmParser->getFeatureBits();
2078   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2079                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2080 }
2081 
2082 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2083 {
2084   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2085   assert(Size == 2 || Size == 4 || Size == 8);
2086 
2087   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2088 
2089   if (Imm.Mods.Abs) {
2090     Val &= ~FpSignMask;
2091   }
2092   if (Imm.Mods.Neg) {
2093     Val ^= FpSignMask;
2094   }
2095 
2096   return Val;
2097 }
2098 
2099 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2100   if (isExpr()) {
2101     Inst.addOperand(MCOperand::createExpr(Expr));
2102     return;
2103   }
2104 
2105   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2106                              Inst.getNumOperands())) {
2107     addLiteralImmOperand(Inst, Imm.Val,
2108                          ApplyModifiers &
2109                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2110   } else {
2111     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2112     Inst.addOperand(MCOperand::createImm(Imm.Val));
2113     setImmKindNone();
2114   }
2115 }
2116 
2117 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2118   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2119   auto OpNum = Inst.getNumOperands();
2120   // Check that this operand accepts literals
2121   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2122 
2123   if (ApplyModifiers) {
2124     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2125     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2126     Val = applyInputFPModifiers(Val, Size);
2127   }
2128 
2129   APInt Literal(64, Val);
2130   uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
2131 
2132   if (Imm.IsFPImm) { // We got fp literal token
2133     switch (OpTy) {
2134     case AMDGPU::OPERAND_REG_IMM_INT64:
2135     case AMDGPU::OPERAND_REG_IMM_FP64:
2136     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2137     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2138     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2139       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2140                                        AsmParser->hasInv2PiInlineImm())) {
2141         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2142         setImmKindConst();
2143         return;
2144       }
2145 
2146       // Non-inlineable
2147       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2148         // For fp operands we check if low 32 bits are zeros
2149         if (Literal.getLoBits(32) != 0) {
2150           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2151           "Can't encode literal as exact 64-bit floating-point operand. "
2152           "Low 32-bits will be set to zero");
2153           Val &= 0xffffffff00000000u;
2154         }
2155 
2156         Inst.addOperand(MCOperand::createImm(Val));
2157         setImmKindLiteral();
2158         return;
2159       }
2160 
2161       // We don't allow fp literals in 64-bit integer instructions. It is
2162       // unclear how we should encode them. This case should be checked earlier
2163       // in predicate methods (isLiteralImm())
2164       llvm_unreachable("fp literal in 64-bit integer instruction.");
2165 
2166     case AMDGPU::OPERAND_REG_IMM_INT32:
2167     case AMDGPU::OPERAND_REG_IMM_FP32:
2168     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2169     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2170     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2171     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2172     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2173     case AMDGPU::OPERAND_REG_IMM_INT16:
2174     case AMDGPU::OPERAND_REG_IMM_FP16:
2175     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2176     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2177     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2178     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2179     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2180     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2181     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2182     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2183     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2184     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2185     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2186     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2187     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2188     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2189     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2190     case AMDGPU::OPERAND_KIMM32:
2191     case AMDGPU::OPERAND_KIMM16:
2192     case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
2193       bool lost;
2194       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2195       // Convert literal to single precision
2196       FPLiteral.convert(*getOpFltSemantics(OpTy),
2197                         APFloat::rmNearestTiesToEven, &lost);
2198       // We allow precision lost but not overflow or underflow. This should be
2199       // checked earlier in isLiteralImm()
2200 
2201       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2202       Inst.addOperand(MCOperand::createImm(ImmVal));
2203       if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
2204         setImmKindMandatoryLiteral();
2205       } else {
2206         setImmKindLiteral();
2207       }
2208       return;
2209     }
2210     default:
2211       llvm_unreachable("invalid operand size");
2212     }
2213 
2214     return;
2215   }
2216 
2217   // We got int literal token.
2218   // Only sign extend inline immediates.
2219   switch (OpTy) {
2220   case AMDGPU::OPERAND_REG_IMM_INT32:
2221   case AMDGPU::OPERAND_REG_IMM_FP32:
2222   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2223   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2224   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2225   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2226   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2227   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2228   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2229   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2230   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2231   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2232   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2233   case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
2234     if (isSafeTruncation(Val, 32) &&
2235         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2236                                      AsmParser->hasInv2PiInlineImm())) {
2237       Inst.addOperand(MCOperand::createImm(Val));
2238       setImmKindConst();
2239       return;
2240     }
2241 
2242     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2243     setImmKindLiteral();
2244     return;
2245 
2246   case AMDGPU::OPERAND_REG_IMM_INT64:
2247   case AMDGPU::OPERAND_REG_IMM_FP64:
2248   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2249   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2250   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2251     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2252       Inst.addOperand(MCOperand::createImm(Val));
2253       setImmKindConst();
2254       return;
2255     }
2256 
2257     Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
2258                                                     : Lo_32(Val);
2259 
2260     Inst.addOperand(MCOperand::createImm(Val));
2261     setImmKindLiteral();
2262     return;
2263 
2264   case AMDGPU::OPERAND_REG_IMM_INT16:
2265   case AMDGPU::OPERAND_REG_IMM_FP16:
2266   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2267   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2268   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2269   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2270   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2271     if (isSafeTruncation(Val, 16) &&
2272         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2273                                      AsmParser->hasInv2PiInlineImm())) {
2274       Inst.addOperand(MCOperand::createImm(Val));
2275       setImmKindConst();
2276       return;
2277     }
2278 
2279     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2280     setImmKindLiteral();
2281     return;
2282 
2283   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2284   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2285   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2286   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2287     assert(isSafeTruncation(Val, 16));
2288     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2289                                         AsmParser->hasInv2PiInlineImm()));
2290 
2291     Inst.addOperand(MCOperand::createImm(Val));
2292     return;
2293   }
2294   case AMDGPU::OPERAND_KIMM32:
2295     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2296     setImmKindMandatoryLiteral();
2297     return;
2298   case AMDGPU::OPERAND_KIMM16:
2299     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2300     setImmKindMandatoryLiteral();
2301     return;
2302   default:
2303     llvm_unreachable("invalid operand size");
2304   }
2305 }
2306 
2307 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2308   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2309 }
2310 
2311 bool AMDGPUOperand::isInlineValue() const {
2312   return isRegKind() && ::isInlineValue(getReg());
2313 }
2314 
2315 //===----------------------------------------------------------------------===//
2316 // AsmParser
2317 //===----------------------------------------------------------------------===//
2318 
2319 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2320   if (Is == IS_VGPR) {
2321     switch (RegWidth) {
2322       default: return -1;
2323       case 32:
2324         return AMDGPU::VGPR_32RegClassID;
2325       case 64:
2326         return AMDGPU::VReg_64RegClassID;
2327       case 96:
2328         return AMDGPU::VReg_96RegClassID;
2329       case 128:
2330         return AMDGPU::VReg_128RegClassID;
2331       case 160:
2332         return AMDGPU::VReg_160RegClassID;
2333       case 192:
2334         return AMDGPU::VReg_192RegClassID;
2335       case 224:
2336         return AMDGPU::VReg_224RegClassID;
2337       case 256:
2338         return AMDGPU::VReg_256RegClassID;
2339       case 288:
2340         return AMDGPU::VReg_288RegClassID;
2341       case 320:
2342         return AMDGPU::VReg_320RegClassID;
2343       case 352:
2344         return AMDGPU::VReg_352RegClassID;
2345       case 384:
2346         return AMDGPU::VReg_384RegClassID;
2347       case 512:
2348         return AMDGPU::VReg_512RegClassID;
2349       case 1024:
2350         return AMDGPU::VReg_1024RegClassID;
2351     }
2352   } else if (Is == IS_TTMP) {
2353     switch (RegWidth) {
2354       default: return -1;
2355       case 32:
2356         return AMDGPU::TTMP_32RegClassID;
2357       case 64:
2358         return AMDGPU::TTMP_64RegClassID;
2359       case 128:
2360         return AMDGPU::TTMP_128RegClassID;
2361       case 256:
2362         return AMDGPU::TTMP_256RegClassID;
2363       case 512:
2364         return AMDGPU::TTMP_512RegClassID;
2365     }
2366   } else if (Is == IS_SGPR) {
2367     switch (RegWidth) {
2368       default: return -1;
2369       case 32:
2370         return AMDGPU::SGPR_32RegClassID;
2371       case 64:
2372         return AMDGPU::SGPR_64RegClassID;
2373       case 96:
2374         return AMDGPU::SGPR_96RegClassID;
2375       case 128:
2376         return AMDGPU::SGPR_128RegClassID;
2377       case 160:
2378         return AMDGPU::SGPR_160RegClassID;
2379       case 192:
2380         return AMDGPU::SGPR_192RegClassID;
2381       case 224:
2382         return AMDGPU::SGPR_224RegClassID;
2383       case 256:
2384         return AMDGPU::SGPR_256RegClassID;
2385       case 288:
2386         return AMDGPU::SGPR_288RegClassID;
2387       case 320:
2388         return AMDGPU::SGPR_320RegClassID;
2389       case 352:
2390         return AMDGPU::SGPR_352RegClassID;
2391       case 384:
2392         return AMDGPU::SGPR_384RegClassID;
2393       case 512:
2394         return AMDGPU::SGPR_512RegClassID;
2395     }
2396   } else if (Is == IS_AGPR) {
2397     switch (RegWidth) {
2398       default: return -1;
2399       case 32:
2400         return AMDGPU::AGPR_32RegClassID;
2401       case 64:
2402         return AMDGPU::AReg_64RegClassID;
2403       case 96:
2404         return AMDGPU::AReg_96RegClassID;
2405       case 128:
2406         return AMDGPU::AReg_128RegClassID;
2407       case 160:
2408         return AMDGPU::AReg_160RegClassID;
2409       case 192:
2410         return AMDGPU::AReg_192RegClassID;
2411       case 224:
2412         return AMDGPU::AReg_224RegClassID;
2413       case 256:
2414         return AMDGPU::AReg_256RegClassID;
2415       case 288:
2416         return AMDGPU::AReg_288RegClassID;
2417       case 320:
2418         return AMDGPU::AReg_320RegClassID;
2419       case 352:
2420         return AMDGPU::AReg_352RegClassID;
2421       case 384:
2422         return AMDGPU::AReg_384RegClassID;
2423       case 512:
2424         return AMDGPU::AReg_512RegClassID;
2425       case 1024:
2426         return AMDGPU::AReg_1024RegClassID;
2427     }
2428   }
2429   return -1;
2430 }
2431 
2432 static unsigned getSpecialRegForName(StringRef RegName) {
2433   return StringSwitch<unsigned>(RegName)
2434     .Case("exec", AMDGPU::EXEC)
2435     .Case("vcc", AMDGPU::VCC)
2436     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2437     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2438     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2439     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2440     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2441     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2442     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2443     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2444     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2445     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2446     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2447     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2448     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2449     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2450     .Case("m0", AMDGPU::M0)
2451     .Case("vccz", AMDGPU::SRC_VCCZ)
2452     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2453     .Case("execz", AMDGPU::SRC_EXECZ)
2454     .Case("src_execz", AMDGPU::SRC_EXECZ)
2455     .Case("scc", AMDGPU::SRC_SCC)
2456     .Case("src_scc", AMDGPU::SRC_SCC)
2457     .Case("tba", AMDGPU::TBA)
2458     .Case("tma", AMDGPU::TMA)
2459     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2460     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2461     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2462     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2463     .Case("vcc_lo", AMDGPU::VCC_LO)
2464     .Case("vcc_hi", AMDGPU::VCC_HI)
2465     .Case("exec_lo", AMDGPU::EXEC_LO)
2466     .Case("exec_hi", AMDGPU::EXEC_HI)
2467     .Case("tma_lo", AMDGPU::TMA_LO)
2468     .Case("tma_hi", AMDGPU::TMA_HI)
2469     .Case("tba_lo", AMDGPU::TBA_LO)
2470     .Case("tba_hi", AMDGPU::TBA_HI)
2471     .Case("pc", AMDGPU::PC_REG)
2472     .Case("null", AMDGPU::SGPR_NULL)
2473     .Default(AMDGPU::NoRegister);
2474 }
2475 
2476 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2477                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2478   auto R = parseRegister();
2479   if (!R) return true;
2480   assert(R->isReg());
2481   RegNo = R->getReg();
2482   StartLoc = R->getStartLoc();
2483   EndLoc = R->getEndLoc();
2484   return false;
2485 }
2486 
2487 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2488                                     SMLoc &EndLoc) {
2489   return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2490 }
2491 
2492 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2493                                               SMLoc &EndLoc) {
2494   bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2495   bool PendingErrors = getParser().hasPendingError();
2496   getParser().clearPendingErrors();
2497   if (PendingErrors)
2498     return ParseStatus::Failure;
2499   if (Result)
2500     return ParseStatus::NoMatch;
2501   return ParseStatus::Success;
2502 }
2503 
2504 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2505                                             RegisterKind RegKind, unsigned Reg1,
2506                                             SMLoc Loc) {
2507   switch (RegKind) {
2508   case IS_SPECIAL:
2509     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2510       Reg = AMDGPU::EXEC;
2511       RegWidth = 64;
2512       return true;
2513     }
2514     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2515       Reg = AMDGPU::FLAT_SCR;
2516       RegWidth = 64;
2517       return true;
2518     }
2519     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2520       Reg = AMDGPU::XNACK_MASK;
2521       RegWidth = 64;
2522       return true;
2523     }
2524     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2525       Reg = AMDGPU::VCC;
2526       RegWidth = 64;
2527       return true;
2528     }
2529     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2530       Reg = AMDGPU::TBA;
2531       RegWidth = 64;
2532       return true;
2533     }
2534     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2535       Reg = AMDGPU::TMA;
2536       RegWidth = 64;
2537       return true;
2538     }
2539     Error(Loc, "register does not fit in the list");
2540     return false;
2541   case IS_VGPR:
2542   case IS_SGPR:
2543   case IS_AGPR:
2544   case IS_TTMP:
2545     if (Reg1 != Reg + RegWidth / 32) {
2546       Error(Loc, "registers in a list must have consecutive indices");
2547       return false;
2548     }
2549     RegWidth += 32;
2550     return true;
2551   default:
2552     llvm_unreachable("unexpected register kind");
2553   }
2554 }
2555 
2556 struct RegInfo {
2557   StringLiteral Name;
2558   RegisterKind Kind;
2559 };
2560 
2561 static constexpr RegInfo RegularRegisters[] = {
2562   {{"v"},    IS_VGPR},
2563   {{"s"},    IS_SGPR},
2564   {{"ttmp"}, IS_TTMP},
2565   {{"acc"},  IS_AGPR},
2566   {{"a"},    IS_AGPR},
2567 };
2568 
2569 static bool isRegularReg(RegisterKind Kind) {
2570   return Kind == IS_VGPR ||
2571          Kind == IS_SGPR ||
2572          Kind == IS_TTMP ||
2573          Kind == IS_AGPR;
2574 }
2575 
2576 static const RegInfo* getRegularRegInfo(StringRef Str) {
2577   for (const RegInfo &Reg : RegularRegisters)
2578     if (Str.starts_with(Reg.Name))
2579       return &Reg;
2580   return nullptr;
2581 }
2582 
2583 static bool getRegNum(StringRef Str, unsigned& Num) {
2584   return !Str.getAsInteger(10, Num);
2585 }
2586 
2587 bool
2588 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2589                             const AsmToken &NextToken) const {
2590 
2591   // A list of consecutive registers: [s0,s1,s2,s3]
2592   if (Token.is(AsmToken::LBrac))
2593     return true;
2594 
2595   if (!Token.is(AsmToken::Identifier))
2596     return false;
2597 
2598   // A single register like s0 or a range of registers like s[0:1]
2599 
2600   StringRef Str = Token.getString();
2601   const RegInfo *Reg = getRegularRegInfo(Str);
2602   if (Reg) {
2603     StringRef RegName = Reg->Name;
2604     StringRef RegSuffix = Str.substr(RegName.size());
2605     if (!RegSuffix.empty()) {
2606       unsigned Num;
2607       // A single register with an index: rXX
2608       if (getRegNum(RegSuffix, Num))
2609         return true;
2610     } else {
2611       // A range of registers: r[XX:YY].
2612       if (NextToken.is(AsmToken::LBrac))
2613         return true;
2614     }
2615   }
2616 
2617   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2618 }
2619 
2620 bool
2621 AMDGPUAsmParser::isRegister()
2622 {
2623   return isRegister(getToken(), peekToken());
2624 }
2625 
2626 unsigned
2627 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2628                                unsigned RegNum,
2629                                unsigned RegWidth,
2630                                SMLoc Loc) {
2631 
2632   assert(isRegularReg(RegKind));
2633 
2634   unsigned AlignSize = 1;
2635   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2636     // SGPR and TTMP registers must be aligned.
2637     // Max required alignment is 4 dwords.
2638     AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2639   }
2640 
2641   if (RegNum % AlignSize != 0) {
2642     Error(Loc, "invalid register alignment");
2643     return AMDGPU::NoRegister;
2644   }
2645 
2646   unsigned RegIdx = RegNum / AlignSize;
2647   int RCID = getRegClass(RegKind, RegWidth);
2648   if (RCID == -1) {
2649     Error(Loc, "invalid or unsupported register size");
2650     return AMDGPU::NoRegister;
2651   }
2652 
2653   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2654   const MCRegisterClass RC = TRI->getRegClass(RCID);
2655   if (RegIdx >= RC.getNumRegs()) {
2656     Error(Loc, "register index is out of range");
2657     return AMDGPU::NoRegister;
2658   }
2659 
2660   return RC.getRegister(RegIdx);
2661 }
2662 
2663 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2664   int64_t RegLo, RegHi;
2665   if (!skipToken(AsmToken::LBrac, "missing register index"))
2666     return false;
2667 
2668   SMLoc FirstIdxLoc = getLoc();
2669   SMLoc SecondIdxLoc;
2670 
2671   if (!parseExpr(RegLo))
2672     return false;
2673 
2674   if (trySkipToken(AsmToken::Colon)) {
2675     SecondIdxLoc = getLoc();
2676     if (!parseExpr(RegHi))
2677       return false;
2678   } else {
2679     RegHi = RegLo;
2680   }
2681 
2682   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2683     return false;
2684 
2685   if (!isUInt<32>(RegLo)) {
2686     Error(FirstIdxLoc, "invalid register index");
2687     return false;
2688   }
2689 
2690   if (!isUInt<32>(RegHi)) {
2691     Error(SecondIdxLoc, "invalid register index");
2692     return false;
2693   }
2694 
2695   if (RegLo > RegHi) {
2696     Error(FirstIdxLoc, "first register index should not exceed second index");
2697     return false;
2698   }
2699 
2700   Num = static_cast<unsigned>(RegLo);
2701   RegWidth = 32 * ((RegHi - RegLo) + 1);
2702   return true;
2703 }
2704 
2705 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2706                                           unsigned &RegNum, unsigned &RegWidth,
2707                                           SmallVectorImpl<AsmToken> &Tokens) {
2708   assert(isToken(AsmToken::Identifier));
2709   unsigned Reg = getSpecialRegForName(getTokenStr());
2710   if (Reg) {
2711     RegNum = 0;
2712     RegWidth = 32;
2713     RegKind = IS_SPECIAL;
2714     Tokens.push_back(getToken());
2715     lex(); // skip register name
2716   }
2717   return Reg;
2718 }
2719 
2720 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2721                                           unsigned &RegNum, unsigned &RegWidth,
2722                                           SmallVectorImpl<AsmToken> &Tokens) {
2723   assert(isToken(AsmToken::Identifier));
2724   StringRef RegName = getTokenStr();
2725   auto Loc = getLoc();
2726 
2727   const RegInfo *RI = getRegularRegInfo(RegName);
2728   if (!RI) {
2729     Error(Loc, "invalid register name");
2730     return AMDGPU::NoRegister;
2731   }
2732 
2733   Tokens.push_back(getToken());
2734   lex(); // skip register name
2735 
2736   RegKind = RI->Kind;
2737   StringRef RegSuffix = RegName.substr(RI->Name.size());
2738   if (!RegSuffix.empty()) {
2739     // Single 32-bit register: vXX.
2740     if (!getRegNum(RegSuffix, RegNum)) {
2741       Error(Loc, "invalid register index");
2742       return AMDGPU::NoRegister;
2743     }
2744     RegWidth = 32;
2745   } else {
2746     // Range of registers: v[XX:YY]. ":YY" is optional.
2747     if (!ParseRegRange(RegNum, RegWidth))
2748       return AMDGPU::NoRegister;
2749   }
2750 
2751   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2752 }
2753 
2754 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2755                                        unsigned &RegWidth,
2756                                        SmallVectorImpl<AsmToken> &Tokens) {
2757   unsigned Reg = AMDGPU::NoRegister;
2758   auto ListLoc = getLoc();
2759 
2760   if (!skipToken(AsmToken::LBrac,
2761                  "expected a register or a list of registers")) {
2762     return AMDGPU::NoRegister;
2763   }
2764 
2765   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2766 
2767   auto Loc = getLoc();
2768   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2769     return AMDGPU::NoRegister;
2770   if (RegWidth != 32) {
2771     Error(Loc, "expected a single 32-bit register");
2772     return AMDGPU::NoRegister;
2773   }
2774 
2775   for (; trySkipToken(AsmToken::Comma); ) {
2776     RegisterKind NextRegKind;
2777     unsigned NextReg, NextRegNum, NextRegWidth;
2778     Loc = getLoc();
2779 
2780     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2781                              NextRegNum, NextRegWidth,
2782                              Tokens)) {
2783       return AMDGPU::NoRegister;
2784     }
2785     if (NextRegWidth != 32) {
2786       Error(Loc, "expected a single 32-bit register");
2787       return AMDGPU::NoRegister;
2788     }
2789     if (NextRegKind != RegKind) {
2790       Error(Loc, "registers in a list must be of the same kind");
2791       return AMDGPU::NoRegister;
2792     }
2793     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2794       return AMDGPU::NoRegister;
2795   }
2796 
2797   if (!skipToken(AsmToken::RBrac,
2798                  "expected a comma or a closing square bracket")) {
2799     return AMDGPU::NoRegister;
2800   }
2801 
2802   if (isRegularReg(RegKind))
2803     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2804 
2805   return Reg;
2806 }
2807 
2808 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2809                                           unsigned &RegNum, unsigned &RegWidth,
2810                                           SmallVectorImpl<AsmToken> &Tokens) {
2811   auto Loc = getLoc();
2812   Reg = AMDGPU::NoRegister;
2813 
2814   if (isToken(AsmToken::Identifier)) {
2815     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2816     if (Reg == AMDGPU::NoRegister)
2817       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2818   } else {
2819     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2820   }
2821 
2822   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2823   if (Reg == AMDGPU::NoRegister) {
2824     assert(Parser.hasPendingError());
2825     return false;
2826   }
2827 
2828   if (!subtargetHasRegister(*TRI, Reg)) {
2829     if (Reg == AMDGPU::SGPR_NULL) {
2830       Error(Loc, "'null' operand is not supported on this GPU");
2831     } else {
2832       Error(Loc, "register not available on this GPU");
2833     }
2834     return false;
2835   }
2836 
2837   return true;
2838 }
2839 
2840 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2841                                           unsigned &RegNum, unsigned &RegWidth,
2842                                           bool RestoreOnFailure /*=false*/) {
2843   Reg = AMDGPU::NoRegister;
2844 
2845   SmallVector<AsmToken, 1> Tokens;
2846   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2847     if (RestoreOnFailure) {
2848       while (!Tokens.empty()) {
2849         getLexer().UnLex(Tokens.pop_back_val());
2850       }
2851     }
2852     return true;
2853   }
2854   return false;
2855 }
2856 
2857 std::optional<StringRef>
2858 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2859   switch (RegKind) {
2860   case IS_VGPR:
2861     return StringRef(".amdgcn.next_free_vgpr");
2862   case IS_SGPR:
2863     return StringRef(".amdgcn.next_free_sgpr");
2864   default:
2865     return std::nullopt;
2866   }
2867 }
2868 
2869 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2870   auto SymbolName = getGprCountSymbolName(RegKind);
2871   assert(SymbolName && "initializing invalid register kind");
2872   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2873   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2874 }
2875 
2876 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2877                                             unsigned DwordRegIndex,
2878                                             unsigned RegWidth) {
2879   // Symbols are only defined for GCN targets
2880   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2881     return true;
2882 
2883   auto SymbolName = getGprCountSymbolName(RegKind);
2884   if (!SymbolName)
2885     return true;
2886   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2887 
2888   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2889   int64_t OldCount;
2890 
2891   if (!Sym->isVariable())
2892     return !Error(getLoc(),
2893                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2894   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2895     return !Error(
2896         getLoc(),
2897         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2898 
2899   if (OldCount <= NewMax)
2900     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2901 
2902   return true;
2903 }
2904 
2905 std::unique_ptr<AMDGPUOperand>
2906 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2907   const auto &Tok = getToken();
2908   SMLoc StartLoc = Tok.getLoc();
2909   SMLoc EndLoc = Tok.getEndLoc();
2910   RegisterKind RegKind;
2911   unsigned Reg, RegNum, RegWidth;
2912 
2913   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2914     return nullptr;
2915   }
2916   if (isHsaAbi(getSTI())) {
2917     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2918       return nullptr;
2919   } else
2920     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2921   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2922 }
2923 
2924 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2925                                       bool HasSP3AbsModifier, bool HasLit) {
2926   // TODO: add syntactic sugar for 1/(2*PI)
2927 
2928   if (isRegister())
2929     return ParseStatus::NoMatch;
2930   assert(!isModifier());
2931 
2932   if (!HasLit) {
2933     HasLit = trySkipId("lit");
2934     if (HasLit) {
2935       if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
2936         return ParseStatus::Failure;
2937       ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
2938       if (S.isSuccess() &&
2939           !skipToken(AsmToken::RParen, "expected closing parentheses"))
2940         return ParseStatus::Failure;
2941       return S;
2942     }
2943   }
2944 
2945   const auto& Tok = getToken();
2946   const auto& NextTok = peekToken();
2947   bool IsReal = Tok.is(AsmToken::Real);
2948   SMLoc S = getLoc();
2949   bool Negate = false;
2950 
2951   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2952     lex();
2953     IsReal = true;
2954     Negate = true;
2955   }
2956 
2957   AMDGPUOperand::Modifiers Mods;
2958   Mods.Lit = HasLit;
2959 
2960   if (IsReal) {
2961     // Floating-point expressions are not supported.
2962     // Can only allow floating-point literals with an
2963     // optional sign.
2964 
2965     StringRef Num = getTokenStr();
2966     lex();
2967 
2968     APFloat RealVal(APFloat::IEEEdouble());
2969     auto roundMode = APFloat::rmNearestTiesToEven;
2970     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
2971       return ParseStatus::Failure;
2972     if (Negate)
2973       RealVal.changeSign();
2974 
2975     Operands.push_back(
2976       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2977                                AMDGPUOperand::ImmTyNone, true));
2978     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2979     Op.setModifiers(Mods);
2980 
2981     return ParseStatus::Success;
2982 
2983   } else {
2984     int64_t IntVal;
2985     const MCExpr *Expr;
2986     SMLoc S = getLoc();
2987 
2988     if (HasSP3AbsModifier) {
2989       // This is a workaround for handling expressions
2990       // as arguments of SP3 'abs' modifier, for example:
2991       //     |1.0|
2992       //     |-1|
2993       //     |1+x|
2994       // This syntax is not compatible with syntax of standard
2995       // MC expressions (due to the trailing '|').
2996       SMLoc EndLoc;
2997       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2998         return ParseStatus::Failure;
2999     } else {
3000       if (Parser.parseExpression(Expr))
3001         return ParseStatus::Failure;
3002     }
3003 
3004     if (Expr->evaluateAsAbsolute(IntVal)) {
3005       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3006       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3007       Op.setModifiers(Mods);
3008     } else {
3009       if (HasLit)
3010         return ParseStatus::NoMatch;
3011       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3012     }
3013 
3014     return ParseStatus::Success;
3015   }
3016 
3017   return ParseStatus::NoMatch;
3018 }
3019 
3020 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3021   if (!isRegister())
3022     return ParseStatus::NoMatch;
3023 
3024   if (auto R = parseRegister()) {
3025     assert(R->isReg());
3026     Operands.push_back(std::move(R));
3027     return ParseStatus::Success;
3028   }
3029   return ParseStatus::Failure;
3030 }
3031 
3032 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3033                                            bool HasSP3AbsMod, bool HasLit) {
3034   ParseStatus Res = parseReg(Operands);
3035   if (!Res.isNoMatch())
3036     return Res;
3037   if (isModifier())
3038     return ParseStatus::NoMatch;
3039   return parseImm(Operands, HasSP3AbsMod, HasLit);
3040 }
3041 
3042 bool
3043 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3044   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3045     const auto &str = Token.getString();
3046     return str == "abs" || str == "neg" || str == "sext";
3047   }
3048   return false;
3049 }
3050 
3051 bool
3052 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3053   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3054 }
3055 
3056 bool
3057 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3058   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3059 }
3060 
3061 bool
3062 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3063   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3064 }
3065 
3066 // Check if this is an operand modifier or an opcode modifier
3067 // which may look like an expression but it is not. We should
3068 // avoid parsing these modifiers as expressions. Currently
3069 // recognized sequences are:
3070 //   |...|
3071 //   abs(...)
3072 //   neg(...)
3073 //   sext(...)
3074 //   -reg
3075 //   -|...|
3076 //   -abs(...)
3077 //   name:...
3078 //
3079 bool
3080 AMDGPUAsmParser::isModifier() {
3081 
3082   AsmToken Tok = getToken();
3083   AsmToken NextToken[2];
3084   peekTokens(NextToken);
3085 
3086   return isOperandModifier(Tok, NextToken[0]) ||
3087          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3088          isOpcodeModifierWithVal(Tok, NextToken[0]);
3089 }
3090 
3091 // Check if the current token is an SP3 'neg' modifier.
3092 // Currently this modifier is allowed in the following context:
3093 //
3094 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3095 // 2. Before an 'abs' modifier: -abs(...)
3096 // 3. Before an SP3 'abs' modifier: -|...|
3097 //
3098 // In all other cases "-" is handled as a part
3099 // of an expression that follows the sign.
3100 //
3101 // Note: When "-" is followed by an integer literal,
3102 // this is interpreted as integer negation rather
3103 // than a floating-point NEG modifier applied to N.
3104 // Beside being contr-intuitive, such use of floating-point
3105 // NEG modifier would have resulted in different meaning
3106 // of integer literals used with VOP1/2/C and VOP3,
3107 // for example:
3108 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3109 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3110 // Negative fp literals with preceding "-" are
3111 // handled likewise for uniformity
3112 //
3113 bool
3114 AMDGPUAsmParser::parseSP3NegModifier() {
3115 
3116   AsmToken NextToken[2];
3117   peekTokens(NextToken);
3118 
3119   if (isToken(AsmToken::Minus) &&
3120       (isRegister(NextToken[0], NextToken[1]) ||
3121        NextToken[0].is(AsmToken::Pipe) ||
3122        isId(NextToken[0], "abs"))) {
3123     lex();
3124     return true;
3125   }
3126 
3127   return false;
3128 }
3129 
3130 ParseStatus
3131 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3132                                               bool AllowImm) {
3133   bool Neg, SP3Neg;
3134   bool Abs, SP3Abs;
3135   bool Lit;
3136   SMLoc Loc;
3137 
3138   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3139   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
3140     return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3141 
3142   SP3Neg = parseSP3NegModifier();
3143 
3144   Loc = getLoc();
3145   Neg = trySkipId("neg");
3146   if (Neg && SP3Neg)
3147     return Error(Loc, "expected register or immediate");
3148   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3149     return ParseStatus::Failure;
3150 
3151   Abs = trySkipId("abs");
3152   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3153     return ParseStatus::Failure;
3154 
3155   Lit = trySkipId("lit");
3156   if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
3157     return ParseStatus::Failure;
3158 
3159   Loc = getLoc();
3160   SP3Abs = trySkipToken(AsmToken::Pipe);
3161   if (Abs && SP3Abs)
3162     return Error(Loc, "expected register or immediate");
3163 
3164   ParseStatus Res;
3165   if (AllowImm) {
3166     Res = parseRegOrImm(Operands, SP3Abs, Lit);
3167   } else {
3168     Res = parseReg(Operands);
3169   }
3170   if (!Res.isSuccess())
3171     return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;
3172 
3173   if (Lit && !Operands.back()->isImm())
3174     Error(Loc, "expected immediate with lit modifier");
3175 
3176   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3177     return ParseStatus::Failure;
3178   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3179     return ParseStatus::Failure;
3180   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3181     return ParseStatus::Failure;
3182   if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3183     return ParseStatus::Failure;
3184 
3185   AMDGPUOperand::Modifiers Mods;
3186   Mods.Abs = Abs || SP3Abs;
3187   Mods.Neg = Neg || SP3Neg;
3188   Mods.Lit = Lit;
3189 
3190   if (Mods.hasFPModifiers() || Lit) {
3191     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3192     if (Op.isExpr())
3193       return Error(Op.getStartLoc(), "expected an absolute expression");
3194     Op.setModifiers(Mods);
3195   }
3196   return ParseStatus::Success;
3197 }
3198 
3199 ParseStatus
3200 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3201                                                bool AllowImm) {
3202   bool Sext = trySkipId("sext");
3203   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3204     return ParseStatus::Failure;
3205 
3206   ParseStatus Res;
3207   if (AllowImm) {
3208     Res = parseRegOrImm(Operands);
3209   } else {
3210     Res = parseReg(Operands);
3211   }
3212   if (!Res.isSuccess())
3213     return Sext ? ParseStatus::Failure : Res;
3214 
3215   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3216     return ParseStatus::Failure;
3217 
3218   AMDGPUOperand::Modifiers Mods;
3219   Mods.Sext = Sext;
3220 
3221   if (Mods.hasIntModifiers()) {
3222     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3223     if (Op.isExpr())
3224       return Error(Op.getStartLoc(), "expected an absolute expression");
3225     Op.setModifiers(Mods);
3226   }
3227 
3228   return ParseStatus::Success;
3229 }
3230 
3231 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3232   return parseRegOrImmWithFPInputMods(Operands, false);
3233 }
3234 
3235 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3236   return parseRegOrImmWithIntInputMods(Operands, false);
3237 }
3238 
3239 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3240   auto Loc = getLoc();
3241   if (trySkipId("off")) {
3242     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3243                                                 AMDGPUOperand::ImmTyOff, false));
3244     return ParseStatus::Success;
3245   }
3246 
3247   if (!isRegister())
3248     return ParseStatus::NoMatch;
3249 
3250   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3251   if (Reg) {
3252     Operands.push_back(std::move(Reg));
3253     return ParseStatus::Success;
3254   }
3255 
3256   return ParseStatus::Failure;
3257 }
3258 
3259 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3260   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3261 
3262   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3263       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3264       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3265       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3266     return Match_InvalidOperand;
3267 
3268   if ((TSFlags & SIInstrFlags::VOP3) &&
3269       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3270       getForcedEncodingSize() != 64)
3271     return Match_PreferE32;
3272 
3273   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3274       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3275     // v_mac_f32/16 allow only dst_sel == DWORD;
3276     auto OpNum =
3277         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3278     const auto &Op = Inst.getOperand(OpNum);
3279     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3280       return Match_InvalidOperand;
3281     }
3282   }
3283 
3284   return Match_Success;
3285 }
3286 
3287 static ArrayRef<unsigned> getAllVariants() {
3288   static const unsigned Variants[] = {
3289     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3290     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3291     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3292   };
3293 
3294   return ArrayRef(Variants);
3295 }
3296 
3297 // What asm variants we should check
3298 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3299   if (isForcedDPP() && isForcedVOP3()) {
3300     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3301     return ArrayRef(Variants);
3302   }
3303   if (getForcedEncodingSize() == 32) {
3304     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3305     return ArrayRef(Variants);
3306   }
3307 
3308   if (isForcedVOP3()) {
3309     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3310     return ArrayRef(Variants);
3311   }
3312 
3313   if (isForcedSDWA()) {
3314     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3315                                         AMDGPUAsmVariants::SDWA9};
3316     return ArrayRef(Variants);
3317   }
3318 
3319   if (isForcedDPP()) {
3320     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3321     return ArrayRef(Variants);
3322   }
3323 
3324   return getAllVariants();
3325 }
3326 
3327 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3328   if (isForcedDPP() && isForcedVOP3())
3329     return "e64_dpp";
3330 
3331   if (getForcedEncodingSize() == 32)
3332     return "e32";
3333 
3334   if (isForcedVOP3())
3335     return "e64";
3336 
3337   if (isForcedSDWA())
3338     return "sdwa";
3339 
3340   if (isForcedDPP())
3341     return "dpp";
3342 
3343   return "";
3344 }
3345 
3346 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3347   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3348   for (MCPhysReg Reg : Desc.implicit_uses()) {
3349     switch (Reg) {
3350     case AMDGPU::FLAT_SCR:
3351     case AMDGPU::VCC:
3352     case AMDGPU::VCC_LO:
3353     case AMDGPU::VCC_HI:
3354     case AMDGPU::M0:
3355       return Reg;
3356     default:
3357       break;
3358     }
3359   }
3360   return AMDGPU::NoRegister;
3361 }
3362 
3363 // NB: This code is correct only when used to check constant
3364 // bus limitations because GFX7 support no f16 inline constants.
3365 // Note that there are no cases when a GFX7 opcode violates
3366 // constant bus limitations due to the use of an f16 constant.
3367 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3368                                        unsigned OpIdx) const {
3369   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3370 
3371   if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
3372       AMDGPU::isKImmOperand(Desc, OpIdx)) {
3373     return false;
3374   }
3375 
3376   const MCOperand &MO = Inst.getOperand(OpIdx);
3377 
3378   int64_t Val = MO.getImm();
3379   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3380 
3381   switch (OpSize) { // expected operand size
3382   case 8:
3383     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3384   case 4:
3385     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3386   case 2: {
3387     const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
3388     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3389         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3390         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3391       return AMDGPU::isInlinableIntLiteral(Val);
3392 
3393     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3394         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3395         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3396       return AMDGPU::isInlinableIntLiteralV216(Val);
3397 
3398     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3399         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3400         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3401       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3402 
3403     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3404   }
3405   default:
3406     llvm_unreachable("invalid operand size");
3407   }
3408 }
3409 
3410 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3411   if (!isGFX10Plus())
3412     return 1;
3413 
3414   switch (Opcode) {
3415   // 64-bit shift instructions can use only one scalar value input
3416   case AMDGPU::V_LSHLREV_B64_e64:
3417   case AMDGPU::V_LSHLREV_B64_gfx10:
3418   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3419   case AMDGPU::V_LSHLREV_B64_e32_gfx12:
3420   case AMDGPU::V_LSHLREV_B64_e64_gfx12:
3421   case AMDGPU::V_LSHRREV_B64_e64:
3422   case AMDGPU::V_LSHRREV_B64_gfx10:
3423   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3424   case AMDGPU::V_LSHRREV_B64_e64_gfx12:
3425   case AMDGPU::V_ASHRREV_I64_e64:
3426   case AMDGPU::V_ASHRREV_I64_gfx10:
3427   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3428   case AMDGPU::V_ASHRREV_I64_e64_gfx12:
3429   case AMDGPU::V_LSHL_B64_e64:
3430   case AMDGPU::V_LSHR_B64_e64:
3431   case AMDGPU::V_ASHR_I64_e64:
3432     return 1;
3433   default:
3434     return 2;
3435   }
3436 }
3437 
3438 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
3439 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3440 
3441 // Get regular operand indices in the same order as specified
3442 // in the instruction (but append mandatory literals to the end).
3443 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3444                                            bool AddMandatoryLiterals = false) {
3445 
3446   int16_t ImmIdx =
3447       AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3448 
3449   if (isVOPD(Opcode)) {
3450     int16_t ImmDeferredIdx =
3451         AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3452                              : -1;
3453 
3454     return {getNamedOperandIdx(Opcode, OpName::src0X),
3455             getNamedOperandIdx(Opcode, OpName::vsrc1X),
3456             getNamedOperandIdx(Opcode, OpName::src0Y),
3457             getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3458             ImmDeferredIdx,
3459             ImmIdx};
3460   }
3461 
3462   return {getNamedOperandIdx(Opcode, OpName::src0),
3463           getNamedOperandIdx(Opcode, OpName::src1),
3464           getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3465 }
3466 
3467 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3468   const MCOperand &MO = Inst.getOperand(OpIdx);
3469   if (MO.isImm()) {
3470     return !isInlineConstant(Inst, OpIdx);
3471   } else if (MO.isReg()) {
3472     auto Reg = MO.getReg();
3473     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3474     auto PReg = mc2PseudoReg(Reg);
3475     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3476   } else {
3477     return true;
3478   }
3479 }
3480 
3481 bool AMDGPUAsmParser::validateConstantBusLimitations(
3482     const MCInst &Inst, const OperandVector &Operands) {
3483   const unsigned Opcode = Inst.getOpcode();
3484   const MCInstrDesc &Desc = MII.get(Opcode);
3485   unsigned LastSGPR = AMDGPU::NoRegister;
3486   unsigned ConstantBusUseCount = 0;
3487   unsigned NumLiterals = 0;
3488   unsigned LiteralSize;
3489 
3490   if (!(Desc.TSFlags &
3491         (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3492          SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
3493       !isVOPD(Opcode))
3494     return true;
3495 
3496   // Check special imm operands (used by madmk, etc)
3497   if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
3498     ++NumLiterals;
3499     LiteralSize = 4;
3500   }
3501 
3502   SmallDenseSet<unsigned> SGPRsUsed;
3503   unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3504   if (SGPRUsed != AMDGPU::NoRegister) {
3505     SGPRsUsed.insert(SGPRUsed);
3506     ++ConstantBusUseCount;
3507   }
3508 
3509   OperandIndices OpIndices = getSrcOperandIndices(Opcode);
3510 
3511   for (int OpIdx : OpIndices) {
3512     if (OpIdx == -1)
3513       continue;
3514 
3515     const MCOperand &MO = Inst.getOperand(OpIdx);
3516     if (usesConstantBus(Inst, OpIdx)) {
3517       if (MO.isReg()) {
3518         LastSGPR = mc2PseudoReg(MO.getReg());
3519         // Pairs of registers with a partial intersections like these
3520         //   s0, s[0:1]
3521         //   flat_scratch_lo, flat_scratch
3522         //   flat_scratch_lo, flat_scratch_hi
3523         // are theoretically valid but they are disabled anyway.
3524         // Note that this code mimics SIInstrInfo::verifyInstruction
3525         if (SGPRsUsed.insert(LastSGPR).second) {
3526           ++ConstantBusUseCount;
3527         }
3528       } else { // Expression or a literal
3529 
3530         if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3531           continue; // special operand like VINTERP attr_chan
3532 
3533         // An instruction may use only one literal.
3534         // This has been validated on the previous step.
3535         // See validateVOPLiteral.
3536         // This literal may be used as more than one operand.
3537         // If all these operands are of the same size,
3538         // this literal counts as one scalar value.
3539         // Otherwise it counts as 2 scalar values.
3540         // See "GFX10 Shader Programming", section 3.6.2.3.
3541 
3542         unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3543         if (Size < 4)
3544           Size = 4;
3545 
3546         if (NumLiterals == 0) {
3547           NumLiterals = 1;
3548           LiteralSize = Size;
3549         } else if (LiteralSize != Size) {
3550           NumLiterals = 2;
3551         }
3552       }
3553     }
3554   }
3555   ConstantBusUseCount += NumLiterals;
3556 
3557   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3558     return true;
3559 
3560   SMLoc LitLoc = getLitLoc(Operands);
3561   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3562   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3563   Error(Loc, "invalid operand (violates constant bus restrictions)");
3564   return false;
3565 }
3566 
3567 bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
3568     const MCInst &Inst, const OperandVector &Operands) {
3569 
3570   const unsigned Opcode = Inst.getOpcode();
3571   if (!isVOPD(Opcode))
3572     return true;
3573 
3574   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3575 
3576   auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3577     const MCOperand &Opr = Inst.getOperand(OperandIdx);
3578     return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3579                ? Opr.getReg()
3580                : MCRegister::NoRegister;
3581   };
3582 
3583   // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
3584   bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;
3585 
3586   const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
3587   auto InvalidCompOprIdx =
3588       InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
3589   if (!InvalidCompOprIdx)
3590     return true;
3591 
3592   auto CompOprIdx = *InvalidCompOprIdx;
3593   auto ParsedIdx =
3594       std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
3595                InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
3596   assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
3597 
3598   auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
3599   if (CompOprIdx == VOPD::Component::DST) {
3600     Error(Loc, "one dst register must be even and the other odd");
3601   } else {
3602     auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
3603     Error(Loc, Twine("src") + Twine(CompSrcIdx) +
3604                    " operands must use different VGPR banks");
3605   }
3606 
3607   return false;
3608 }
3609 
3610 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3611 
3612   const unsigned Opc = Inst.getOpcode();
3613   const MCInstrDesc &Desc = MII.get(Opc);
3614 
3615   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3616     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3617     assert(ClampIdx != -1);
3618     return Inst.getOperand(ClampIdx).getImm() == 0;
3619   }
3620 
3621   return true;
3622 }
3623 
3624 constexpr uint64_t MIMGFlags =
3625     SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3626 
3627 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
3628                                            const SMLoc &IDLoc) {
3629 
3630   const unsigned Opc = Inst.getOpcode();
3631   const MCInstrDesc &Desc = MII.get(Opc);
3632 
3633   if ((Desc.TSFlags & MIMGFlags) == 0)
3634     return true;
3635 
3636   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3637   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3638   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3639 
3640   assert(VDataIdx != -1);
3641 
3642   if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
3643     return true;
3644 
3645   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3646   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3647   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3648   if (DMask == 0)
3649     DMask = 1;
3650 
3651   bool IsPackedD16 = false;
3652   unsigned DataSize =
3653       (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
3654   if (hasPackedD16()) {
3655     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3656     IsPackedD16 = D16Idx >= 0;
3657     if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
3658       DataSize = (DataSize + 1) / 2;
3659   }
3660 
3661   if ((VDataSize / 4) == DataSize + TFESize)
3662     return true;
3663 
3664   StringRef Modifiers;
3665   if (isGFX90A())
3666     Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
3667   else
3668     Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";
3669 
3670   Error(IDLoc, Twine("image data size does not match ") + Modifiers);
3671   return false;
3672 }
3673 
3674 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
3675                                            const SMLoc &IDLoc) {
3676   const unsigned Opc = Inst.getOpcode();
3677   const MCInstrDesc &Desc = MII.get(Opc);
3678 
3679   if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
3680     return true;
3681 
3682   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3683 
3684   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3685       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3686   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3687   int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
3688                                                      : AMDGPU::OpName::rsrc;
3689   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
3690   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3691   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3692 
3693   assert(VAddr0Idx != -1);
3694   assert(SrsrcIdx != -1);
3695   assert(SrsrcIdx > VAddr0Idx);
3696 
3697   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3698   if (BaseOpcode->BVH) {
3699     if (IsA16 == BaseOpcode->A16)
3700       return true;
3701     Error(IDLoc, "image address size does not match a16");
3702     return false;
3703   }
3704 
3705   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3706   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3707   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3708   unsigned ActualAddrSize =
3709       IsNSA ? SrsrcIdx - VAddr0Idx
3710             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3711 
3712   unsigned ExpectedAddrSize =
3713       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3714 
3715   if (IsNSA) {
3716     if (hasPartialNSAEncoding() &&
3717         ExpectedAddrSize >
3718             getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
3719       int VAddrLastIdx = SrsrcIdx - 1;
3720       unsigned VAddrLastSize =
3721           AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
3722 
3723       ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
3724     }
3725   } else {
3726     if (ExpectedAddrSize > 12)
3727       ExpectedAddrSize = 16;
3728 
3729     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3730     // This provides backward compatibility for assembly created
3731     // before 160b/192b/224b types were directly supported.
3732     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3733       return true;
3734   }
3735 
3736   if (ActualAddrSize == ExpectedAddrSize)
3737     return true;
3738 
3739   Error(IDLoc, "image address size does not match dim and a16");
3740   return false;
3741 }
3742 
3743 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3744 
3745   const unsigned Opc = Inst.getOpcode();
3746   const MCInstrDesc &Desc = MII.get(Opc);
3747 
3748   if ((Desc.TSFlags & MIMGFlags) == 0)
3749     return true;
3750   if (!Desc.mayLoad() || !Desc.mayStore())
3751     return true; // Not atomic
3752 
3753   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3754   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3755 
3756   // This is an incomplete check because image_atomic_cmpswap
3757   // may only use 0x3 and 0xf while other atomic operations
3758   // may use 0x1 and 0x3. However these limitations are
3759   // verified when we check that dmask matches dst size.
3760   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3761 }
3762 
3763 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3764 
3765   const unsigned Opc = Inst.getOpcode();
3766   const MCInstrDesc &Desc = MII.get(Opc);
3767 
3768   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3769     return true;
3770 
3771   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3772   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3773 
3774   // GATHER4 instructions use dmask in a different fashion compared to
3775   // other MIMG instructions. The only useful DMASK values are
3776   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3777   // (red,red,red,red) etc.) The ISA document doesn't mention
3778   // this.
3779   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3780 }
3781 
3782 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3783   const unsigned Opc = Inst.getOpcode();
3784   const MCInstrDesc &Desc = MII.get(Opc);
3785 
3786   if ((Desc.TSFlags & MIMGFlags) == 0)
3787     return true;
3788 
3789   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3790   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3791       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3792 
3793   if (!BaseOpcode->MSAA)
3794     return true;
3795 
3796   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3797   assert(DimIdx != -1);
3798 
3799   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3800   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3801 
3802   return DimInfo->MSAA;
3803 }
3804 
3805 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3806 {
3807   switch (Opcode) {
3808   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3809   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3810   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3811     return true;
3812   default:
3813     return false;
3814   }
3815 }
3816 
3817 // movrels* opcodes should only allow VGPRS as src0.
3818 // This is specified in .td description for vop1/vop3,
3819 // but sdwa is handled differently. See isSDWAOperand.
3820 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3821                                       const OperandVector &Operands) {
3822 
3823   const unsigned Opc = Inst.getOpcode();
3824   const MCInstrDesc &Desc = MII.get(Opc);
3825 
3826   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3827     return true;
3828 
3829   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3830   assert(Src0Idx != -1);
3831 
3832   SMLoc ErrLoc;
3833   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3834   if (Src0.isReg()) {
3835     auto Reg = mc2PseudoReg(Src0.getReg());
3836     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3837     if (!isSGPR(Reg, TRI))
3838       return true;
3839     ErrLoc = getRegLoc(Reg, Operands);
3840   } else {
3841     ErrLoc = getConstLoc(Operands);
3842   }
3843 
3844   Error(ErrLoc, "source operand must be a VGPR");
3845   return false;
3846 }
3847 
3848 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3849                                           const OperandVector &Operands) {
3850 
3851   const unsigned Opc = Inst.getOpcode();
3852 
3853   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3854     return true;
3855 
3856   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3857   assert(Src0Idx != -1);
3858 
3859   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3860   if (!Src0.isReg())
3861     return true;
3862 
3863   auto Reg = mc2PseudoReg(Src0.getReg());
3864   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3865   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3866     Error(getRegLoc(Reg, Operands),
3867           "source operand must be either a VGPR or an inline constant");
3868     return false;
3869   }
3870 
3871   return true;
3872 }
3873 
3874 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3875                                       const OperandVector &Operands) {
3876   unsigned Opcode = Inst.getOpcode();
3877   const MCInstrDesc &Desc = MII.get(Opcode);
3878 
3879   if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3880       !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3881     return true;
3882 
3883   const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3884   if (Src2Idx == -1)
3885     return true;
3886 
3887   if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3888     Error(getConstLoc(Operands),
3889           "inline constants are not allowed for this operand");
3890     return false;
3891   }
3892 
3893   return true;
3894 }
3895 
3896 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3897                                    const OperandVector &Operands) {
3898   const unsigned Opc = Inst.getOpcode();
3899   const MCInstrDesc &Desc = MII.get(Opc);
3900 
3901   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3902     return true;
3903 
3904   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3905   if (Src2Idx == -1)
3906     return true;
3907 
3908   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3909   if (!Src2.isReg())
3910     return true;
3911 
3912   MCRegister Src2Reg = Src2.getReg();
3913   MCRegister DstReg = Inst.getOperand(0).getReg();
3914   if (Src2Reg == DstReg)
3915     return true;
3916 
3917   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3918   if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
3919     return true;
3920 
3921   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3922     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3923           "source 2 operand must not partially overlap with dst");
3924     return false;
3925   }
3926 
3927   return true;
3928 }
3929 
3930 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3931   switch (Inst.getOpcode()) {
3932   default:
3933     return true;
3934   case V_DIV_SCALE_F32_gfx6_gfx7:
3935   case V_DIV_SCALE_F32_vi:
3936   case V_DIV_SCALE_F32_gfx10:
3937   case V_DIV_SCALE_F64_gfx6_gfx7:
3938   case V_DIV_SCALE_F64_vi:
3939   case V_DIV_SCALE_F64_gfx10:
3940     break;
3941   }
3942 
3943   // TODO: Check that src0 = src1 or src2.
3944 
3945   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3946                     AMDGPU::OpName::src2_modifiers,
3947                     AMDGPU::OpName::src2_modifiers}) {
3948     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3949             .getImm() &
3950         SISrcMods::ABS) {
3951       return false;
3952     }
3953   }
3954 
3955   return true;
3956 }
3957 
3958 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3959 
3960   const unsigned Opc = Inst.getOpcode();
3961   const MCInstrDesc &Desc = MII.get(Opc);
3962 
3963   if ((Desc.TSFlags & MIMGFlags) == 0)
3964     return true;
3965 
3966   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3967   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3968     if (isCI() || isSI())
3969       return false;
3970   }
3971 
3972   return true;
3973 }
3974 
3975 static bool IsRevOpcode(const unsigned Opcode)
3976 {
3977   switch (Opcode) {
3978   case AMDGPU::V_SUBREV_F32_e32:
3979   case AMDGPU::V_SUBREV_F32_e64:
3980   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3981   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3982   case AMDGPU::V_SUBREV_F32_e32_vi:
3983   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3984   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3985   case AMDGPU::V_SUBREV_F32_e64_vi:
3986 
3987   case AMDGPU::V_SUBREV_CO_U32_e32:
3988   case AMDGPU::V_SUBREV_CO_U32_e64:
3989   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3990   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3991 
3992   case AMDGPU::V_SUBBREV_U32_e32:
3993   case AMDGPU::V_SUBBREV_U32_e64:
3994   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3995   case AMDGPU::V_SUBBREV_U32_e32_vi:
3996   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3997   case AMDGPU::V_SUBBREV_U32_e64_vi:
3998 
3999   case AMDGPU::V_SUBREV_U32_e32:
4000   case AMDGPU::V_SUBREV_U32_e64:
4001   case AMDGPU::V_SUBREV_U32_e32_gfx9:
4002   case AMDGPU::V_SUBREV_U32_e32_vi:
4003   case AMDGPU::V_SUBREV_U32_e64_gfx9:
4004   case AMDGPU::V_SUBREV_U32_e64_vi:
4005 
4006   case AMDGPU::V_SUBREV_F16_e32:
4007   case AMDGPU::V_SUBREV_F16_e64:
4008   case AMDGPU::V_SUBREV_F16_e32_gfx10:
4009   case AMDGPU::V_SUBREV_F16_e32_vi:
4010   case AMDGPU::V_SUBREV_F16_e64_gfx10:
4011   case AMDGPU::V_SUBREV_F16_e64_vi:
4012 
4013   case AMDGPU::V_SUBREV_U16_e32:
4014   case AMDGPU::V_SUBREV_U16_e64:
4015   case AMDGPU::V_SUBREV_U16_e32_vi:
4016   case AMDGPU::V_SUBREV_U16_e64_vi:
4017 
4018   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
4019   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
4020   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
4021 
4022   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
4023   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
4024 
4025   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
4026   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
4027 
4028   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
4029   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
4030 
4031   case AMDGPU::V_LSHRREV_B32_e32:
4032   case AMDGPU::V_LSHRREV_B32_e64:
4033   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
4034   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
4035   case AMDGPU::V_LSHRREV_B32_e32_vi:
4036   case AMDGPU::V_LSHRREV_B32_e64_vi:
4037   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
4038   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
4039 
4040   case AMDGPU::V_ASHRREV_I32_e32:
4041   case AMDGPU::V_ASHRREV_I32_e64:
4042   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
4043   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
4044   case AMDGPU::V_ASHRREV_I32_e32_vi:
4045   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
4046   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
4047   case AMDGPU::V_ASHRREV_I32_e64_vi:
4048 
4049   case AMDGPU::V_LSHLREV_B32_e32:
4050   case AMDGPU::V_LSHLREV_B32_e64:
4051   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
4052   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
4053   case AMDGPU::V_LSHLREV_B32_e32_vi:
4054   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
4055   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
4056   case AMDGPU::V_LSHLREV_B32_e64_vi:
4057 
4058   case AMDGPU::V_LSHLREV_B16_e32:
4059   case AMDGPU::V_LSHLREV_B16_e64:
4060   case AMDGPU::V_LSHLREV_B16_e32_vi:
4061   case AMDGPU::V_LSHLREV_B16_e64_vi:
4062   case AMDGPU::V_LSHLREV_B16_gfx10:
4063 
4064   case AMDGPU::V_LSHRREV_B16_e32:
4065   case AMDGPU::V_LSHRREV_B16_e64:
4066   case AMDGPU::V_LSHRREV_B16_e32_vi:
4067   case AMDGPU::V_LSHRREV_B16_e64_vi:
4068   case AMDGPU::V_LSHRREV_B16_gfx10:
4069 
4070   case AMDGPU::V_ASHRREV_I16_e32:
4071   case AMDGPU::V_ASHRREV_I16_e64:
4072   case AMDGPU::V_ASHRREV_I16_e32_vi:
4073   case AMDGPU::V_ASHRREV_I16_e64_vi:
4074   case AMDGPU::V_ASHRREV_I16_gfx10:
4075 
4076   case AMDGPU::V_LSHLREV_B64_e64:
4077   case AMDGPU::V_LSHLREV_B64_gfx10:
4078   case AMDGPU::V_LSHLREV_B64_vi:
4079 
4080   case AMDGPU::V_LSHRREV_B64_e64:
4081   case AMDGPU::V_LSHRREV_B64_gfx10:
4082   case AMDGPU::V_LSHRREV_B64_vi:
4083 
4084   case AMDGPU::V_ASHRREV_I64_e64:
4085   case AMDGPU::V_ASHRREV_I64_gfx10:
4086   case AMDGPU::V_ASHRREV_I64_vi:
4087 
4088   case AMDGPU::V_PK_LSHLREV_B16:
4089   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
4090   case AMDGPU::V_PK_LSHLREV_B16_vi:
4091 
4092   case AMDGPU::V_PK_LSHRREV_B16:
4093   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4094   case AMDGPU::V_PK_LSHRREV_B16_vi:
4095   case AMDGPU::V_PK_ASHRREV_I16:
4096   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4097   case AMDGPU::V_PK_ASHRREV_I16_vi:
4098     return true;
4099   default:
4100     return false;
4101   }
4102 }
4103 
4104 std::optional<StringRef>
4105 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4106 
4107   using namespace SIInstrFlags;
4108   const unsigned Opcode = Inst.getOpcode();
4109   const MCInstrDesc &Desc = MII.get(Opcode);
4110 
4111   // lds_direct register is defined so that it can be used
4112   // with 9-bit operands only. Ignore encodings which do not accept these.
4113   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4114   if ((Desc.TSFlags & Enc) == 0)
4115     return std::nullopt;
4116 
4117   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4118     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4119     if (SrcIdx == -1)
4120       break;
4121     const auto &Src = Inst.getOperand(SrcIdx);
4122     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4123 
4124       if (isGFX90A() || isGFX11Plus())
4125         return StringRef("lds_direct is not supported on this GPU");
4126 
4127       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4128         return StringRef("lds_direct cannot be used with this instruction");
4129 
4130       if (SrcName != OpName::src0)
4131         return StringRef("lds_direct may be used as src0 only");
4132     }
4133   }
4134 
4135   return std::nullopt;
4136 }
4137 
4138 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4139   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4140     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4141     if (Op.isFlatOffset())
4142       return Op.getStartLoc();
4143   }
4144   return getLoc();
4145 }
4146 
4147 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4148                                      const OperandVector &Operands) {
4149   auto Opcode = Inst.getOpcode();
4150   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4151   if (OpNum == -1)
4152     return true;
4153 
4154   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4155   if ((TSFlags & SIInstrFlags::FLAT))
4156     return validateFlatOffset(Inst, Operands);
4157 
4158   if ((TSFlags & SIInstrFlags::SMRD))
4159     return validateSMEMOffset(Inst, Operands);
4160 
4161   const auto &Op = Inst.getOperand(OpNum);
4162   if (isGFX12Plus() &&
4163       (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4164     const unsigned OffsetSize = 24;
4165     if (!isIntN(OffsetSize, Op.getImm())) {
4166       Error(getFlatOffsetLoc(Operands),
4167             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4168       return false;
4169     }
4170   } else {
4171     const unsigned OffsetSize = 16;
4172     if (!isUIntN(OffsetSize, Op.getImm())) {
4173       Error(getFlatOffsetLoc(Operands),
4174             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4175       return false;
4176     }
4177   }
4178   return true;
4179 }
4180 
4181 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4182                                          const OperandVector &Operands) {
4183   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4184   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4185     return true;
4186 
4187   auto Opcode = Inst.getOpcode();
4188   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4189   assert(OpNum != -1);
4190 
4191   const auto &Op = Inst.getOperand(OpNum);
4192   if (!hasFlatOffsets() && Op.getImm() != 0) {
4193     Error(getFlatOffsetLoc(Operands),
4194           "flat offset modifier is not supported on this GPU");
4195     return false;
4196   }
4197 
4198   // For pre-GFX12 FLAT instructions the offset must be positive;
4199   // MSB is ignored and forced to zero.
4200   unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
4201   bool AllowNegative =
4202       (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
4203       isGFX12Plus();
4204   if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
4205     Error(getFlatOffsetLoc(Operands),
4206           Twine("expected a ") +
4207               (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
4208                              : Twine(OffsetSize - 1) + "-bit unsigned offset"));
4209     return false;
4210   }
4211 
4212   return true;
4213 }
4214 
4215 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4216   // Start with second operand because SMEM Offset cannot be dst or src0.
4217   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4218     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4219     if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4220       return Op.getStartLoc();
4221   }
4222   return getLoc();
4223 }
4224 
4225 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4226                                          const OperandVector &Operands) {
4227   if (isCI() || isSI())
4228     return true;
4229 
4230   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4231   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4232     return true;
4233 
4234   auto Opcode = Inst.getOpcode();
4235   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4236   if (OpNum == -1)
4237     return true;
4238 
4239   const auto &Op = Inst.getOperand(OpNum);
4240   if (!Op.isImm())
4241     return true;
4242 
4243   uint64_t Offset = Op.getImm();
4244   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4245   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4246       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4247     return true;
4248 
4249   Error(getSMEMOffsetLoc(Operands),
4250         isGFX12Plus()          ? "expected a 24-bit signed offset"
4251         : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
4252                                : "expected a 21-bit signed offset");
4253 
4254   return false;
4255 }
4256 
4257 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4258   unsigned Opcode = Inst.getOpcode();
4259   const MCInstrDesc &Desc = MII.get(Opcode);
4260   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4261     return true;
4262 
4263   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4264   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4265 
4266   const int OpIndices[] = { Src0Idx, Src1Idx };
4267 
4268   unsigned NumExprs = 0;
4269   unsigned NumLiterals = 0;
4270   uint32_t LiteralValue;
4271 
4272   for (int OpIdx : OpIndices) {
4273     if (OpIdx == -1) break;
4274 
4275     const MCOperand &MO = Inst.getOperand(OpIdx);
4276     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
4277     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4278       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4279         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4280         if (NumLiterals == 0 || LiteralValue != Value) {
4281           LiteralValue = Value;
4282           ++NumLiterals;
4283         }
4284       } else if (MO.isExpr()) {
4285         ++NumExprs;
4286       }
4287     }
4288   }
4289 
4290   return NumLiterals + NumExprs <= 1;
4291 }
4292 
4293 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4294   const unsigned Opc = Inst.getOpcode();
4295   if (isPermlane16(Opc)) {
4296     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4297     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4298 
4299     if (OpSel & ~3)
4300       return false;
4301   }
4302 
4303   uint64_t TSFlags = MII.get(Opc).TSFlags;
4304 
4305   if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
4306     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4307     if (OpSelIdx != -1) {
4308       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4309         return false;
4310     }
4311     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4312     if (OpSelHiIdx != -1) {
4313       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4314         return false;
4315     }
4316   }
4317 
4318   // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
4319   if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
4320       (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
4321     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4322     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4323     if (OpSel & 3)
4324       return false;
4325   }
4326 
4327   return true;
4328 }
4329 
4330 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4331                                   const OperandVector &Operands) {
4332   const unsigned Opc = Inst.getOpcode();
4333   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4334   if (DppCtrlIdx >= 0) {
4335     unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4336 
4337     if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
4338         AMDGPU::isDPALU_DPP(MII.get(Opc))) {
4339       // DP ALU DPP is supported for row_newbcast only on GFX9*
4340       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4341       Error(S, "DP ALU dpp only supports row_newbcast");
4342       return false;
4343     }
4344   }
4345 
4346   int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
4347   bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
4348 
4349   if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
4350     int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
4351     if (Src1Idx >= 0) {
4352       const MCOperand &Src1 = Inst.getOperand(Src1Idx);
4353       const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4354       if (Src1.isImm() ||
4355           (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
4356         AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
4357         Error(Op.getStartLoc(), "invalid operand for instruction");
4358         return false;
4359       }
4360     }
4361   }
4362 
4363   return true;
4364 }
4365 
4366 // Check if VCC register matches wavefront size
4367 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4368   auto FB = getFeatureBits();
4369   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4370     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4371 }
4372 
4373 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
4374 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4375                                          const OperandVector &Operands) {
4376   unsigned Opcode = Inst.getOpcode();
4377   const MCInstrDesc &Desc = MII.get(Opcode);
4378   bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
4379   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4380       !HasMandatoryLiteral && !isVOPD(Opcode))
4381     return true;
4382 
4383   OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
4384 
4385   unsigned NumExprs = 0;
4386   unsigned NumLiterals = 0;
4387   uint32_t LiteralValue;
4388 
4389   for (int OpIdx : OpIndices) {
4390     if (OpIdx == -1)
4391       continue;
4392 
4393     const MCOperand &MO = Inst.getOperand(OpIdx);
4394     if (!MO.isImm() && !MO.isExpr())
4395       continue;
4396     if (!isSISrcOperand(Desc, OpIdx))
4397       continue;
4398 
4399     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4400       uint64_t Value = static_cast<uint64_t>(MO.getImm());
4401       bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
4402                     AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
4403       bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
4404 
4405       if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
4406         Error(getLitLoc(Operands), "invalid operand for instruction");
4407         return false;
4408       }
4409 
4410       if (IsFP64 && IsValid32Op)
4411         Value = Hi_32(Value);
4412 
4413       if (NumLiterals == 0 || LiteralValue != Value) {
4414         LiteralValue = Value;
4415         ++NumLiterals;
4416       }
4417     } else if (MO.isExpr()) {
4418       ++NumExprs;
4419     }
4420   }
4421   NumLiterals += NumExprs;
4422 
4423   if (!NumLiterals)
4424     return true;
4425 
4426   if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
4427     Error(getLitLoc(Operands), "literal operands are not supported");
4428     return false;
4429   }
4430 
4431   if (NumLiterals > 1) {
4432     Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
4433     return false;
4434   }
4435 
4436   return true;
4437 }
4438 
4439 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4440 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4441                          const MCRegisterInfo *MRI) {
4442   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4443   if (OpIdx < 0)
4444     return -1;
4445 
4446   const MCOperand &Op = Inst.getOperand(OpIdx);
4447   if (!Op.isReg())
4448     return -1;
4449 
4450   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4451   auto Reg = Sub ? Sub : Op.getReg();
4452   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4453   return AGPR32.contains(Reg) ? 1 : 0;
4454 }
4455 
4456 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4457   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4458   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4459                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4460                   SIInstrFlags::DS)) == 0)
4461     return true;
4462 
4463   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4464                                                       : AMDGPU::OpName::vdata;
4465 
4466   const MCRegisterInfo *MRI = getMRI();
4467   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4468   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4469 
4470   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4471     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4472     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4473       return false;
4474   }
4475 
4476   auto FB = getFeatureBits();
4477   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4478     if (DataAreg < 0 || DstAreg < 0)
4479       return true;
4480     return DstAreg == DataAreg;
4481   }
4482 
4483   return DstAreg < 1 && DataAreg < 1;
4484 }
4485 
4486 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4487   auto FB = getFeatureBits();
4488   if (!FB[AMDGPU::FeatureGFX90AInsts])
4489     return true;
4490 
4491   const MCRegisterInfo *MRI = getMRI();
4492   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4493   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4494   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4495     const MCOperand &Op = Inst.getOperand(I);
4496     if (!Op.isReg())
4497       continue;
4498 
4499     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4500     if (!Sub)
4501       continue;
4502 
4503     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4504       return false;
4505     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4506       return false;
4507   }
4508 
4509   return true;
4510 }
4511 
4512 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4513   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4514     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4515     if (Op.isBLGP())
4516       return Op.getStartLoc();
4517   }
4518   return SMLoc();
4519 }
4520 
4521 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4522                                    const OperandVector &Operands) {
4523   unsigned Opc = Inst.getOpcode();
4524   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4525   if (BlgpIdx == -1)
4526     return true;
4527   SMLoc BLGPLoc = getBLGPLoc(Operands);
4528   if (!BLGPLoc.isValid())
4529     return true;
4530   bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4531   auto FB = getFeatureBits();
4532   bool UsesNeg = false;
4533   if (FB[AMDGPU::FeatureGFX940Insts]) {
4534     switch (Opc) {
4535     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4536     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4537     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4538     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4539       UsesNeg = true;
4540     }
4541   }
4542 
4543   if (IsNeg == UsesNeg)
4544     return true;
4545 
4546   Error(BLGPLoc,
4547         UsesNeg ? "invalid modifier: blgp is not supported"
4548                 : "invalid modifier: neg is not supported");
4549 
4550   return false;
4551 }
4552 
4553 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4554                                       const OperandVector &Operands) {
4555   if (!isGFX11Plus())
4556     return true;
4557 
4558   unsigned Opc = Inst.getOpcode();
4559   if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4560       Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4561       Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4562       Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4563     return true;
4564 
4565   int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4566   assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4567   auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4568   if (Reg == AMDGPU::SGPR_NULL)
4569     return true;
4570 
4571   SMLoc RegLoc = getRegLoc(Reg, Operands);
4572   Error(RegLoc, "src0 must be null");
4573   return false;
4574 }
4575 
4576 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4577                                  const OperandVector &Operands) {
4578   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4579   if ((TSFlags & SIInstrFlags::DS) == 0)
4580     return true;
4581   if (TSFlags & SIInstrFlags::GWS)
4582     return validateGWS(Inst, Operands);
4583   // Only validate GDS for non-GWS instructions.
4584   if (hasGDS())
4585     return true;
4586   int GDSIdx =
4587       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4588   if (GDSIdx < 0)
4589     return true;
4590   unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4591   if (GDS) {
4592     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4593     Error(S, "gds modifier is not supported on this GPU");
4594     return false;
4595   }
4596   return true;
4597 }
4598 
4599 // gfx90a has an undocumented limitation:
4600 // DS_GWS opcodes must use even aligned registers.
4601 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4602                                   const OperandVector &Operands) {
4603   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4604     return true;
4605 
4606   int Opc = Inst.getOpcode();
4607   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4608       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4609     return true;
4610 
4611   const MCRegisterInfo *MRI = getMRI();
4612   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4613   int Data0Pos =
4614       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4615   assert(Data0Pos != -1);
4616   auto Reg = Inst.getOperand(Data0Pos).getReg();
4617   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4618   if (RegIdx & 1) {
4619     SMLoc RegLoc = getRegLoc(Reg, Operands);
4620     Error(RegLoc, "vgpr must be even aligned");
4621     return false;
4622   }
4623 
4624   return true;
4625 }
4626 
4627 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4628                                             const OperandVector &Operands,
4629                                             const SMLoc &IDLoc) {
4630   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4631                                            AMDGPU::OpName::cpol);
4632   if (CPolPos == -1)
4633     return true;
4634 
4635   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4636 
4637   if (isGFX12Plus())
4638     return validateTHAndScopeBits(Inst, Operands, CPol);
4639 
4640   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4641   if (TSFlags & SIInstrFlags::SMRD) {
4642     if (CPol && (isSI() || isCI())) {
4643       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4644       Error(S, "cache policy is not supported for SMRD instructions");
4645       return false;
4646     }
4647     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4648       Error(IDLoc, "invalid cache policy for SMEM instruction");
4649       return false;
4650     }
4651   }
4652 
4653   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4654     const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4655                                       SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4656                                       SIInstrFlags::FLAT;
4657     if (!(TSFlags & AllowSCCModifier)) {
4658       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4659       StringRef CStr(S.getPointer());
4660       S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4661       Error(S,
4662             "scc modifier is not supported for this instruction on this GPU");
4663       return false;
4664     }
4665   }
4666 
4667   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4668     return true;
4669 
4670   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4671     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4672       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4673                               : "instruction must use glc");
4674       return false;
4675     }
4676   } else {
4677     if (CPol & CPol::GLC) {
4678       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4679       StringRef CStr(S.getPointer());
4680       S = SMLoc::getFromPointer(
4681           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4682       Error(S, isGFX940() ? "instruction must not use sc0"
4683                           : "instruction must not use glc");
4684       return false;
4685     }
4686   }
4687 
4688   return true;
4689 }
4690 
4691 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4692                                              const OperandVector &Operands,
4693                                              const unsigned CPol) {
4694   const unsigned TH = CPol & AMDGPU::CPol::TH;
4695   const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4696 
4697   const unsigned Opcode = Inst.getOpcode();
4698   const MCInstrDesc &TID = MII.get(Opcode);
4699 
4700   auto PrintError = [&](StringRef Msg) {
4701     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4702     Error(S, Msg);
4703     return false;
4704   };
4705 
4706   if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4707       (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4708       (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4709     return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4710 
4711   if (TH == 0)
4712     return true;
4713 
4714   if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4715       ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4716        (TH == AMDGPU::CPol::TH_NT_HT)))
4717     return PrintError("invalid th value for SMEM instruction");
4718 
4719   if (TH == AMDGPU::CPol::TH_BYPASS) {
4720     if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4721          CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4722         (Scope == AMDGPU::CPol::SCOPE_SYS &&
4723          !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
4724       return PrintError("scope and th combination is not valid");
4725   }
4726 
4727   bool IsStore = TID.mayStore();
4728   bool IsAtomic =
4729       TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
4730 
4731   if (IsAtomic) {
4732     if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
4733       return PrintError("invalid th value for atomic instructions");
4734   } else if (IsStore) {
4735     if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
4736       return PrintError("invalid th value for store instructions");
4737   } else {
4738     if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
4739       return PrintError("invalid th value for load instructions");
4740   }
4741 
4742   return true;
4743 }
4744 
4745 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4746   if (!isGFX11Plus())
4747     return true;
4748   for (auto &Operand : Operands) {
4749     if (!Operand->isReg())
4750       continue;
4751     unsigned Reg = Operand->getReg();
4752     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4753       Error(getRegLoc(Reg, Operands),
4754             "execz and vccz are not supported on this GPU");
4755       return false;
4756     }
4757   }
4758   return true;
4759 }
4760 
4761 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4762                                   const OperandVector &Operands) {
4763   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4764   if (Desc.mayStore() &&
4765       (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4766     SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4767     if (Loc != getInstLoc(Operands)) {
4768       Error(Loc, "TFE modifier has no meaning for store instructions");
4769       return false;
4770     }
4771   }
4772 
4773   return true;
4774 }
4775 
4776 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4777                                           const SMLoc &IDLoc,
4778                                           const OperandVector &Operands) {
4779   if (auto ErrMsg = validateLdsDirect(Inst)) {
4780     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4781     return false;
4782   }
4783   if (!validateSOPLiteral(Inst)) {
4784     Error(getLitLoc(Operands),
4785       "only one unique literal operand is allowed");
4786     return false;
4787   }
4788   if (!validateVOPLiteral(Inst, Operands)) {
4789     return false;
4790   }
4791   if (!validateConstantBusLimitations(Inst, Operands)) {
4792     return false;
4793   }
4794   if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4795     return false;
4796   }
4797   if (!validateIntClampSupported(Inst)) {
4798     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4799       "integer clamping is not supported on this GPU");
4800     return false;
4801   }
4802   if (!validateOpSel(Inst)) {
4803     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4804       "invalid op_sel operand");
4805     return false;
4806   }
4807   if (!validateDPP(Inst, Operands)) {
4808     return false;
4809   }
4810   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4811   if (!validateMIMGD16(Inst)) {
4812     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4813       "d16 modifier is not supported on this GPU");
4814     return false;
4815   }
4816   if (!validateMIMGMSAA(Inst)) {
4817     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4818           "invalid dim; must be MSAA type");
4819     return false;
4820   }
4821   if (!validateMIMGDataSize(Inst, IDLoc)) {
4822     return false;
4823   }
4824   if (!validateMIMGAddrSize(Inst, IDLoc))
4825     return false;
4826   if (!validateMIMGAtomicDMask(Inst)) {
4827     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4828       "invalid atomic image dmask");
4829     return false;
4830   }
4831   if (!validateMIMGGatherDMask(Inst)) {
4832     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4833       "invalid image_gather dmask: only one bit must be set");
4834     return false;
4835   }
4836   if (!validateMovrels(Inst, Operands)) {
4837     return false;
4838   }
4839   if (!validateOffset(Inst, Operands)) {
4840     return false;
4841   }
4842   if (!validateMAIAccWrite(Inst, Operands)) {
4843     return false;
4844   }
4845   if (!validateMAISrc2(Inst, Operands)) {
4846     return false;
4847   }
4848   if (!validateMFMA(Inst, Operands)) {
4849     return false;
4850   }
4851   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4852     return false;
4853   }
4854 
4855   if (!validateAGPRLdSt(Inst)) {
4856     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4857     ? "invalid register class: data and dst should be all VGPR or AGPR"
4858     : "invalid register class: agpr loads and stores not supported on this GPU"
4859     );
4860     return false;
4861   }
4862   if (!validateVGPRAlign(Inst)) {
4863     Error(IDLoc,
4864       "invalid register class: vgpr tuples must be 64 bit aligned");
4865     return false;
4866   }
4867   if (!validateDS(Inst, Operands)) {
4868     return false;
4869   }
4870 
4871   if (!validateBLGP(Inst, Operands)) {
4872     return false;
4873   }
4874 
4875   if (!validateDivScale(Inst)) {
4876     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4877     return false;
4878   }
4879   if (!validateWaitCnt(Inst, Operands)) {
4880     return false;
4881   }
4882   if (!validateExeczVcczOperands(Operands)) {
4883     return false;
4884   }
4885   if (!validateTFE(Inst, Operands)) {
4886     return false;
4887   }
4888 
4889   return true;
4890 }
4891 
4892 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4893                                             const FeatureBitset &FBS,
4894                                             unsigned VariantID = 0);
4895 
4896 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4897                                 const FeatureBitset &AvailableFeatures,
4898                                 unsigned VariantID);
4899 
4900 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4901                                        const FeatureBitset &FBS) {
4902   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4903 }
4904 
4905 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4906                                        const FeatureBitset &FBS,
4907                                        ArrayRef<unsigned> Variants) {
4908   for (auto Variant : Variants) {
4909     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4910       return true;
4911   }
4912 
4913   return false;
4914 }
4915 
4916 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4917                                                   const SMLoc &IDLoc) {
4918   FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
4919 
4920   // Check if requested instruction variant is supported.
4921   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4922     return false;
4923 
4924   // This instruction is not supported.
4925   // Clear any other pending errors because they are no longer relevant.
4926   getParser().clearPendingErrors();
4927 
4928   // Requested instruction variant is not supported.
4929   // Check if any other variants are supported.
4930   StringRef VariantName = getMatchedVariantName();
4931   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4932     return Error(IDLoc,
4933                  Twine(VariantName,
4934                        " variant of this instruction is not supported"));
4935   }
4936 
4937   // Check if this instruction may be used with a different wavesize.
4938   if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
4939       !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
4940 
4941     FeatureBitset FeaturesWS32 = getFeatureBits();
4942     FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
4943         .flip(AMDGPU::FeatureWavefrontSize32);
4944     FeatureBitset AvailableFeaturesWS32 =
4945         ComputeAvailableFeatures(FeaturesWS32);
4946 
4947     if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
4948       return Error(IDLoc, "instruction requires wavesize=32");
4949   }
4950 
4951   // Finally check if this instruction is supported on any other GPU.
4952   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4953     return Error(IDLoc, "instruction not supported on this GPU");
4954   }
4955 
4956   // Instruction not supported on any GPU. Probably a typo.
4957   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4958   return Error(IDLoc, "invalid instruction" + Suggestion);
4959 }
4960 
4961 static bool isInvalidVOPDY(const OperandVector &Operands,
4962                            uint64_t InvalidOprIdx) {
4963   assert(InvalidOprIdx < Operands.size());
4964   const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
4965   if (Op.isToken() && InvalidOprIdx > 1) {
4966     const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
4967     return PrevOp.isToken() && PrevOp.getToken() == "::";
4968   }
4969   return false;
4970 }
4971 
4972 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4973                                               OperandVector &Operands,
4974                                               MCStreamer &Out,
4975                                               uint64_t &ErrorInfo,
4976                                               bool MatchingInlineAsm) {
4977   MCInst Inst;
4978   unsigned Result = Match_Success;
4979   for (auto Variant : getMatchedVariants()) {
4980     uint64_t EI;
4981     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4982                                   Variant);
4983     // We order match statuses from least to most specific. We use most specific
4984     // status as resulting
4985     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4986     if ((R == Match_Success) ||
4987         (R == Match_PreferE32) ||
4988         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4989         (R == Match_InvalidOperand && Result != Match_MissingFeature
4990                                    && Result != Match_PreferE32) ||
4991         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4992                                    && Result != Match_MissingFeature
4993                                    && Result != Match_PreferE32)) {
4994       Result = R;
4995       ErrorInfo = EI;
4996     }
4997     if (R == Match_Success)
4998       break;
4999   }
5000 
5001   if (Result == Match_Success) {
5002     if (!validateInstruction(Inst, IDLoc, Operands)) {
5003       return true;
5004     }
5005     Inst.setLoc(IDLoc);
5006     Out.emitInstruction(Inst, getSTI());
5007     return false;
5008   }
5009 
5010   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5011   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5012     return true;
5013   }
5014 
5015   switch (Result) {
5016   default: break;
5017   case Match_MissingFeature:
5018     // It has been verified that the specified instruction
5019     // mnemonic is valid. A match was found but it requires
5020     // features which are not supported on this GPU.
5021     return Error(IDLoc, "operands are not valid for this GPU or mode");
5022 
5023   case Match_InvalidOperand: {
5024     SMLoc ErrorLoc = IDLoc;
5025     if (ErrorInfo != ~0ULL) {
5026       if (ErrorInfo >= Operands.size()) {
5027         return Error(IDLoc, "too few operands for instruction");
5028       }
5029       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5030       if (ErrorLoc == SMLoc())
5031         ErrorLoc = IDLoc;
5032 
5033       if (isInvalidVOPDY(Operands, ErrorInfo))
5034         return Error(ErrorLoc, "invalid VOPDY instruction");
5035     }
5036     return Error(ErrorLoc, "invalid operand for instruction");
5037   }
5038 
5039   case Match_PreferE32:
5040     return Error(IDLoc, "internal error: instruction without _e64 suffix "
5041                         "should be encoded as e32");
5042   case Match_MnemonicFail:
5043     llvm_unreachable("Invalid instructions should have been handled already");
5044   }
5045   llvm_unreachable("Implement any new match types added!");
5046 }
5047 
5048 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5049   int64_t Tmp = -1;
5050   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5051     return true;
5052   }
5053   if (getParser().parseAbsoluteExpression(Tmp)) {
5054     return true;
5055   }
5056   Ret = static_cast<uint32_t>(Tmp);
5057   return false;
5058 }
5059 
5060 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
5061                                                uint32_t &Minor) {
5062   if (ParseAsAbsoluteExpression(Major))
5063     return TokError("invalid major version");
5064 
5065   if (!trySkipToken(AsmToken::Comma))
5066     return TokError("minor version number required, comma expected");
5067 
5068   if (ParseAsAbsoluteExpression(Minor))
5069     return TokError("invalid minor version");
5070 
5071   return false;
5072 }
5073 
5074 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5075   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5076     return TokError("directive only supported for amdgcn architecture");
5077 
5078   std::string TargetIDDirective;
5079   SMLoc TargetStart = getTok().getLoc();
5080   if (getParser().parseEscapedString(TargetIDDirective))
5081     return true;
5082 
5083   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5084   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5085     return getParser().Error(TargetRange.Start,
5086         (Twine(".amdgcn_target directive's target id ") +
5087          Twine(TargetIDDirective) +
5088          Twine(" does not match the specified target id ") +
5089          Twine(getTargetStreamer().getTargetID()->toString())).str());
5090 
5091   return false;
5092 }
5093 
5094 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5095   return Error(Range.Start, "value out of range", Range);
5096 }
5097 
5098 bool AMDGPUAsmParser::calculateGPRBlocks(
5099     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5100     bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5101     unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5102     SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5103   // TODO(scott.linder): These calculations are duplicated from
5104   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5105   IsaVersion Version = getIsaVersion(getSTI().getCPU());
5106 
5107   unsigned NumVGPRs = NextFreeVGPR;
5108   unsigned NumSGPRs = NextFreeSGPR;
5109 
5110   if (Version.Major >= 10)
5111     NumSGPRs = 0;
5112   else {
5113     unsigned MaxAddressableNumSGPRs =
5114         IsaInfo::getAddressableNumSGPRs(&getSTI());
5115 
5116     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5117         NumSGPRs > MaxAddressableNumSGPRs)
5118       return OutOfRangeError(SGPRRange);
5119 
5120     NumSGPRs +=
5121         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5122 
5123     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5124         NumSGPRs > MaxAddressableNumSGPRs)
5125       return OutOfRangeError(SGPRRange);
5126 
5127     if (Features.test(FeatureSGPRInitBug))
5128       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5129   }
5130 
5131   VGPRBlocks =
5132       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
5133   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5134 
5135   return false;
5136 }
5137 
5138 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5139   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5140     return TokError("directive only supported for amdgcn architecture");
5141 
5142   if (!isHsaAbi(getSTI()))
5143     return TokError("directive only supported for amdhsa OS");
5144 
5145   StringRef KernelName;
5146   if (getParser().parseIdentifier(KernelName))
5147     return true;
5148 
5149   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
5150 
5151   StringSet<> Seen;
5152 
5153   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5154 
5155   SMRange VGPRRange;
5156   uint64_t NextFreeVGPR = 0;
5157   uint64_t AccumOffset = 0;
5158   uint64_t SharedVGPRCount = 0;
5159   uint64_t PreloadLength = 0;
5160   uint64_t PreloadOffset = 0;
5161   SMRange SGPRRange;
5162   uint64_t NextFreeSGPR = 0;
5163 
5164   // Count the number of user SGPRs implied from the enabled feature bits.
5165   unsigned ImpliedUserSGPRCount = 0;
5166 
5167   // Track if the asm explicitly contains the directive for the user SGPR
5168   // count.
5169   std::optional<unsigned> ExplicitUserSGPRCount;
5170   bool ReserveVCC = true;
5171   bool ReserveFlatScr = true;
5172   std::optional<bool> EnableWavefrontSize32;
5173 
5174   while (true) {
5175     while (trySkipToken(AsmToken::EndOfStatement));
5176 
5177     StringRef ID;
5178     SMRange IDRange = getTok().getLocRange();
5179     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5180       return true;
5181 
5182     if (ID == ".end_amdhsa_kernel")
5183       break;
5184 
5185     if (!Seen.insert(ID).second)
5186       return TokError(".amdhsa_ directives cannot be repeated");
5187 
5188     SMLoc ValStart = getLoc();
5189     int64_t IVal;
5190     if (getParser().parseAbsoluteExpression(IVal))
5191       return true;
5192     SMLoc ValEnd = getLoc();
5193     SMRange ValRange = SMRange(ValStart, ValEnd);
5194 
5195     if (IVal < 0)
5196       return OutOfRangeError(ValRange);
5197 
5198     uint64_t Val = IVal;
5199 
5200 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
5201   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
5202     return OutOfRangeError(RANGE);                                             \
5203   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5204 
5205     if (ID == ".amdhsa_group_segment_fixed_size") {
5206       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5207         return OutOfRangeError(ValRange);
5208       KD.group_segment_fixed_size = Val;
5209     } else if (ID == ".amdhsa_private_segment_fixed_size") {
5210       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5211         return OutOfRangeError(ValRange);
5212       KD.private_segment_fixed_size = Val;
5213     } else if (ID == ".amdhsa_kernarg_size") {
5214       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5215         return OutOfRangeError(ValRange);
5216       KD.kernarg_size = Val;
5217     } else if (ID == ".amdhsa_user_sgpr_count") {
5218       ExplicitUserSGPRCount = Val;
5219     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5220       if (hasArchitectedFlatScratch())
5221         return Error(IDRange.Start,
5222                      "directive is not supported with architected flat scratch",
5223                      IDRange);
5224       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5225                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5226                        Val, ValRange);
5227       if (Val)
5228         ImpliedUserSGPRCount += 4;
5229     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5230       if (!hasKernargPreload())
5231         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5232 
5233       if (Val > getMaxNumUserSGPRs())
5234         return OutOfRangeError(ValRange);
5235       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
5236                        ValRange);
5237       if (Val) {
5238         ImpliedUserSGPRCount += Val;
5239         PreloadLength = Val;
5240       }
5241     } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5242       if (!hasKernargPreload())
5243         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5244 
5245       if (Val >= 1024)
5246         return OutOfRangeError(ValRange);
5247       PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
5248                        ValRange);
5249       if (Val)
5250         PreloadOffset = Val;
5251     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5252       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5253                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5254                        ValRange);
5255       if (Val)
5256         ImpliedUserSGPRCount += 2;
5257     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5258       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5259                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5260                        ValRange);
5261       if (Val)
5262         ImpliedUserSGPRCount += 2;
5263     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5264       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5265                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5266                        Val, ValRange);
5267       if (Val)
5268         ImpliedUserSGPRCount += 2;
5269     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5270       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5271                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5272                        ValRange);
5273       if (Val)
5274         ImpliedUserSGPRCount += 2;
5275     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5276       if (hasArchitectedFlatScratch())
5277         return Error(IDRange.Start,
5278                      "directive is not supported with architected flat scratch",
5279                      IDRange);
5280       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5281                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5282                        ValRange);
5283       if (Val)
5284         ImpliedUserSGPRCount += 2;
5285     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5286       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5287                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5288                        Val, ValRange);
5289       if (Val)
5290         ImpliedUserSGPRCount += 1;
5291     } else if (ID == ".amdhsa_wavefront_size32") {
5292       if (IVersion.Major < 10)
5293         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5294       EnableWavefrontSize32 = Val;
5295       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5296                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5297                        Val, ValRange);
5298     } else if (ID == ".amdhsa_uses_dynamic_stack") {
5299       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5300                        KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5301     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5302       if (hasArchitectedFlatScratch())
5303         return Error(IDRange.Start,
5304                      "directive is not supported with architected flat scratch",
5305                      IDRange);
5306       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5307                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5308     } else if (ID == ".amdhsa_enable_private_segment") {
5309       if (!hasArchitectedFlatScratch())
5310         return Error(
5311             IDRange.Start,
5312             "directive is not supported without architected flat scratch",
5313             IDRange);
5314       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5315                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5316     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5317       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5318                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5319                        ValRange);
5320     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5321       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5322                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5323                        ValRange);
5324     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5325       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5326                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5327                        ValRange);
5328     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5329       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5330                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5331                        ValRange);
5332     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5333       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5334                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5335                        ValRange);
5336     } else if (ID == ".amdhsa_next_free_vgpr") {
5337       VGPRRange = ValRange;
5338       NextFreeVGPR = Val;
5339     } else if (ID == ".amdhsa_next_free_sgpr") {
5340       SGPRRange = ValRange;
5341       NextFreeSGPR = Val;
5342     } else if (ID == ".amdhsa_accum_offset") {
5343       if (!isGFX90A())
5344         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5345       AccumOffset = Val;
5346     } else if (ID == ".amdhsa_reserve_vcc") {
5347       if (!isUInt<1>(Val))
5348         return OutOfRangeError(ValRange);
5349       ReserveVCC = Val;
5350     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5351       if (IVersion.Major < 7)
5352         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5353       if (hasArchitectedFlatScratch())
5354         return Error(IDRange.Start,
5355                      "directive is not supported with architected flat scratch",
5356                      IDRange);
5357       if (!isUInt<1>(Val))
5358         return OutOfRangeError(ValRange);
5359       ReserveFlatScr = Val;
5360     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5361       if (IVersion.Major < 8)
5362         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5363       if (!isUInt<1>(Val))
5364         return OutOfRangeError(ValRange);
5365       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5366         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5367                                  IDRange);
5368     } else if (ID == ".amdhsa_float_round_mode_32") {
5369       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5370                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5371     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5372       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5373                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5374     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5375       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5376                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5377     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5378       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5379                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5380                        ValRange);
5381     } else if (ID == ".amdhsa_dx10_clamp") {
5382       if (IVersion.Major >= 12)
5383         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5384       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5385                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
5386                        ValRange);
5387     } else if (ID == ".amdhsa_ieee_mode") {
5388       if (IVersion.Major >= 12)
5389         return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5390       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5391                        COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
5392                        ValRange);
5393     } else if (ID == ".amdhsa_fp16_overflow") {
5394       if (IVersion.Major < 9)
5395         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5396       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
5397                        ValRange);
5398     } else if (ID == ".amdhsa_tg_split") {
5399       if (!isGFX90A())
5400         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5401       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5402                        ValRange);
5403     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5404       if (IVersion.Major < 10)
5405         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5406       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
5407                        ValRange);
5408     } else if (ID == ".amdhsa_memory_ordered") {
5409       if (IVersion.Major < 10)
5410         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5411       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
5412                        ValRange);
5413     } else if (ID == ".amdhsa_forward_progress") {
5414       if (IVersion.Major < 10)
5415         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5416       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
5417                        ValRange);
5418     } else if (ID == ".amdhsa_shared_vgpr_count") {
5419       if (IVersion.Major < 10)
5420         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5421       SharedVGPRCount = Val;
5422       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5423                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5424                        ValRange);
5425     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5426       PARSE_BITS_ENTRY(
5427           KD.compute_pgm_rsrc2,
5428           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5429           ValRange);
5430     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5431       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5432                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5433                        Val, ValRange);
5434     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5435       PARSE_BITS_ENTRY(
5436           KD.compute_pgm_rsrc2,
5437           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5438           ValRange);
5439     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5440       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5441                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5442                        Val, ValRange);
5443     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5444       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5445                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5446                        Val, ValRange);
5447     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5448       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5449                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5450                        Val, ValRange);
5451     } else if (ID == ".amdhsa_exception_int_div_zero") {
5452       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5453                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5454                        Val, ValRange);
5455     } else if (ID == ".amdhsa_round_robin_scheduling") {
5456       if (IVersion.Major < 12)
5457         return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5458       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5459                        COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
5460                        ValRange);
5461     } else {
5462       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5463     }
5464 
5465 #undef PARSE_BITS_ENTRY
5466   }
5467 
5468   if (!Seen.contains(".amdhsa_next_free_vgpr"))
5469     return TokError(".amdhsa_next_free_vgpr directive is required");
5470 
5471   if (!Seen.contains(".amdhsa_next_free_sgpr"))
5472     return TokError(".amdhsa_next_free_sgpr directive is required");
5473 
5474   unsigned VGPRBlocks;
5475   unsigned SGPRBlocks;
5476   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5477                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5478                          EnableWavefrontSize32, NextFreeVGPR,
5479                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5480                          SGPRBlocks))
5481     return true;
5482 
5483   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5484           VGPRBlocks))
5485     return OutOfRangeError(VGPRRange);
5486   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5487                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5488 
5489   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5490           SGPRBlocks))
5491     return OutOfRangeError(SGPRRange);
5492   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5493                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5494                   SGPRBlocks);
5495 
5496   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5497     return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5498                     "enabled user SGPRs");
5499 
5500   unsigned UserSGPRCount =
5501       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5502 
5503   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5504     return TokError("too many user SGPRs enabled");
5505   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5506                   UserSGPRCount);
5507 
5508   if (PreloadLength && KD.kernarg_size &&
5509       (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
5510     return TokError("Kernarg preload length + offset is larger than the "
5511                     "kernarg segment size");
5512 
5513   if (isGFX90A()) {
5514     if (!Seen.contains(".amdhsa_accum_offset"))
5515       return TokError(".amdhsa_accum_offset directive is required");
5516     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5517       return TokError("accum_offset should be in range [4..256] in "
5518                       "increments of 4");
5519     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5520       return TokError("accum_offset exceeds total VGPR allocation");
5521     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5522                     (AccumOffset / 4 - 1));
5523   }
5524 
5525   if (IVersion.Major >= 10) {
5526     // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5527     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5528       return TokError("shared_vgpr_count directive not valid on "
5529                       "wavefront size 32");
5530     }
5531     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5532       return TokError("shared_vgpr_count*2 + "
5533                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5534                       "exceed 63\n");
5535     }
5536   }
5537 
5538   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5539       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5540       ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
5541   return false;
5542 }
5543 
5544 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5545   uint32_t Major;
5546   uint32_t Minor;
5547 
5548   if (ParseDirectiveMajorMinor(Major, Minor))
5549     return true;
5550 
5551   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5552   return false;
5553 }
5554 
5555 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5556   uint32_t Major;
5557   uint32_t Minor;
5558   uint32_t Stepping;
5559   StringRef VendorName;
5560   StringRef ArchName;
5561 
5562   // If this directive has no arguments, then use the ISA version for the
5563   // targeted GPU.
5564   if (isToken(AsmToken::EndOfStatement)) {
5565     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5566     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5567                                                         ISA.Stepping,
5568                                                         "AMD", "AMDGPU");
5569     return false;
5570   }
5571 
5572   if (ParseDirectiveMajorMinor(Major, Minor))
5573     return true;
5574 
5575   if (!trySkipToken(AsmToken::Comma))
5576     return TokError("stepping version number required, comma expected");
5577 
5578   if (ParseAsAbsoluteExpression(Stepping))
5579     return TokError("invalid stepping version");
5580 
5581   if (!trySkipToken(AsmToken::Comma))
5582     return TokError("vendor name required, comma expected");
5583 
5584   if (!parseString(VendorName, "invalid vendor name"))
5585     return true;
5586 
5587   if (!trySkipToken(AsmToken::Comma))
5588     return TokError("arch name required, comma expected");
5589 
5590   if (!parseString(ArchName, "invalid arch name"))
5591     return true;
5592 
5593   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5594                                                       VendorName, ArchName);
5595   return false;
5596 }
5597 
5598 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5599                                                amd_kernel_code_t &Header) {
5600   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5601   // assembly for backwards compatibility.
5602   if (ID == "max_scratch_backing_memory_byte_size") {
5603     Parser.eatToEndOfStatement();
5604     return false;
5605   }
5606 
5607   SmallString<40> ErrStr;
5608   raw_svector_ostream Err(ErrStr);
5609   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5610     return TokError(Err.str());
5611   }
5612   Lex();
5613 
5614   if (ID == "enable_dx10_clamp") {
5615     if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
5616         isGFX12Plus())
5617       return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
5618   }
5619 
5620   if (ID == "enable_ieee_mode") {
5621     if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
5622         isGFX12Plus())
5623       return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
5624   }
5625 
5626   if (ID == "enable_wavefront_size32") {
5627     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5628       if (!isGFX10Plus())
5629         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5630       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5631         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5632     } else {
5633       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5634         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5635     }
5636   }
5637 
5638   if (ID == "wavefront_size") {
5639     if (Header.wavefront_size == 5) {
5640       if (!isGFX10Plus())
5641         return TokError("wavefront_size=5 is only allowed on GFX10+");
5642       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5643         return TokError("wavefront_size=5 requires +WavefrontSize32");
5644     } else if (Header.wavefront_size == 6) {
5645       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5646         return TokError("wavefront_size=6 requires +WavefrontSize64");
5647     }
5648   }
5649 
5650   if (ID == "enable_wgp_mode") {
5651     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5652         !isGFX10Plus())
5653       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5654   }
5655 
5656   if (ID == "enable_mem_ordered") {
5657     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5658         !isGFX10Plus())
5659       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5660   }
5661 
5662   if (ID == "enable_fwd_progress") {
5663     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5664         !isGFX10Plus())
5665       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5666   }
5667 
5668   return false;
5669 }
5670 
5671 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5672   amd_kernel_code_t Header;
5673   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5674 
5675   while (true) {
5676     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5677     // will set the current token to EndOfStatement.
5678     while(trySkipToken(AsmToken::EndOfStatement));
5679 
5680     StringRef ID;
5681     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5682       return true;
5683 
5684     if (ID == ".end_amd_kernel_code_t")
5685       break;
5686 
5687     if (ParseAMDKernelCodeTValue(ID, Header))
5688       return true;
5689   }
5690 
5691   getTargetStreamer().EmitAMDKernelCodeT(Header);
5692 
5693   return false;
5694 }
5695 
5696 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5697   StringRef KernelName;
5698   if (!parseId(KernelName, "expected symbol name"))
5699     return true;
5700 
5701   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5702                                            ELF::STT_AMDGPU_HSA_KERNEL);
5703 
5704   KernelScope.initialize(getContext());
5705   return false;
5706 }
5707 
5708 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5709   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5710     return Error(getLoc(),
5711                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5712                  "architectures");
5713   }
5714 
5715   auto TargetIDDirective = getLexer().getTok().getStringContents();
5716   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5717     return Error(getParser().getTok().getLoc(), "target id must match options");
5718 
5719   getTargetStreamer().EmitISAVersion();
5720   Lex();
5721 
5722   return false;
5723 }
5724 
5725 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5726   assert(isHsaAbi(getSTI()));
5727 
5728   std::string HSAMetadataString;
5729   if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
5730                           HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
5731     return true;
5732 
5733   if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5734     return Error(getLoc(), "invalid HSA metadata");
5735 
5736   return false;
5737 }
5738 
5739 /// Common code to parse out a block of text (typically YAML) between start and
5740 /// end directives.
5741 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5742                                           const char *AssemblerDirectiveEnd,
5743                                           std::string &CollectString) {
5744 
5745   raw_string_ostream CollectStream(CollectString);
5746 
5747   getLexer().setSkipSpace(false);
5748 
5749   bool FoundEnd = false;
5750   while (!isToken(AsmToken::Eof)) {
5751     while (isToken(AsmToken::Space)) {
5752       CollectStream << getTokenStr();
5753       Lex();
5754     }
5755 
5756     if (trySkipId(AssemblerDirectiveEnd)) {
5757       FoundEnd = true;
5758       break;
5759     }
5760 
5761     CollectStream << Parser.parseStringToEndOfStatement()
5762                   << getContext().getAsmInfo()->getSeparatorString();
5763 
5764     Parser.eatToEndOfStatement();
5765   }
5766 
5767   getLexer().setSkipSpace(true);
5768 
5769   if (isToken(AsmToken::Eof) && !FoundEnd) {
5770     return TokError(Twine("expected directive ") +
5771                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5772   }
5773 
5774   CollectStream.flush();
5775   return false;
5776 }
5777 
5778 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5779 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5780   std::string String;
5781   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5782                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5783     return true;
5784 
5785   auto PALMetadata = getTargetStreamer().getPALMetadata();
5786   if (!PALMetadata->setFromString(String))
5787     return Error(getLoc(), "invalid PAL metadata");
5788   return false;
5789 }
5790 
5791 /// Parse the assembler directive for old linear-format PAL metadata.
5792 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5793   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5794     return Error(getLoc(),
5795                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5796                  "not available on non-amdpal OSes")).str());
5797   }
5798 
5799   auto PALMetadata = getTargetStreamer().getPALMetadata();
5800   PALMetadata->setLegacy();
5801   for (;;) {
5802     uint32_t Key, Value;
5803     if (ParseAsAbsoluteExpression(Key)) {
5804       return TokError(Twine("invalid value in ") +
5805                       Twine(PALMD::AssemblerDirective));
5806     }
5807     if (!trySkipToken(AsmToken::Comma)) {
5808       return TokError(Twine("expected an even number of values in ") +
5809                       Twine(PALMD::AssemblerDirective));
5810     }
5811     if (ParseAsAbsoluteExpression(Value)) {
5812       return TokError(Twine("invalid value in ") +
5813                       Twine(PALMD::AssemblerDirective));
5814     }
5815     PALMetadata->setRegister(Key, Value);
5816     if (!trySkipToken(AsmToken::Comma))
5817       break;
5818   }
5819   return false;
5820 }
5821 
5822 /// ParseDirectiveAMDGPULDS
5823 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5824 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5825   if (getParser().checkForValidSection())
5826     return true;
5827 
5828   StringRef Name;
5829   SMLoc NameLoc = getLoc();
5830   if (getParser().parseIdentifier(Name))
5831     return TokError("expected identifier in directive");
5832 
5833   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5834   if (getParser().parseComma())
5835     return true;
5836 
5837   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5838 
5839   int64_t Size;
5840   SMLoc SizeLoc = getLoc();
5841   if (getParser().parseAbsoluteExpression(Size))
5842     return true;
5843   if (Size < 0)
5844     return Error(SizeLoc, "size must be non-negative");
5845   if (Size > LocalMemorySize)
5846     return Error(SizeLoc, "size is too large");
5847 
5848   int64_t Alignment = 4;
5849   if (trySkipToken(AsmToken::Comma)) {
5850     SMLoc AlignLoc = getLoc();
5851     if (getParser().parseAbsoluteExpression(Alignment))
5852       return true;
5853     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5854       return Error(AlignLoc, "alignment must be a power of two");
5855 
5856     // Alignment larger than the size of LDS is possible in theory, as long
5857     // as the linker manages to place to symbol at address 0, but we do want
5858     // to make sure the alignment fits nicely into a 32-bit integer.
5859     if (Alignment >= 1u << 31)
5860       return Error(AlignLoc, "alignment is too large");
5861   }
5862 
5863   if (parseEOL())
5864     return true;
5865 
5866   Symbol->redefineIfPossible();
5867   if (!Symbol->isUndefined())
5868     return Error(NameLoc, "invalid symbol redefinition");
5869 
5870   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5871   return false;
5872 }
5873 
5874 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5875   StringRef IDVal = DirectiveID.getString();
5876 
5877   if (isHsaAbi(getSTI())) {
5878     if (IDVal == ".amdhsa_kernel")
5879      return ParseDirectiveAMDHSAKernel();
5880 
5881     // TODO: Restructure/combine with PAL metadata directive.
5882     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5883       return ParseDirectiveHSAMetadata();
5884   } else {
5885     if (IDVal == ".hsa_code_object_version")
5886       return ParseDirectiveHSACodeObjectVersion();
5887 
5888     if (IDVal == ".hsa_code_object_isa")
5889       return ParseDirectiveHSACodeObjectISA();
5890 
5891     if (IDVal == ".amd_kernel_code_t")
5892       return ParseDirectiveAMDKernelCodeT();
5893 
5894     if (IDVal == ".amdgpu_hsa_kernel")
5895       return ParseDirectiveAMDGPUHsaKernel();
5896 
5897     if (IDVal == ".amd_amdgpu_isa")
5898       return ParseDirectiveISAVersion();
5899 
5900     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
5901       return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
5902                               Twine(" directive is "
5903                                     "not available on non-amdhsa OSes"))
5904                                  .str());
5905     }
5906   }
5907 
5908   if (IDVal == ".amdgcn_target")
5909     return ParseDirectiveAMDGCNTarget();
5910 
5911   if (IDVal == ".amdgpu_lds")
5912     return ParseDirectiveAMDGPULDS();
5913 
5914   if (IDVal == PALMD::AssemblerDirectiveBegin)
5915     return ParseDirectivePALMetadataBegin();
5916 
5917   if (IDVal == PALMD::AssemblerDirective)
5918     return ParseDirectivePALMetadata();
5919 
5920   return true;
5921 }
5922 
5923 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5924                                            unsigned RegNo) {
5925 
5926   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5927     return isGFX9Plus();
5928 
5929   // GFX10+ has 2 more SGPRs 104 and 105.
5930   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5931     return hasSGPR104_SGPR105();
5932 
5933   switch (RegNo) {
5934   case AMDGPU::SRC_SHARED_BASE_LO:
5935   case AMDGPU::SRC_SHARED_BASE:
5936   case AMDGPU::SRC_SHARED_LIMIT_LO:
5937   case AMDGPU::SRC_SHARED_LIMIT:
5938   case AMDGPU::SRC_PRIVATE_BASE_LO:
5939   case AMDGPU::SRC_PRIVATE_BASE:
5940   case AMDGPU::SRC_PRIVATE_LIMIT_LO:
5941   case AMDGPU::SRC_PRIVATE_LIMIT:
5942     return isGFX9Plus();
5943   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5944     return isGFX9Plus() && !isGFX11Plus();
5945   case AMDGPU::TBA:
5946   case AMDGPU::TBA_LO:
5947   case AMDGPU::TBA_HI:
5948   case AMDGPU::TMA:
5949   case AMDGPU::TMA_LO:
5950   case AMDGPU::TMA_HI:
5951     return !isGFX9Plus();
5952   case AMDGPU::XNACK_MASK:
5953   case AMDGPU::XNACK_MASK_LO:
5954   case AMDGPU::XNACK_MASK_HI:
5955     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5956   case AMDGPU::SGPR_NULL:
5957     return isGFX10Plus();
5958   default:
5959     break;
5960   }
5961 
5962   if (isCI())
5963     return true;
5964 
5965   if (isSI() || isGFX10Plus()) {
5966     // No flat_scr on SI.
5967     // On GFX10Plus flat scratch is not a valid register operand and can only be
5968     // accessed with s_setreg/s_getreg.
5969     switch (RegNo) {
5970     case AMDGPU::FLAT_SCR:
5971     case AMDGPU::FLAT_SCR_LO:
5972     case AMDGPU::FLAT_SCR_HI:
5973       return false;
5974     default:
5975       return true;
5976     }
5977   }
5978 
5979   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5980   // SI/CI have.
5981   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5982     return hasSGPR102_SGPR103();
5983 
5984   return true;
5985 }
5986 
5987 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
5988                                           StringRef Mnemonic,
5989                                           OperandMode Mode) {
5990   ParseStatus Res = parseVOPD(Operands);
5991   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
5992     return Res;
5993 
5994   // Try to parse with a custom parser
5995   Res = MatchOperandParserImpl(Operands, Mnemonic);
5996 
5997   // If we successfully parsed the operand or if there as an error parsing,
5998   // we are done.
5999   //
6000   // If we are parsing after we reach EndOfStatement then this means we
6001   // are appending default values to the Operands list.  This is only done
6002   // by custom parser, so we shouldn't continue on to the generic parsing.
6003   if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6004     return Res;
6005 
6006   SMLoc RBraceLoc;
6007   SMLoc LBraceLoc = getLoc();
6008   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6009     unsigned Prefix = Operands.size();
6010 
6011     for (;;) {
6012       auto Loc = getLoc();
6013       Res = parseReg(Operands);
6014       if (Res.isNoMatch())
6015         Error(Loc, "expected a register");
6016       if (!Res.isSuccess())
6017         return ParseStatus::Failure;
6018 
6019       RBraceLoc = getLoc();
6020       if (trySkipToken(AsmToken::RBrac))
6021         break;
6022 
6023       if (!skipToken(AsmToken::Comma,
6024                      "expected a comma or a closing square bracket"))
6025         return ParseStatus::Failure;
6026     }
6027 
6028     if (Operands.size() - Prefix > 1) {
6029       Operands.insert(Operands.begin() + Prefix,
6030                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6031       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6032     }
6033 
6034     return ParseStatus::Success;
6035   }
6036 
6037   return parseRegOrImm(Operands);
6038 }
6039 
6040 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6041   // Clear any forced encodings from the previous instruction.
6042   setForcedEncodingSize(0);
6043   setForcedDPP(false);
6044   setForcedSDWA(false);
6045 
6046   if (Name.ends_with("_e64_dpp")) {
6047     setForcedDPP(true);
6048     setForcedEncodingSize(64);
6049     return Name.substr(0, Name.size() - 8);
6050   } else if (Name.ends_with("_e64")) {
6051     setForcedEncodingSize(64);
6052     return Name.substr(0, Name.size() - 4);
6053   } else if (Name.ends_with("_e32")) {
6054     setForcedEncodingSize(32);
6055     return Name.substr(0, Name.size() - 4);
6056   } else if (Name.ends_with("_dpp")) {
6057     setForcedDPP(true);
6058     return Name.substr(0, Name.size() - 4);
6059   } else if (Name.ends_with("_sdwa")) {
6060     setForcedSDWA(true);
6061     return Name.substr(0, Name.size() - 5);
6062   }
6063   return Name;
6064 }
6065 
6066 static void applyMnemonicAliases(StringRef &Mnemonic,
6067                                  const FeatureBitset &Features,
6068                                  unsigned VariantID);
6069 
6070 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6071                                        StringRef Name,
6072                                        SMLoc NameLoc, OperandVector &Operands) {
6073   // Add the instruction mnemonic
6074   Name = parseMnemonicSuffix(Name);
6075 
6076   // If the target architecture uses MnemonicAlias, call it here to parse
6077   // operands correctly.
6078   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6079 
6080   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6081 
6082   bool IsMIMG = Name.starts_with("image_");
6083 
6084   while (!trySkipToken(AsmToken::EndOfStatement)) {
6085     OperandMode Mode = OperandMode_Default;
6086     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6087       Mode = OperandMode_NSA;
6088     ParseStatus Res = parseOperand(Operands, Name, Mode);
6089 
6090     if (!Res.isSuccess()) {
6091       checkUnsupportedInstruction(Name, NameLoc);
6092       if (!Parser.hasPendingError()) {
6093         // FIXME: use real operand location rather than the current location.
6094         StringRef Msg = Res.isFailure() ? "failed parsing operand."
6095                                         : "not a valid operand.";
6096         Error(getLoc(), Msg);
6097       }
6098       while (!trySkipToken(AsmToken::EndOfStatement)) {
6099         lex();
6100       }
6101       return true;
6102     }
6103 
6104     // Eat the comma or space if there is one.
6105     trySkipToken(AsmToken::Comma);
6106   }
6107 
6108   return false;
6109 }
6110 
6111 //===----------------------------------------------------------------------===//
6112 // Utility functions
6113 //===----------------------------------------------------------------------===//
6114 
6115 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6116                                           OperandVector &Operands) {
6117   SMLoc S = getLoc();
6118   if (!trySkipId(Name))
6119     return ParseStatus::NoMatch;
6120 
6121   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6122   return ParseStatus::Success;
6123 }
6124 
6125 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6126                                                 int64_t &IntVal) {
6127 
6128   if (!trySkipId(Prefix, AsmToken::Colon))
6129     return ParseStatus::NoMatch;
6130 
6131   return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6132 }
6133 
6134 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6135     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6136     std::function<bool(int64_t &)> ConvertResult) {
6137   SMLoc S = getLoc();
6138   int64_t Value = 0;
6139 
6140   ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6141   if (!Res.isSuccess())
6142     return Res;
6143 
6144   if (ConvertResult && !ConvertResult(Value)) {
6145     Error(S, "invalid " + StringRef(Prefix) + " value.");
6146   }
6147 
6148   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6149   return ParseStatus::Success;
6150 }
6151 
6152 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6153     const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6154     bool (*ConvertResult)(int64_t &)) {
6155   SMLoc S = getLoc();
6156   if (!trySkipId(Prefix, AsmToken::Colon))
6157     return ParseStatus::NoMatch;
6158 
6159   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6160     return ParseStatus::Failure;
6161 
6162   unsigned Val = 0;
6163   const unsigned MaxSize = 4;
6164 
6165   // FIXME: How to verify the number of elements matches the number of src
6166   // operands?
6167   for (int I = 0; ; ++I) {
6168     int64_t Op;
6169     SMLoc Loc = getLoc();
6170     if (!parseExpr(Op))
6171       return ParseStatus::Failure;
6172 
6173     if (Op != 0 && Op != 1)
6174       return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6175 
6176     Val |= (Op << I);
6177 
6178     if (trySkipToken(AsmToken::RBrac))
6179       break;
6180 
6181     if (I + 1 == MaxSize)
6182       return Error(getLoc(), "expected a closing square bracket");
6183 
6184     if (!skipToken(AsmToken::Comma, "expected a comma"))
6185       return ParseStatus::Failure;
6186   }
6187 
6188   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6189   return ParseStatus::Success;
6190 }
6191 
6192 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6193                                            OperandVector &Operands,
6194                                            AMDGPUOperand::ImmTy ImmTy) {
6195   int64_t Bit;
6196   SMLoc S = getLoc();
6197 
6198   if (trySkipId(Name)) {
6199     Bit = 1;
6200   } else if (trySkipId("no", Name)) {
6201     Bit = 0;
6202   } else {
6203     return ParseStatus::NoMatch;
6204   }
6205 
6206   if (Name == "r128" && !hasMIMG_R128())
6207     return Error(S, "r128 modifier is not supported on this GPU");
6208   if (Name == "a16" && !hasA16())
6209     return Error(S, "a16 modifier is not supported on this GPU");
6210 
6211   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6212     ImmTy = AMDGPUOperand::ImmTyR128A16;
6213 
6214   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6215   return ParseStatus::Success;
6216 }
6217 
6218 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6219                                       bool &Disabling) const {
6220   Disabling = Id.consume_front("no");
6221 
6222   if (isGFX940() && !Mnemo.starts_with("s_")) {
6223     return StringSwitch<unsigned>(Id)
6224         .Case("nt", AMDGPU::CPol::NT)
6225         .Case("sc0", AMDGPU::CPol::SC0)
6226         .Case("sc1", AMDGPU::CPol::SC1)
6227         .Default(0);
6228   }
6229 
6230   return StringSwitch<unsigned>(Id)
6231       .Case("dlc", AMDGPU::CPol::DLC)
6232       .Case("glc", AMDGPU::CPol::GLC)
6233       .Case("scc", AMDGPU::CPol::SCC)
6234       .Case("slc", AMDGPU::CPol::SLC)
6235       .Default(0);
6236 }
6237 
6238 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
6239   if (isGFX12Plus()) {
6240     SMLoc StringLoc = getLoc();
6241 
6242     int64_t CPolVal = 0;
6243     ParseStatus ResTH = ParseStatus::NoMatch;
6244     ParseStatus ResScope = ParseStatus::NoMatch;
6245 
6246     for (;;) {
6247       if (ResTH.isNoMatch()) {
6248         int64_t TH;
6249         ResTH = parseTH(Operands, TH);
6250         if (ResTH.isFailure())
6251           return ResTH;
6252         if (ResTH.isSuccess()) {
6253           CPolVal |= TH;
6254           continue;
6255         }
6256       }
6257 
6258       if (ResScope.isNoMatch()) {
6259         int64_t Scope;
6260         ResScope = parseScope(Operands, Scope);
6261         if (ResScope.isFailure())
6262           return ResScope;
6263         if (ResScope.isSuccess()) {
6264           CPolVal |= Scope;
6265           continue;
6266         }
6267       }
6268 
6269       break;
6270     }
6271 
6272     if (ResTH.isNoMatch() && ResScope.isNoMatch())
6273       return ParseStatus::NoMatch;
6274 
6275     Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
6276                                                 AMDGPUOperand::ImmTyCPol));
6277     return ParseStatus::Success;
6278   }
6279 
6280   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
6281   SMLoc OpLoc = getLoc();
6282   unsigned Enabled = 0, Seen = 0;
6283   for (;;) {
6284     SMLoc S = getLoc();
6285     bool Disabling;
6286     unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
6287     if (!CPol)
6288       break;
6289 
6290     lex();
6291 
6292     if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
6293       return Error(S, "dlc modifier is not supported on this GPU");
6294 
6295     if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
6296       return Error(S, "scc modifier is not supported on this GPU");
6297 
6298     if (Seen & CPol)
6299       return Error(S, "duplicate cache policy modifier");
6300 
6301     if (!Disabling)
6302       Enabled |= CPol;
6303 
6304     Seen |= CPol;
6305   }
6306 
6307   if (!Seen)
6308     return ParseStatus::NoMatch;
6309 
6310   Operands.push_back(
6311       AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
6312   return ParseStatus::Success;
6313 }
6314 
6315 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6316                                         int64_t &Scope) {
6317   Scope = AMDGPU::CPol::SCOPE_CU; // default;
6318 
6319   StringRef Value;
6320   SMLoc StringLoc;
6321   ParseStatus Res;
6322 
6323   Res = parseStringWithPrefix("scope", Value, StringLoc);
6324   if (!Res.isSuccess())
6325     return Res;
6326 
6327   Scope = StringSwitch<int64_t>(Value)
6328               .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6329               .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6330               .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6331               .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6332               .Default(0xffffffff);
6333 
6334   if (Scope == 0xffffffff)
6335     return Error(StringLoc, "invalid scope value");
6336 
6337   return ParseStatus::Success;
6338 }
6339 
6340 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
6341   TH = AMDGPU::CPol::TH_RT; // default
6342 
6343   StringRef Value;
6344   SMLoc StringLoc;
6345   ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
6346   if (!Res.isSuccess())
6347     return Res;
6348 
6349   if (Value == "TH_DEFAULT")
6350     TH = AMDGPU::CPol::TH_RT;
6351   else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
6352            Value == "TH_LOAD_NT_WB") {
6353     return Error(StringLoc, "invalid th value");
6354   } else if (Value.starts_with("TH_ATOMIC_")) {
6355     Value = Value.drop_front(10);
6356     TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
6357   } else if (Value.starts_with("TH_LOAD_")) {
6358     Value = Value.drop_front(8);
6359     TH = AMDGPU::CPol::TH_TYPE_LOAD;
6360   } else if (Value.starts_with("TH_STORE_")) {
6361     Value = Value.drop_front(9);
6362     TH = AMDGPU::CPol::TH_TYPE_STORE;
6363   } else {
6364     return Error(StringLoc, "invalid th value");
6365   }
6366 
6367   if (Value == "BYPASS")
6368     TH |= AMDGPU::CPol::TH_REAL_BYPASS;
6369 
6370   if (TH != 0) {
6371     if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
6372       TH |= StringSwitch<int64_t>(Value)
6373                 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6374                 .Case("RT", AMDGPU::CPol::TH_RT)
6375                 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
6376                 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
6377                 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
6378                                        AMDGPU::CPol::TH_ATOMIC_RETURN)
6379                 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
6380                 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
6381                                         AMDGPU::CPol::TH_ATOMIC_NT)
6382                 .Default(0xffffffff);
6383     else
6384       TH |= StringSwitch<int64_t>(Value)
6385                 .Case("RT", AMDGPU::CPol::TH_RT)
6386                 .Case("NT", AMDGPU::CPol::TH_NT)
6387                 .Case("HT", AMDGPU::CPol::TH_HT)
6388                 .Case("LU", AMDGPU::CPol::TH_LU)
6389                 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
6390                 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
6391                 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
6392                 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
6393                 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
6394                 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
6395                 .Default(0xffffffff);
6396   }
6397 
6398   if (TH == 0xffffffff)
6399     return Error(StringLoc, "invalid th value");
6400 
6401   return ParseStatus::Success;
6402 }
6403 
6404 static void addOptionalImmOperand(
6405   MCInst& Inst, const OperandVector& Operands,
6406   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6407   AMDGPUOperand::ImmTy ImmT,
6408   int64_t Default = 0) {
6409   auto i = OptionalIdx.find(ImmT);
6410   if (i != OptionalIdx.end()) {
6411     unsigned Idx = i->second;
6412     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6413   } else {
6414     Inst.addOperand(MCOperand::createImm(Default));
6415   }
6416 }
6417 
6418 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6419                                                    StringRef &Value,
6420                                                    SMLoc &StringLoc) {
6421   if (!trySkipId(Prefix, AsmToken::Colon))
6422     return ParseStatus::NoMatch;
6423 
6424   StringLoc = getLoc();
6425   return parseId(Value, "expected an identifier") ? ParseStatus::Success
6426                                                   : ParseStatus::Failure;
6427 }
6428 
6429 //===----------------------------------------------------------------------===//
6430 // MTBUF format
6431 //===----------------------------------------------------------------------===//
6432 
6433 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6434                                   int64_t MaxVal,
6435                                   int64_t &Fmt) {
6436   int64_t Val;
6437   SMLoc Loc = getLoc();
6438 
6439   auto Res = parseIntWithPrefix(Pref, Val);
6440   if (Res.isFailure())
6441     return false;
6442   if (Res.isNoMatch())
6443     return true;
6444 
6445   if (Val < 0 || Val > MaxVal) {
6446     Error(Loc, Twine("out of range ", StringRef(Pref)));
6447     return false;
6448   }
6449 
6450   Fmt = Val;
6451   return true;
6452 }
6453 
6454 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6455 // values to live in a joint format operand in the MCInst encoding.
6456 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6457   using namespace llvm::AMDGPU::MTBUFFormat;
6458 
6459   int64_t Dfmt = DFMT_UNDEF;
6460   int64_t Nfmt = NFMT_UNDEF;
6461 
6462   // dfmt and nfmt can appear in either order, and each is optional.
6463   for (int I = 0; I < 2; ++I) {
6464     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6465       return ParseStatus::Failure;
6466 
6467     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
6468       return ParseStatus::Failure;
6469 
6470     // Skip optional comma between dfmt/nfmt
6471     // but guard against 2 commas following each other.
6472     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6473         !peekToken().is(AsmToken::Comma)) {
6474       trySkipToken(AsmToken::Comma);
6475     }
6476   }
6477 
6478   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6479     return ParseStatus::NoMatch;
6480 
6481   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6482   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6483 
6484   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6485   return ParseStatus::Success;
6486 }
6487 
6488 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6489   using namespace llvm::AMDGPU::MTBUFFormat;
6490 
6491   int64_t Fmt = UFMT_UNDEF;
6492 
6493   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6494     return ParseStatus::Failure;
6495 
6496   if (Fmt == UFMT_UNDEF)
6497     return ParseStatus::NoMatch;
6498 
6499   Format = Fmt;
6500   return ParseStatus::Success;
6501 }
6502 
6503 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6504                                     int64_t &Nfmt,
6505                                     StringRef FormatStr,
6506                                     SMLoc Loc) {
6507   using namespace llvm::AMDGPU::MTBUFFormat;
6508   int64_t Format;
6509 
6510   Format = getDfmt(FormatStr);
6511   if (Format != DFMT_UNDEF) {
6512     Dfmt = Format;
6513     return true;
6514   }
6515 
6516   Format = getNfmt(FormatStr, getSTI());
6517   if (Format != NFMT_UNDEF) {
6518     Nfmt = Format;
6519     return true;
6520   }
6521 
6522   Error(Loc, "unsupported format");
6523   return false;
6524 }
6525 
6526 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6527                                                       SMLoc FormatLoc,
6528                                                       int64_t &Format) {
6529   using namespace llvm::AMDGPU::MTBUFFormat;
6530 
6531   int64_t Dfmt = DFMT_UNDEF;
6532   int64_t Nfmt = NFMT_UNDEF;
6533   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6534     return ParseStatus::Failure;
6535 
6536   if (trySkipToken(AsmToken::Comma)) {
6537     StringRef Str;
6538     SMLoc Loc = getLoc();
6539     if (!parseId(Str, "expected a format string") ||
6540         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
6541       return ParseStatus::Failure;
6542     if (Dfmt == DFMT_UNDEF)
6543       return Error(Loc, "duplicate numeric format");
6544     if (Nfmt == NFMT_UNDEF)
6545       return Error(Loc, "duplicate data format");
6546   }
6547 
6548   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6549   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6550 
6551   if (isGFX10Plus()) {
6552     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6553     if (Ufmt == UFMT_UNDEF)
6554       return Error(FormatLoc, "unsupported format");
6555     Format = Ufmt;
6556   } else {
6557     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6558   }
6559 
6560   return ParseStatus::Success;
6561 }
6562 
6563 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6564                                                         SMLoc Loc,
6565                                                         int64_t &Format) {
6566   using namespace llvm::AMDGPU::MTBUFFormat;
6567 
6568   auto Id = getUnifiedFormat(FormatStr, getSTI());
6569   if (Id == UFMT_UNDEF)
6570     return ParseStatus::NoMatch;
6571 
6572   if (!isGFX10Plus())
6573     return Error(Loc, "unified format is not supported on this GPU");
6574 
6575   Format = Id;
6576   return ParseStatus::Success;
6577 }
6578 
6579 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6580   using namespace llvm::AMDGPU::MTBUFFormat;
6581   SMLoc Loc = getLoc();
6582 
6583   if (!parseExpr(Format))
6584     return ParseStatus::Failure;
6585   if (!isValidFormatEncoding(Format, getSTI()))
6586     return Error(Loc, "out of range format");
6587 
6588   return ParseStatus::Success;
6589 }
6590 
6591 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6592   using namespace llvm::AMDGPU::MTBUFFormat;
6593 
6594   if (!trySkipId("format", AsmToken::Colon))
6595     return ParseStatus::NoMatch;
6596 
6597   if (trySkipToken(AsmToken::LBrac)) {
6598     StringRef FormatStr;
6599     SMLoc Loc = getLoc();
6600     if (!parseId(FormatStr, "expected a format string"))
6601       return ParseStatus::Failure;
6602 
6603     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6604     if (Res.isNoMatch())
6605       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6606     if (!Res.isSuccess())
6607       return Res;
6608 
6609     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6610       return ParseStatus::Failure;
6611 
6612     return ParseStatus::Success;
6613   }
6614 
6615   return parseNumericFormat(Format);
6616 }
6617 
6618 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6619   using namespace llvm::AMDGPU::MTBUFFormat;
6620 
6621   int64_t Format = getDefaultFormatEncoding(getSTI());
6622   ParseStatus Res;
6623   SMLoc Loc = getLoc();
6624 
6625   // Parse legacy format syntax.
6626   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6627   if (Res.isFailure())
6628     return Res;
6629 
6630   bool FormatFound = Res.isSuccess();
6631 
6632   Operands.push_back(
6633     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6634 
6635   if (FormatFound)
6636     trySkipToken(AsmToken::Comma);
6637 
6638   if (isToken(AsmToken::EndOfStatement)) {
6639     // We are expecting an soffset operand,
6640     // but let matcher handle the error.
6641     return ParseStatus::Success;
6642   }
6643 
6644   // Parse soffset.
6645   Res = parseRegOrImm(Operands);
6646   if (!Res.isSuccess())
6647     return Res;
6648 
6649   trySkipToken(AsmToken::Comma);
6650 
6651   if (!FormatFound) {
6652     Res = parseSymbolicOrNumericFormat(Format);
6653     if (Res.isFailure())
6654       return Res;
6655     if (Res.isSuccess()) {
6656       auto Size = Operands.size();
6657       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6658       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6659       Op.setImm(Format);
6660     }
6661     return ParseStatus::Success;
6662   }
6663 
6664   if (isId("format") && peekToken().is(AsmToken::Colon))
6665     return Error(getLoc(), "duplicate format");
6666   return ParseStatus::Success;
6667 }
6668 
6669 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6670   ParseStatus Res =
6671       parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6672   if (Res.isNoMatch()) {
6673     Res = parseIntWithPrefix("inst_offset", Operands,
6674                              AMDGPUOperand::ImmTyInstOffset);
6675   }
6676   return Res;
6677 }
6678 
6679 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6680   ParseStatus Res =
6681       parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6682   if (Res.isNoMatch())
6683     Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6684   return Res;
6685 }
6686 
6687 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6688   ParseStatus Res =
6689       parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6690   if (Res.isNoMatch()) {
6691     Res =
6692         parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6693   }
6694   return Res;
6695 }
6696 
6697 //===----------------------------------------------------------------------===//
6698 // Exp
6699 //===----------------------------------------------------------------------===//
6700 
6701 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6702   OptionalImmIndexMap OptionalIdx;
6703 
6704   unsigned OperandIdx[4];
6705   unsigned EnMask = 0;
6706   int SrcIdx = 0;
6707 
6708   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6709     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6710 
6711     // Add the register arguments
6712     if (Op.isReg()) {
6713       assert(SrcIdx < 4);
6714       OperandIdx[SrcIdx] = Inst.size();
6715       Op.addRegOperands(Inst, 1);
6716       ++SrcIdx;
6717       continue;
6718     }
6719 
6720     if (Op.isOff()) {
6721       assert(SrcIdx < 4);
6722       OperandIdx[SrcIdx] = Inst.size();
6723       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6724       ++SrcIdx;
6725       continue;
6726     }
6727 
6728     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6729       Op.addImmOperands(Inst, 1);
6730       continue;
6731     }
6732 
6733     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6734       continue;
6735 
6736     // Handle optional arguments
6737     OptionalIdx[Op.getImmTy()] = i;
6738   }
6739 
6740   assert(SrcIdx == 4);
6741 
6742   bool Compr = false;
6743   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6744     Compr = true;
6745     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6746     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6747     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6748   }
6749 
6750   for (auto i = 0; i < SrcIdx; ++i) {
6751     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6752       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
6753     }
6754   }
6755 
6756   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6757   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6758 
6759   Inst.addOperand(MCOperand::createImm(EnMask));
6760 }
6761 
6762 //===----------------------------------------------------------------------===//
6763 // s_waitcnt
6764 //===----------------------------------------------------------------------===//
6765 
6766 static bool
6767 encodeCnt(
6768   const AMDGPU::IsaVersion ISA,
6769   int64_t &IntVal,
6770   int64_t CntVal,
6771   bool Saturate,
6772   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6773   unsigned (*decode)(const IsaVersion &Version, unsigned))
6774 {
6775   bool Failed = false;
6776 
6777   IntVal = encode(ISA, IntVal, CntVal);
6778   if (CntVal != decode(ISA, IntVal)) {
6779     if (Saturate) {
6780       IntVal = encode(ISA, IntVal, -1);
6781     } else {
6782       Failed = true;
6783     }
6784   }
6785   return Failed;
6786 }
6787 
6788 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6789 
6790   SMLoc CntLoc = getLoc();
6791   StringRef CntName = getTokenStr();
6792 
6793   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6794       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6795     return false;
6796 
6797   int64_t CntVal;
6798   SMLoc ValLoc = getLoc();
6799   if (!parseExpr(CntVal))
6800     return false;
6801 
6802   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6803 
6804   bool Failed = true;
6805   bool Sat = CntName.ends_with("_sat");
6806 
6807   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6808     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6809   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6810     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6811   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6812     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6813   } else {
6814     Error(CntLoc, "invalid counter name " + CntName);
6815     return false;
6816   }
6817 
6818   if (Failed) {
6819     Error(ValLoc, "too large value for " + CntName);
6820     return false;
6821   }
6822 
6823   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6824     return false;
6825 
6826   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6827     if (isToken(AsmToken::EndOfStatement)) {
6828       Error(getLoc(), "expected a counter name");
6829       return false;
6830     }
6831   }
6832 
6833   return true;
6834 }
6835 
6836 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
6837   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6838   int64_t Waitcnt = getWaitcntBitMask(ISA);
6839   SMLoc S = getLoc();
6840 
6841   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6842     while (!isToken(AsmToken::EndOfStatement)) {
6843       if (!parseCnt(Waitcnt))
6844         return ParseStatus::Failure;
6845     }
6846   } else {
6847     if (!parseExpr(Waitcnt))
6848       return ParseStatus::Failure;
6849   }
6850 
6851   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6852   return ParseStatus::Success;
6853 }
6854 
6855 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6856   SMLoc FieldLoc = getLoc();
6857   StringRef FieldName = getTokenStr();
6858   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6859       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6860     return false;
6861 
6862   SMLoc ValueLoc = getLoc();
6863   StringRef ValueName = getTokenStr();
6864   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6865       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6866     return false;
6867 
6868   unsigned Shift;
6869   if (FieldName == "instid0") {
6870     Shift = 0;
6871   } else if (FieldName == "instskip") {
6872     Shift = 4;
6873   } else if (FieldName == "instid1") {
6874     Shift = 7;
6875   } else {
6876     Error(FieldLoc, "invalid field name " + FieldName);
6877     return false;
6878   }
6879 
6880   int Value;
6881   if (Shift == 4) {
6882     // Parse values for instskip.
6883     Value = StringSwitch<int>(ValueName)
6884                 .Case("SAME", 0)
6885                 .Case("NEXT", 1)
6886                 .Case("SKIP_1", 2)
6887                 .Case("SKIP_2", 3)
6888                 .Case("SKIP_3", 4)
6889                 .Case("SKIP_4", 5)
6890                 .Default(-1);
6891   } else {
6892     // Parse values for instid0 and instid1.
6893     Value = StringSwitch<int>(ValueName)
6894                 .Case("NO_DEP", 0)
6895                 .Case("VALU_DEP_1", 1)
6896                 .Case("VALU_DEP_2", 2)
6897                 .Case("VALU_DEP_3", 3)
6898                 .Case("VALU_DEP_4", 4)
6899                 .Case("TRANS32_DEP_1", 5)
6900                 .Case("TRANS32_DEP_2", 6)
6901                 .Case("TRANS32_DEP_3", 7)
6902                 .Case("FMA_ACCUM_CYCLE_1", 8)
6903                 .Case("SALU_CYCLE_1", 9)
6904                 .Case("SALU_CYCLE_2", 10)
6905                 .Case("SALU_CYCLE_3", 11)
6906                 .Default(-1);
6907   }
6908   if (Value < 0) {
6909     Error(ValueLoc, "invalid value name " + ValueName);
6910     return false;
6911   }
6912 
6913   Delay |= Value << Shift;
6914   return true;
6915 }
6916 
6917 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
6918   int64_t Delay = 0;
6919   SMLoc S = getLoc();
6920 
6921   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6922     do {
6923       if (!parseDelay(Delay))
6924         return ParseStatus::Failure;
6925     } while (trySkipToken(AsmToken::Pipe));
6926   } else {
6927     if (!parseExpr(Delay))
6928       return ParseStatus::Failure;
6929   }
6930 
6931   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6932   return ParseStatus::Success;
6933 }
6934 
6935 bool
6936 AMDGPUOperand::isSWaitCnt() const {
6937   return isImm();
6938 }
6939 
6940 bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
6941 
6942 //===----------------------------------------------------------------------===//
6943 // DepCtr
6944 //===----------------------------------------------------------------------===//
6945 
6946 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6947                                   StringRef DepCtrName) {
6948   switch (ErrorId) {
6949   case OPR_ID_UNKNOWN:
6950     Error(Loc, Twine("invalid counter name ", DepCtrName));
6951     return;
6952   case OPR_ID_UNSUPPORTED:
6953     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6954     return;
6955   case OPR_ID_DUPLICATE:
6956     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6957     return;
6958   case OPR_VAL_INVALID:
6959     Error(Loc, Twine("invalid value for ", DepCtrName));
6960     return;
6961   default:
6962     assert(false);
6963   }
6964 }
6965 
6966 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6967 
6968   using namespace llvm::AMDGPU::DepCtr;
6969 
6970   SMLoc DepCtrLoc = getLoc();
6971   StringRef DepCtrName = getTokenStr();
6972 
6973   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6974       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6975     return false;
6976 
6977   int64_t ExprVal;
6978   if (!parseExpr(ExprVal))
6979     return false;
6980 
6981   unsigned PrevOprMask = UsedOprMask;
6982   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6983 
6984   if (CntVal < 0) {
6985     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6986     return false;
6987   }
6988 
6989   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6990     return false;
6991 
6992   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6993     if (isToken(AsmToken::EndOfStatement)) {
6994       Error(getLoc(), "expected a counter name");
6995       return false;
6996     }
6997   }
6998 
6999   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
7000   DepCtr = (DepCtr & ~CntValMask) | CntVal;
7001   return true;
7002 }
7003 
7004 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7005   using namespace llvm::AMDGPU::DepCtr;
7006 
7007   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7008   SMLoc Loc = getLoc();
7009 
7010   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7011     unsigned UsedOprMask = 0;
7012     while (!isToken(AsmToken::EndOfStatement)) {
7013       if (!parseDepCtr(DepCtr, UsedOprMask))
7014         return ParseStatus::Failure;
7015     }
7016   } else {
7017     if (!parseExpr(DepCtr))
7018       return ParseStatus::Failure;
7019   }
7020 
7021   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7022   return ParseStatus::Success;
7023 }
7024 
7025 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7026 
7027 //===----------------------------------------------------------------------===//
7028 // hwreg
7029 //===----------------------------------------------------------------------===//
7030 
7031 bool
7032 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
7033                                 OperandInfoTy &Offset,
7034                                 OperandInfoTy &Width) {
7035   using namespace llvm::AMDGPU::Hwreg;
7036 
7037   // The register may be specified by name or using a numeric code
7038   HwReg.Loc = getLoc();
7039   if (isToken(AsmToken::Identifier) &&
7040       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7041     HwReg.IsSymbolic = true;
7042     lex(); // skip register name
7043   } else if (!parseExpr(HwReg.Id, "a register name")) {
7044     return false;
7045   }
7046 
7047   if (trySkipToken(AsmToken::RParen))
7048     return true;
7049 
7050   // parse optional params
7051   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
7052     return false;
7053 
7054   Offset.Loc = getLoc();
7055   if (!parseExpr(Offset.Id))
7056     return false;
7057 
7058   if (!skipToken(AsmToken::Comma, "expected a comma"))
7059     return false;
7060 
7061   Width.Loc = getLoc();
7062   return parseExpr(Width.Id) &&
7063          skipToken(AsmToken::RParen, "expected a closing parenthesis");
7064 }
7065 
7066 bool
7067 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
7068                                const OperandInfoTy &Offset,
7069                                const OperandInfoTy &Width) {
7070 
7071   using namespace llvm::AMDGPU::Hwreg;
7072 
7073   if (HwReg.IsSymbolic) {
7074     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
7075       Error(HwReg.Loc,
7076             "specified hardware register is not supported on this GPU");
7077       return false;
7078     }
7079   } else {
7080     if (!isValidHwreg(HwReg.Id)) {
7081       Error(HwReg.Loc,
7082             "invalid code of hardware register: only 6-bit values are legal");
7083       return false;
7084     }
7085   }
7086   if (!isValidHwregOffset(Offset.Id)) {
7087     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
7088     return false;
7089   }
7090   if (!isValidHwregWidth(Width.Id)) {
7091     Error(Width.Loc,
7092           "invalid bitfield width: only values from 1 to 32 are legal");
7093     return false;
7094   }
7095   return true;
7096 }
7097 
7098 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7099   using namespace llvm::AMDGPU::Hwreg;
7100 
7101   int64_t ImmVal = 0;
7102   SMLoc Loc = getLoc();
7103 
7104   if (trySkipId("hwreg", AsmToken::LParen)) {
7105     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
7106     OperandInfoTy Offset(OFFSET_DEFAULT_);
7107     OperandInfoTy Width(WIDTH_DEFAULT_);
7108     if (parseHwregBody(HwReg, Offset, Width) &&
7109         validateHwreg(HwReg, Offset, Width)) {
7110       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
7111     } else {
7112       return ParseStatus::Failure;
7113     }
7114   } else if (parseExpr(ImmVal, "a hwreg macro")) {
7115     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7116       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7117   } else {
7118     return ParseStatus::Failure;
7119   }
7120 
7121   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7122   return ParseStatus::Success;
7123 }
7124 
7125 bool AMDGPUOperand::isHwreg() const {
7126   return isImmTy(ImmTyHwreg);
7127 }
7128 
7129 //===----------------------------------------------------------------------===//
7130 // sendmsg
7131 //===----------------------------------------------------------------------===//
7132 
7133 bool
7134 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
7135                                   OperandInfoTy &Op,
7136                                   OperandInfoTy &Stream) {
7137   using namespace llvm::AMDGPU::SendMsg;
7138 
7139   Msg.Loc = getLoc();
7140   if (isToken(AsmToken::Identifier) &&
7141       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
7142     Msg.IsSymbolic = true;
7143     lex(); // skip message name
7144   } else if (!parseExpr(Msg.Id, "a message name")) {
7145     return false;
7146   }
7147 
7148   if (trySkipToken(AsmToken::Comma)) {
7149     Op.IsDefined = true;
7150     Op.Loc = getLoc();
7151     if (isToken(AsmToken::Identifier) &&
7152         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
7153       lex(); // skip operation name
7154     } else if (!parseExpr(Op.Id, "an operation name")) {
7155       return false;
7156     }
7157 
7158     if (trySkipToken(AsmToken::Comma)) {
7159       Stream.IsDefined = true;
7160       Stream.Loc = getLoc();
7161       if (!parseExpr(Stream.Id))
7162         return false;
7163     }
7164   }
7165 
7166   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
7167 }
7168 
7169 bool
7170 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7171                                  const OperandInfoTy &Op,
7172                                  const OperandInfoTy &Stream) {
7173   using namespace llvm::AMDGPU::SendMsg;
7174 
7175   // Validation strictness depends on whether message is specified
7176   // in a symbolic or in a numeric form. In the latter case
7177   // only encoding possibility is checked.
7178   bool Strict = Msg.IsSymbolic;
7179 
7180   if (Strict) {
7181     if (Msg.Id == OPR_ID_UNSUPPORTED) {
7182       Error(Msg.Loc, "specified message id is not supported on this GPU");
7183       return false;
7184     }
7185   } else {
7186     if (!isValidMsgId(Msg.Id, getSTI())) {
7187       Error(Msg.Loc, "invalid message id");
7188       return false;
7189     }
7190   }
7191   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
7192     if (Op.IsDefined) {
7193       Error(Op.Loc, "message does not support operations");
7194     } else {
7195       Error(Msg.Loc, "missing message operation");
7196     }
7197     return false;
7198   }
7199   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
7200     Error(Op.Loc, "invalid operation id");
7201     return false;
7202   }
7203   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
7204       Stream.IsDefined) {
7205     Error(Stream.Loc, "message operation does not support streams");
7206     return false;
7207   }
7208   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
7209     Error(Stream.Loc, "invalid message stream id");
7210     return false;
7211   }
7212   return true;
7213 }
7214 
7215 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7216   using namespace llvm::AMDGPU::SendMsg;
7217 
7218   int64_t ImmVal = 0;
7219   SMLoc Loc = getLoc();
7220 
7221   if (trySkipId("sendmsg", AsmToken::LParen)) {
7222     OperandInfoTy Msg(OPR_ID_UNKNOWN);
7223     OperandInfoTy Op(OP_NONE_);
7224     OperandInfoTy Stream(STREAM_ID_NONE_);
7225     if (parseSendMsgBody(Msg, Op, Stream) &&
7226         validateSendMsg(Msg, Op, Stream)) {
7227       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
7228     } else {
7229       return ParseStatus::Failure;
7230     }
7231   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7232     if (ImmVal < 0 || !isUInt<16>(ImmVal))
7233       return Error(Loc, "invalid immediate: only 16-bit values are legal");
7234   } else {
7235     return ParseStatus::Failure;
7236   }
7237 
7238   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7239   return ParseStatus::Success;
7240 }
7241 
7242 bool AMDGPUOperand::isSendMsg() const {
7243   return isImmTy(ImmTySendMsg);
7244 }
7245 
7246 //===----------------------------------------------------------------------===//
7247 // v_interp
7248 //===----------------------------------------------------------------------===//
7249 
7250 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7251   StringRef Str;
7252   SMLoc S = getLoc();
7253 
7254   if (!parseId(Str))
7255     return ParseStatus::NoMatch;
7256 
7257   int Slot = StringSwitch<int>(Str)
7258     .Case("p10", 0)
7259     .Case("p20", 1)
7260     .Case("p0", 2)
7261     .Default(-1);
7262 
7263   if (Slot == -1)
7264     return Error(S, "invalid interpolation slot");
7265 
7266   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7267                                               AMDGPUOperand::ImmTyInterpSlot));
7268   return ParseStatus::Success;
7269 }
7270 
7271 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
7272   StringRef Str;
7273   SMLoc S = getLoc();
7274 
7275   if (!parseId(Str))
7276     return ParseStatus::NoMatch;
7277 
7278   if (!Str.starts_with("attr"))
7279     return Error(S, "invalid interpolation attribute");
7280 
7281   StringRef Chan = Str.take_back(2);
7282   int AttrChan = StringSwitch<int>(Chan)
7283     .Case(".x", 0)
7284     .Case(".y", 1)
7285     .Case(".z", 2)
7286     .Case(".w", 3)
7287     .Default(-1);
7288   if (AttrChan == -1)
7289     return Error(S, "invalid or missing interpolation attribute channel");
7290 
7291   Str = Str.drop_back(2).drop_front(4);
7292 
7293   uint8_t Attr;
7294   if (Str.getAsInteger(10, Attr))
7295     return Error(S, "invalid or missing interpolation attribute number");
7296 
7297   if (Attr > 32)
7298     return Error(S, "out of bounds interpolation attribute number");
7299 
7300   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
7301 
7302   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
7303                                               AMDGPUOperand::ImmTyInterpAttr));
7304   Operands.push_back(AMDGPUOperand::CreateImm(
7305       this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
7306   return ParseStatus::Success;
7307 }
7308 
7309 //===----------------------------------------------------------------------===//
7310 // exp
7311 //===----------------------------------------------------------------------===//
7312 
7313 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7314   using namespace llvm::AMDGPU::Exp;
7315 
7316   StringRef Str;
7317   SMLoc S = getLoc();
7318 
7319   if (!parseId(Str))
7320     return ParseStatus::NoMatch;
7321 
7322   unsigned Id = getTgtId(Str);
7323   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7324     return Error(S, (Id == ET_INVALID)
7325                         ? "invalid exp target"
7326                         : "exp target is not supported on this GPU");
7327 
7328   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7329                                               AMDGPUOperand::ImmTyExpTgt));
7330   return ParseStatus::Success;
7331 }
7332 
7333 //===----------------------------------------------------------------------===//
7334 // parser helpers
7335 //===----------------------------------------------------------------------===//
7336 
7337 bool
7338 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7339   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7340 }
7341 
7342 bool
7343 AMDGPUAsmParser::isId(const StringRef Id) const {
7344   return isId(getToken(), Id);
7345 }
7346 
7347 bool
7348 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7349   return getTokenKind() == Kind;
7350 }
7351 
7352 StringRef AMDGPUAsmParser::getId() const {
7353   return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7354 }
7355 
7356 bool
7357 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7358   if (isId(Id)) {
7359     lex();
7360     return true;
7361   }
7362   return false;
7363 }
7364 
7365 bool
7366 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7367   if (isToken(AsmToken::Identifier)) {
7368     StringRef Tok = getTokenStr();
7369     if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7370       lex();
7371       return true;
7372     }
7373   }
7374   return false;
7375 }
7376 
7377 bool
7378 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7379   if (isId(Id) && peekToken().is(Kind)) {
7380     lex();
7381     lex();
7382     return true;
7383   }
7384   return false;
7385 }
7386 
7387 bool
7388 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7389   if (isToken(Kind)) {
7390     lex();
7391     return true;
7392   }
7393   return false;
7394 }
7395 
7396 bool
7397 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7398                            const StringRef ErrMsg) {
7399   if (!trySkipToken(Kind)) {
7400     Error(getLoc(), ErrMsg);
7401     return false;
7402   }
7403   return true;
7404 }
7405 
7406 bool
7407 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7408   SMLoc S = getLoc();
7409 
7410   const MCExpr *Expr;
7411   if (Parser.parseExpression(Expr))
7412     return false;
7413 
7414   if (Expr->evaluateAsAbsolute(Imm))
7415     return true;
7416 
7417   if (Expected.empty()) {
7418     Error(S, "expected absolute expression");
7419   } else {
7420     Error(S, Twine("expected ", Expected) +
7421              Twine(" or an absolute expression"));
7422   }
7423   return false;
7424 }
7425 
7426 bool
7427 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7428   SMLoc S = getLoc();
7429 
7430   const MCExpr *Expr;
7431   if (Parser.parseExpression(Expr))
7432     return false;
7433 
7434   int64_t IntVal;
7435   if (Expr->evaluateAsAbsolute(IntVal)) {
7436     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7437   } else {
7438     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7439   }
7440   return true;
7441 }
7442 
7443 bool
7444 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7445   if (isToken(AsmToken::String)) {
7446     Val = getToken().getStringContents();
7447     lex();
7448     return true;
7449   } else {
7450     Error(getLoc(), ErrMsg);
7451     return false;
7452   }
7453 }
7454 
7455 bool
7456 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7457   if (isToken(AsmToken::Identifier)) {
7458     Val = getTokenStr();
7459     lex();
7460     return true;
7461   } else {
7462     if (!ErrMsg.empty())
7463       Error(getLoc(), ErrMsg);
7464     return false;
7465   }
7466 }
7467 
7468 AsmToken
7469 AMDGPUAsmParser::getToken() const {
7470   return Parser.getTok();
7471 }
7472 
7473 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7474   return isToken(AsmToken::EndOfStatement)
7475              ? getToken()
7476              : getLexer().peekTok(ShouldSkipSpace);
7477 }
7478 
7479 void
7480 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7481   auto TokCount = getLexer().peekTokens(Tokens);
7482 
7483   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7484     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7485 }
7486 
7487 AsmToken::TokenKind
7488 AMDGPUAsmParser::getTokenKind() const {
7489   return getLexer().getKind();
7490 }
7491 
7492 SMLoc
7493 AMDGPUAsmParser::getLoc() const {
7494   return getToken().getLoc();
7495 }
7496 
7497 StringRef
7498 AMDGPUAsmParser::getTokenStr() const {
7499   return getToken().getString();
7500 }
7501 
7502 void
7503 AMDGPUAsmParser::lex() {
7504   Parser.Lex();
7505 }
7506 
7507 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7508   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7509 }
7510 
7511 SMLoc
7512 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7513                                const OperandVector &Operands) const {
7514   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7515     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7516     if (Test(Op))
7517       return Op.getStartLoc();
7518   }
7519   return getInstLoc(Operands);
7520 }
7521 
7522 SMLoc
7523 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7524                            const OperandVector &Operands) const {
7525   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7526   return getOperandLoc(Test, Operands);
7527 }
7528 
7529 SMLoc
7530 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7531                            const OperandVector &Operands) const {
7532   auto Test = [=](const AMDGPUOperand& Op) {
7533     return Op.isRegKind() && Op.getReg() == Reg;
7534   };
7535   return getOperandLoc(Test, Operands);
7536 }
7537 
7538 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7539                                  bool SearchMandatoryLiterals) const {
7540   auto Test = [](const AMDGPUOperand& Op) {
7541     return Op.IsImmKindLiteral() || Op.isExpr();
7542   };
7543   SMLoc Loc = getOperandLoc(Test, Operands);
7544   if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7545     Loc = getMandatoryLitLoc(Operands);
7546   return Loc;
7547 }
7548 
7549 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7550   auto Test = [](const AMDGPUOperand &Op) {
7551     return Op.IsImmKindMandatoryLiteral();
7552   };
7553   return getOperandLoc(Test, Operands);
7554 }
7555 
7556 SMLoc
7557 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7558   auto Test = [](const AMDGPUOperand& Op) {
7559     return Op.isImmKindConst();
7560   };
7561   return getOperandLoc(Test, Operands);
7562 }
7563 
7564 //===----------------------------------------------------------------------===//
7565 // swizzle
7566 //===----------------------------------------------------------------------===//
7567 
7568 LLVM_READNONE
7569 static unsigned
7570 encodeBitmaskPerm(const unsigned AndMask,
7571                   const unsigned OrMask,
7572                   const unsigned XorMask) {
7573   using namespace llvm::AMDGPU::Swizzle;
7574 
7575   return BITMASK_PERM_ENC |
7576          (AndMask << BITMASK_AND_SHIFT) |
7577          (OrMask  << BITMASK_OR_SHIFT)  |
7578          (XorMask << BITMASK_XOR_SHIFT);
7579 }
7580 
7581 bool
7582 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7583                                      const unsigned MinVal,
7584                                      const unsigned MaxVal,
7585                                      const StringRef ErrMsg,
7586                                      SMLoc &Loc) {
7587   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7588     return false;
7589   }
7590   Loc = getLoc();
7591   if (!parseExpr(Op)) {
7592     return false;
7593   }
7594   if (Op < MinVal || Op > MaxVal) {
7595     Error(Loc, ErrMsg);
7596     return false;
7597   }
7598 
7599   return true;
7600 }
7601 
7602 bool
7603 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7604                                       const unsigned MinVal,
7605                                       const unsigned MaxVal,
7606                                       const StringRef ErrMsg) {
7607   SMLoc Loc;
7608   for (unsigned i = 0; i < OpNum; ++i) {
7609     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7610       return false;
7611   }
7612 
7613   return true;
7614 }
7615 
7616 bool
7617 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7618   using namespace llvm::AMDGPU::Swizzle;
7619 
7620   int64_t Lane[LANE_NUM];
7621   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7622                            "expected a 2-bit lane id")) {
7623     Imm = QUAD_PERM_ENC;
7624     for (unsigned I = 0; I < LANE_NUM; ++I) {
7625       Imm |= Lane[I] << (LANE_SHIFT * I);
7626     }
7627     return true;
7628   }
7629   return false;
7630 }
7631 
7632 bool
7633 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7634   using namespace llvm::AMDGPU::Swizzle;
7635 
7636   SMLoc Loc;
7637   int64_t GroupSize;
7638   int64_t LaneIdx;
7639 
7640   if (!parseSwizzleOperand(GroupSize,
7641                            2, 32,
7642                            "group size must be in the interval [2,32]",
7643                            Loc)) {
7644     return false;
7645   }
7646   if (!isPowerOf2_64(GroupSize)) {
7647     Error(Loc, "group size must be a power of two");
7648     return false;
7649   }
7650   if (parseSwizzleOperand(LaneIdx,
7651                           0, GroupSize - 1,
7652                           "lane id must be in the interval [0,group size - 1]",
7653                           Loc)) {
7654     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7655     return true;
7656   }
7657   return false;
7658 }
7659 
7660 bool
7661 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7662   using namespace llvm::AMDGPU::Swizzle;
7663 
7664   SMLoc Loc;
7665   int64_t GroupSize;
7666 
7667   if (!parseSwizzleOperand(GroupSize,
7668                            2, 32,
7669                            "group size must be in the interval [2,32]",
7670                            Loc)) {
7671     return false;
7672   }
7673   if (!isPowerOf2_64(GroupSize)) {
7674     Error(Loc, "group size must be a power of two");
7675     return false;
7676   }
7677 
7678   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7679   return true;
7680 }
7681 
7682 bool
7683 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7684   using namespace llvm::AMDGPU::Swizzle;
7685 
7686   SMLoc Loc;
7687   int64_t GroupSize;
7688 
7689   if (!parseSwizzleOperand(GroupSize,
7690                            1, 16,
7691                            "group size must be in the interval [1,16]",
7692                            Loc)) {
7693     return false;
7694   }
7695   if (!isPowerOf2_64(GroupSize)) {
7696     Error(Loc, "group size must be a power of two");
7697     return false;
7698   }
7699 
7700   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7701   return true;
7702 }
7703 
7704 bool
7705 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7706   using namespace llvm::AMDGPU::Swizzle;
7707 
7708   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7709     return false;
7710   }
7711 
7712   StringRef Ctl;
7713   SMLoc StrLoc = getLoc();
7714   if (!parseString(Ctl)) {
7715     return false;
7716   }
7717   if (Ctl.size() != BITMASK_WIDTH) {
7718     Error(StrLoc, "expected a 5-character mask");
7719     return false;
7720   }
7721 
7722   unsigned AndMask = 0;
7723   unsigned OrMask = 0;
7724   unsigned XorMask = 0;
7725 
7726   for (size_t i = 0; i < Ctl.size(); ++i) {
7727     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7728     switch(Ctl[i]) {
7729     default:
7730       Error(StrLoc, "invalid mask");
7731       return false;
7732     case '0':
7733       break;
7734     case '1':
7735       OrMask |= Mask;
7736       break;
7737     case 'p':
7738       AndMask |= Mask;
7739       break;
7740     case 'i':
7741       AndMask |= Mask;
7742       XorMask |= Mask;
7743       break;
7744     }
7745   }
7746 
7747   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7748   return true;
7749 }
7750 
7751 bool
7752 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7753 
7754   SMLoc OffsetLoc = getLoc();
7755 
7756   if (!parseExpr(Imm, "a swizzle macro")) {
7757     return false;
7758   }
7759   if (!isUInt<16>(Imm)) {
7760     Error(OffsetLoc, "expected a 16-bit offset");
7761     return false;
7762   }
7763   return true;
7764 }
7765 
7766 bool
7767 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7768   using namespace llvm::AMDGPU::Swizzle;
7769 
7770   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
7771 
7772     SMLoc ModeLoc = getLoc();
7773     bool Ok = false;
7774 
7775     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7776       Ok = parseSwizzleQuadPerm(Imm);
7777     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7778       Ok = parseSwizzleBitmaskPerm(Imm);
7779     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7780       Ok = parseSwizzleBroadcast(Imm);
7781     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7782       Ok = parseSwizzleSwap(Imm);
7783     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7784       Ok = parseSwizzleReverse(Imm);
7785     } else {
7786       Error(ModeLoc, "expected a swizzle mode");
7787     }
7788 
7789     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7790   }
7791 
7792   return false;
7793 }
7794 
7795 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
7796   SMLoc S = getLoc();
7797   int64_t Imm = 0;
7798 
7799   if (trySkipId("offset")) {
7800 
7801     bool Ok = false;
7802     if (skipToken(AsmToken::Colon, "expected a colon")) {
7803       if (trySkipId("swizzle")) {
7804         Ok = parseSwizzleMacro(Imm);
7805       } else {
7806         Ok = parseSwizzleOffset(Imm);
7807       }
7808     }
7809 
7810     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7811 
7812     return Ok ? ParseStatus::Success : ParseStatus::Failure;
7813   }
7814   return ParseStatus::NoMatch;
7815 }
7816 
7817 bool
7818 AMDGPUOperand::isSwizzle() const {
7819   return isImmTy(ImmTySwizzle);
7820 }
7821 
7822 //===----------------------------------------------------------------------===//
7823 // VGPR Index Mode
7824 //===----------------------------------------------------------------------===//
7825 
7826 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7827 
7828   using namespace llvm::AMDGPU::VGPRIndexMode;
7829 
7830   if (trySkipToken(AsmToken::RParen)) {
7831     return OFF;
7832   }
7833 
7834   int64_t Imm = 0;
7835 
7836   while (true) {
7837     unsigned Mode = 0;
7838     SMLoc S = getLoc();
7839 
7840     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7841       if (trySkipId(IdSymbolic[ModeId])) {
7842         Mode = 1 << ModeId;
7843         break;
7844       }
7845     }
7846 
7847     if (Mode == 0) {
7848       Error(S, (Imm == 0)?
7849                "expected a VGPR index mode or a closing parenthesis" :
7850                "expected a VGPR index mode");
7851       return UNDEF;
7852     }
7853 
7854     if (Imm & Mode) {
7855       Error(S, "duplicate VGPR index mode");
7856       return UNDEF;
7857     }
7858     Imm |= Mode;
7859 
7860     if (trySkipToken(AsmToken::RParen))
7861       break;
7862     if (!skipToken(AsmToken::Comma,
7863                    "expected a comma or a closing parenthesis"))
7864       return UNDEF;
7865   }
7866 
7867   return Imm;
7868 }
7869 
7870 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7871 
7872   using namespace llvm::AMDGPU::VGPRIndexMode;
7873 
7874   int64_t Imm = 0;
7875   SMLoc S = getLoc();
7876 
7877   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7878     Imm = parseGPRIdxMacro();
7879     if (Imm == UNDEF)
7880       return ParseStatus::Failure;
7881   } else {
7882     if (getParser().parseAbsoluteExpression(Imm))
7883       return ParseStatus::Failure;
7884     if (Imm < 0 || !isUInt<4>(Imm))
7885       return Error(S, "invalid immediate: only 4-bit values are legal");
7886   }
7887 
7888   Operands.push_back(
7889       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7890   return ParseStatus::Success;
7891 }
7892 
7893 bool AMDGPUOperand::isGPRIdxMode() const {
7894   return isImmTy(ImmTyGprIdxMode);
7895 }
7896 
7897 //===----------------------------------------------------------------------===//
7898 // sopp branch targets
7899 //===----------------------------------------------------------------------===//
7900 
7901 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
7902 
7903   // Make sure we are not parsing something
7904   // that looks like a label or an expression but is not.
7905   // This will improve error messages.
7906   if (isRegister() || isModifier())
7907     return ParseStatus::NoMatch;
7908 
7909   if (!parseExpr(Operands))
7910     return ParseStatus::Failure;
7911 
7912   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7913   assert(Opr.isImm() || Opr.isExpr());
7914   SMLoc Loc = Opr.getStartLoc();
7915 
7916   // Currently we do not support arbitrary expressions as branch targets.
7917   // Only labels and absolute expressions are accepted.
7918   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7919     Error(Loc, "expected an absolute expression or a label");
7920   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7921     Error(Loc, "expected a 16-bit signed jump offset");
7922   }
7923 
7924   return ParseStatus::Success;
7925 }
7926 
7927 //===----------------------------------------------------------------------===//
7928 // Boolean holding registers
7929 //===----------------------------------------------------------------------===//
7930 
7931 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7932   return parseReg(Operands);
7933 }
7934 
7935 //===----------------------------------------------------------------------===//
7936 // mubuf
7937 //===----------------------------------------------------------------------===//
7938 
7939 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7940                                    const OperandVector &Operands,
7941                                    bool IsAtomic) {
7942   OptionalImmIndexMap OptionalIdx;
7943   unsigned FirstOperandIdx = 1;
7944   bool IsAtomicReturn = false;
7945 
7946   if (IsAtomic) {
7947     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7948       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7949       if (!Op.isCPol())
7950         continue;
7951       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7952       break;
7953     }
7954 
7955     if (!IsAtomicReturn) {
7956       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7957       if (NewOpc != -1)
7958         Inst.setOpcode(NewOpc);
7959     }
7960 
7961     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7962                       SIInstrFlags::IsAtomicRet;
7963   }
7964 
7965   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7966     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7967 
7968     // Add the register arguments
7969     if (Op.isReg()) {
7970       Op.addRegOperands(Inst, 1);
7971       // Insert a tied src for atomic return dst.
7972       // This cannot be postponed as subsequent calls to
7973       // addImmOperands rely on correct number of MC operands.
7974       if (IsAtomicReturn && i == FirstOperandIdx)
7975         Op.addRegOperands(Inst, 1);
7976       continue;
7977     }
7978 
7979     // Handle the case where soffset is an immediate
7980     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7981       Op.addImmOperands(Inst, 1);
7982       continue;
7983     }
7984 
7985     // Handle tokens like 'offen' which are sometimes hard-coded into the
7986     // asm string.  There are no MCInst operands for these.
7987     if (Op.isToken()) {
7988       continue;
7989     }
7990     assert(Op.isImm());
7991 
7992     // Handle optional arguments
7993     OptionalIdx[Op.getImmTy()] = i;
7994   }
7995 
7996   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7997   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7998 }
7999 
8000 //===----------------------------------------------------------------------===//
8001 // smrd
8002 //===----------------------------------------------------------------------===//
8003 
8004 bool AMDGPUOperand::isSMRDOffset8() const {
8005   return isImmLiteral() && isUInt<8>(getImm());
8006 }
8007 
8008 bool AMDGPUOperand::isSMEMOffset() const {
8009   // Offset range is checked later by validator.
8010   return isImmLiteral();
8011 }
8012 
8013 bool AMDGPUOperand::isSMRDLiteralOffset() const {
8014   // 32-bit literals are only supported on CI and we only want to use them
8015   // when the offset is > 8-bits.
8016   return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8017 }
8018 
8019 //===----------------------------------------------------------------------===//
8020 // vop3
8021 //===----------------------------------------------------------------------===//
8022 
8023 static bool ConvertOmodMul(int64_t &Mul) {
8024   if (Mul != 1 && Mul != 2 && Mul != 4)
8025     return false;
8026 
8027   Mul >>= 1;
8028   return true;
8029 }
8030 
8031 static bool ConvertOmodDiv(int64_t &Div) {
8032   if (Div == 1) {
8033     Div = 0;
8034     return true;
8035   }
8036 
8037   if (Div == 2) {
8038     Div = 3;
8039     return true;
8040   }
8041 
8042   return false;
8043 }
8044 
8045 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8046 // This is intentional and ensures compatibility with sp3.
8047 // See bug 35397 for details.
8048 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8049   if (BoundCtrl == 0 || BoundCtrl == 1) {
8050     if (!isGFX11Plus())
8051       BoundCtrl = 1;
8052     return true;
8053   }
8054   return false;
8055 }
8056 
8057 void AMDGPUAsmParser::onBeginOfFile() {
8058   if (!getParser().getStreamer().getTargetStreamer() ||
8059       getSTI().getTargetTriple().getArch() == Triple::r600)
8060     return;
8061 
8062   if (!getTargetStreamer().getTargetID())
8063     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
8064         // TODO: Should try to check code object version from directive???
8065         AMDGPU::getAmdhsaCodeObjectVersion());
8066 
8067   if (isHsaAbi(getSTI()))
8068     getTargetStreamer().EmitDirectiveAMDGCNTarget();
8069 }
8070 
8071 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8072   StringRef Name = getTokenStr();
8073   if (Name == "mul") {
8074     return parseIntWithPrefix("mul", Operands,
8075                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8076   }
8077 
8078   if (Name == "div") {
8079     return parseIntWithPrefix("div", Operands,
8080                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8081   }
8082 
8083   return ParseStatus::NoMatch;
8084 }
8085 
8086 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8087 // the number of src operands present, then copies that bit into src0_modifiers.
8088 void cvtVOP3DstOpSelOnly(MCInst &Inst) {
8089   int Opc = Inst.getOpcode();
8090   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8091   if (OpSelIdx == -1)
8092     return;
8093 
8094   int SrcNum;
8095   const int Ops[] = { AMDGPU::OpName::src0,
8096                       AMDGPU::OpName::src1,
8097                       AMDGPU::OpName::src2 };
8098   for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
8099        ++SrcNum)
8100     ;
8101   assert(SrcNum > 0);
8102 
8103   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8104 
8105   if ((OpSel & (1 << SrcNum)) != 0) {
8106     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8107     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8108     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8109   }
8110 }
8111 
8112 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
8113                                    const OperandVector &Operands) {
8114   cvtVOP3P(Inst, Operands);
8115   cvtVOP3DstOpSelOnly(Inst);
8116 }
8117 
8118 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
8119                                    OptionalImmIndexMap &OptionalIdx) {
8120   cvtVOP3P(Inst, Operands, OptionalIdx);
8121   cvtVOP3DstOpSelOnly(Inst);
8122 }
8123 
8124 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8125   return
8126       // 1. This operand is input modifiers
8127       Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8128       // 2. This is not last operand
8129       && Desc.NumOperands > (OpNum + 1)
8130       // 3. Next operand is register class
8131       && Desc.operands()[OpNum + 1].RegClass != -1
8132       // 4. Next register is not tied to any other operand
8133       && Desc.getOperandConstraint(OpNum + 1,
8134                                    MCOI::OperandConstraint::TIED_TO) == -1;
8135 }
8136 
8137 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8138 {
8139   OptionalImmIndexMap OptionalIdx;
8140   unsigned Opc = Inst.getOpcode();
8141 
8142   unsigned I = 1;
8143   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8144   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8145     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8146   }
8147 
8148   for (unsigned E = Operands.size(); I != E; ++I) {
8149     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8150     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8151       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8152     } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
8153                Op.isInterpAttrChan()) {
8154       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8155     } else if (Op.isImmModifier()) {
8156       OptionalIdx[Op.getImmTy()] = I;
8157     } else {
8158       llvm_unreachable("unhandled operand type");
8159     }
8160   }
8161 
8162   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
8163     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8164                           AMDGPUOperand::ImmTyHigh);
8165 
8166   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8167     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8168                           AMDGPUOperand::ImmTyClampSI);
8169 
8170   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8171     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8172                           AMDGPUOperand::ImmTyOModSI);
8173 }
8174 
8175 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8176 {
8177   OptionalImmIndexMap OptionalIdx;
8178   unsigned Opc = Inst.getOpcode();
8179 
8180   unsigned I = 1;
8181   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8182   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8183     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8184   }
8185 
8186   for (unsigned E = Operands.size(); I != E; ++I) {
8187     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8188     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8189       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8190     } else if (Op.isImmModifier()) {
8191       OptionalIdx[Op.getImmTy()] = I;
8192     } else {
8193       llvm_unreachable("unhandled operand type");
8194     }
8195   }
8196 
8197   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8198 
8199   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8200   if (OpSelIdx != -1)
8201     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8202 
8203   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8204 
8205   if (OpSelIdx == -1)
8206     return;
8207 
8208   const int Ops[] = { AMDGPU::OpName::src0,
8209                       AMDGPU::OpName::src1,
8210                       AMDGPU::OpName::src2 };
8211   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8212                          AMDGPU::OpName::src1_modifiers,
8213                          AMDGPU::OpName::src2_modifiers };
8214 
8215   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8216 
8217   for (int J = 0; J < 3; ++J) {
8218     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8219     if (OpIdx == -1)
8220       break;
8221 
8222     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8223     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8224 
8225     if ((OpSel & (1 << J)) != 0)
8226       ModVal |= SISrcMods::OP_SEL_0;
8227     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8228         (OpSel & (1 << 3)) != 0)
8229       ModVal |= SISrcMods::DST_OP_SEL;
8230 
8231     Inst.getOperand(ModIdx).setImm(ModVal);
8232   }
8233 }
8234 
8235 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8236                               OptionalImmIndexMap &OptionalIdx) {
8237   unsigned Opc = Inst.getOpcode();
8238 
8239   unsigned I = 1;
8240   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8241   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8242     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8243   }
8244 
8245   for (unsigned E = Operands.size(); I != E; ++I) {
8246     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8247     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8248       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8249     } else if (Op.isImmModifier()) {
8250       OptionalIdx[Op.getImmTy()] = I;
8251     } else if (Op.isRegOrImm()) {
8252       Op.addRegOrImmOperands(Inst, 1);
8253     } else {
8254       llvm_unreachable("unhandled operand type");
8255     }
8256   }
8257 
8258   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8259     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8260                           AMDGPUOperand::ImmTyClampSI);
8261 
8262   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8263     addOptionalImmOperand(Inst, Operands, OptionalIdx,
8264                           AMDGPUOperand::ImmTyOModSI);
8265 
8266   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
8267   // it has src2 register operand that is tied to dst operand
8268   // we don't allow modifiers for this operand in assembler so src2_modifiers
8269   // should be 0.
8270   if (isMAC(Opc)) {
8271     auto it = Inst.begin();
8272     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8273     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8274     ++it;
8275     // Copy the operand to ensure it's not invalidated when Inst grows.
8276     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8277   }
8278 }
8279 
8280 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8281   OptionalImmIndexMap OptionalIdx;
8282   cvtVOP3(Inst, Operands, OptionalIdx);
8283 }
8284 
8285 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8286                                OptionalImmIndexMap &OptIdx) {
8287   const int Opc = Inst.getOpcode();
8288   const MCInstrDesc &Desc = MII.get(Opc);
8289 
8290   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8291 
8292   if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
8293       Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) {
8294     Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
8295     Inst.addOperand(Inst.getOperand(0));
8296   }
8297 
8298   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) {
8299     assert(!IsPacked);
8300     Inst.addOperand(Inst.getOperand(0));
8301   }
8302 
8303   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8304   // instruction, and then figure out where to actually put the modifiers
8305 
8306   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8307   if (OpSelIdx != -1) {
8308     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8309   }
8310 
8311   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8312   if (OpSelHiIdx != -1) {
8313     int DefaultVal = IsPacked ? -1 : 0;
8314     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8315                           DefaultVal);
8316   }
8317 
8318   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8319   if (NegLoIdx != -1) {
8320     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8321     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8322   }
8323 
8324   const int Ops[] = { AMDGPU::OpName::src0,
8325                       AMDGPU::OpName::src1,
8326                       AMDGPU::OpName::src2 };
8327   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8328                          AMDGPU::OpName::src1_modifiers,
8329                          AMDGPU::OpName::src2_modifiers };
8330 
8331   unsigned OpSel = 0;
8332   unsigned OpSelHi = 0;
8333   unsigned NegLo = 0;
8334   unsigned NegHi = 0;
8335 
8336   if (OpSelIdx != -1)
8337     OpSel = Inst.getOperand(OpSelIdx).getImm();
8338 
8339   if (OpSelHiIdx != -1)
8340     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8341 
8342   if (NegLoIdx != -1) {
8343     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8344     NegLo = Inst.getOperand(NegLoIdx).getImm();
8345     NegHi = Inst.getOperand(NegHiIdx).getImm();
8346   }
8347 
8348   for (int J = 0; J < 3; ++J) {
8349     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8350     if (OpIdx == -1)
8351       break;
8352 
8353     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8354 
8355     if (ModIdx == -1)
8356       continue;
8357 
8358     uint32_t ModVal = 0;
8359 
8360     if ((OpSel & (1 << J)) != 0)
8361       ModVal |= SISrcMods::OP_SEL_0;
8362 
8363     if ((OpSelHi & (1 << J)) != 0)
8364       ModVal |= SISrcMods::OP_SEL_1;
8365 
8366     if ((NegLo & (1 << J)) != 0)
8367       ModVal |= SISrcMods::NEG;
8368 
8369     if ((NegHi & (1 << J)) != 0)
8370       ModVal |= SISrcMods::NEG_HI;
8371 
8372     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8373   }
8374 }
8375 
8376 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8377   OptionalImmIndexMap OptIdx;
8378   cvtVOP3(Inst, Operands, OptIdx);
8379   cvtVOP3P(Inst, Operands, OptIdx);
8380 }
8381 
8382 //===----------------------------------------------------------------------===//
8383 // VOPD
8384 //===----------------------------------------------------------------------===//
8385 
8386 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8387   if (!hasVOPD(getSTI()))
8388     return ParseStatus::NoMatch;
8389 
8390   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8391     SMLoc S = getLoc();
8392     lex();
8393     lex();
8394     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8395     SMLoc OpYLoc = getLoc();
8396     StringRef OpYName;
8397     if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8398       Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8399       return ParseStatus::Success;
8400     }
8401     return Error(OpYLoc, "expected a VOPDY instruction after ::");
8402   }
8403   return ParseStatus::NoMatch;
8404 }
8405 
8406 // Create VOPD MCInst operands using parsed assembler operands.
8407 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8408   auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
8409     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
8410     if (Op.isReg()) {
8411       Op.addRegOperands(Inst, 1);
8412       return;
8413     }
8414     if (Op.isImm()) {
8415       Op.addImmOperands(Inst, 1);
8416       return;
8417     }
8418     llvm_unreachable("Unhandled operand type in cvtVOPD");
8419   };
8420 
8421   const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
8422 
8423   // MCInst operands are ordered as follows:
8424   //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
8425 
8426   for (auto CompIdx : VOPD::COMPONENTS) {
8427     addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
8428   }
8429 
8430   for (auto CompIdx : VOPD::COMPONENTS) {
8431     const auto &CInfo = InstInfo[CompIdx];
8432     auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
8433     for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
8434       addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
8435     if (CInfo.hasSrc2Acc())
8436       addOp(CInfo.getIndexOfDstInParsedOperands());
8437   }
8438 }
8439 
8440 //===----------------------------------------------------------------------===//
8441 // dpp
8442 //===----------------------------------------------------------------------===//
8443 
8444 bool AMDGPUOperand::isDPP8() const {
8445   return isImmTy(ImmTyDPP8);
8446 }
8447 
8448 bool AMDGPUOperand::isDPPCtrl() const {
8449   using namespace AMDGPU::DPP;
8450 
8451   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8452   if (result) {
8453     int64_t Imm = getImm();
8454     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8455            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8456            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8457            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8458            (Imm == DppCtrl::WAVE_SHL1) ||
8459            (Imm == DppCtrl::WAVE_ROL1) ||
8460            (Imm == DppCtrl::WAVE_SHR1) ||
8461            (Imm == DppCtrl::WAVE_ROR1) ||
8462            (Imm == DppCtrl::ROW_MIRROR) ||
8463            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8464            (Imm == DppCtrl::BCAST15) ||
8465            (Imm == DppCtrl::BCAST31) ||
8466            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8467            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8468   }
8469   return false;
8470 }
8471 
8472 //===----------------------------------------------------------------------===//
8473 // mAI
8474 //===----------------------------------------------------------------------===//
8475 
8476 bool AMDGPUOperand::isBLGP() const {
8477   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8478 }
8479 
8480 bool AMDGPUOperand::isCBSZ() const {
8481   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8482 }
8483 
8484 bool AMDGPUOperand::isABID() const {
8485   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8486 }
8487 
8488 bool AMDGPUOperand::isS16Imm() const {
8489   return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8490 }
8491 
8492 bool AMDGPUOperand::isU16Imm() const {
8493   return isImmLiteral() && isUInt<16>(getImm());
8494 }
8495 
8496 //===----------------------------------------------------------------------===//
8497 // dim
8498 //===----------------------------------------------------------------------===//
8499 
8500 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8501   // We want to allow "dim:1D" etc.,
8502   // but the initial 1 is tokenized as an integer.
8503   std::string Token;
8504   if (isToken(AsmToken::Integer)) {
8505     SMLoc Loc = getToken().getEndLoc();
8506     Token = std::string(getTokenStr());
8507     lex();
8508     if (getLoc() != Loc)
8509       return false;
8510   }
8511 
8512   StringRef Suffix;
8513   if (!parseId(Suffix))
8514     return false;
8515   Token += Suffix;
8516 
8517   StringRef DimId = Token;
8518   if (DimId.starts_with("SQ_RSRC_IMG_"))
8519     DimId = DimId.drop_front(12);
8520 
8521   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8522   if (!DimInfo)
8523     return false;
8524 
8525   Encoding = DimInfo->Encoding;
8526   return true;
8527 }
8528 
8529 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8530   if (!isGFX10Plus())
8531     return ParseStatus::NoMatch;
8532 
8533   SMLoc S = getLoc();
8534 
8535   if (!trySkipId("dim", AsmToken::Colon))
8536     return ParseStatus::NoMatch;
8537 
8538   unsigned Encoding;
8539   SMLoc Loc = getLoc();
8540   if (!parseDimId(Encoding))
8541     return Error(Loc, "invalid dim value");
8542 
8543   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8544                                               AMDGPUOperand::ImmTyDim));
8545   return ParseStatus::Success;
8546 }
8547 
8548 //===----------------------------------------------------------------------===//
8549 // dpp
8550 //===----------------------------------------------------------------------===//
8551 
8552 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8553   SMLoc S = getLoc();
8554 
8555   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8556     return ParseStatus::NoMatch;
8557 
8558   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8559 
8560   int64_t Sels[8];
8561 
8562   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8563     return ParseStatus::Failure;
8564 
8565   for (size_t i = 0; i < 8; ++i) {
8566     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8567       return ParseStatus::Failure;
8568 
8569     SMLoc Loc = getLoc();
8570     if (getParser().parseAbsoluteExpression(Sels[i]))
8571       return ParseStatus::Failure;
8572     if (0 > Sels[i] || 7 < Sels[i])
8573       return Error(Loc, "expected a 3-bit value");
8574   }
8575 
8576   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8577     return ParseStatus::Failure;
8578 
8579   unsigned DPP8 = 0;
8580   for (size_t i = 0; i < 8; ++i)
8581     DPP8 |= (Sels[i] << (i * 3));
8582 
8583   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8584   return ParseStatus::Success;
8585 }
8586 
8587 bool
8588 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8589                                     const OperandVector &Operands) {
8590   if (Ctrl == "row_newbcast")
8591     return isGFX90A();
8592 
8593   if (Ctrl == "row_share" ||
8594       Ctrl == "row_xmask")
8595     return isGFX10Plus();
8596 
8597   if (Ctrl == "wave_shl" ||
8598       Ctrl == "wave_shr" ||
8599       Ctrl == "wave_rol" ||
8600       Ctrl == "wave_ror" ||
8601       Ctrl == "row_bcast")
8602     return isVI() || isGFX9();
8603 
8604   return Ctrl == "row_mirror" ||
8605          Ctrl == "row_half_mirror" ||
8606          Ctrl == "quad_perm" ||
8607          Ctrl == "row_shl" ||
8608          Ctrl == "row_shr" ||
8609          Ctrl == "row_ror";
8610 }
8611 
8612 int64_t
8613 AMDGPUAsmParser::parseDPPCtrlPerm() {
8614   // quad_perm:[%d,%d,%d,%d]
8615 
8616   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8617     return -1;
8618 
8619   int64_t Val = 0;
8620   for (int i = 0; i < 4; ++i) {
8621     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8622       return -1;
8623 
8624     int64_t Temp;
8625     SMLoc Loc = getLoc();
8626     if (getParser().parseAbsoluteExpression(Temp))
8627       return -1;
8628     if (Temp < 0 || Temp > 3) {
8629       Error(Loc, "expected a 2-bit value");
8630       return -1;
8631     }
8632 
8633     Val += (Temp << i * 2);
8634   }
8635 
8636   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8637     return -1;
8638 
8639   return Val;
8640 }
8641 
8642 int64_t
8643 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8644   using namespace AMDGPU::DPP;
8645 
8646   // sel:%d
8647 
8648   int64_t Val;
8649   SMLoc Loc = getLoc();
8650 
8651   if (getParser().parseAbsoluteExpression(Val))
8652     return -1;
8653 
8654   struct DppCtrlCheck {
8655     int64_t Ctrl;
8656     int Lo;
8657     int Hi;
8658   };
8659 
8660   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8661     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8662     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8663     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8664     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8665     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8666     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8667     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8668     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8669     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8670     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8671     .Default({-1, 0, 0});
8672 
8673   bool Valid;
8674   if (Check.Ctrl == -1) {
8675     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8676     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8677   } else {
8678     Valid = Check.Lo <= Val && Val <= Check.Hi;
8679     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8680   }
8681 
8682   if (!Valid) {
8683     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8684     return -1;
8685   }
8686 
8687   return Val;
8688 }
8689 
8690 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8691   using namespace AMDGPU::DPP;
8692 
8693   if (!isToken(AsmToken::Identifier) ||
8694       !isSupportedDPPCtrl(getTokenStr(), Operands))
8695     return ParseStatus::NoMatch;
8696 
8697   SMLoc S = getLoc();
8698   int64_t Val = -1;
8699   StringRef Ctrl;
8700 
8701   parseId(Ctrl);
8702 
8703   if (Ctrl == "row_mirror") {
8704     Val = DppCtrl::ROW_MIRROR;
8705   } else if (Ctrl == "row_half_mirror") {
8706     Val = DppCtrl::ROW_HALF_MIRROR;
8707   } else {
8708     if (skipToken(AsmToken::Colon, "expected a colon")) {
8709       if (Ctrl == "quad_perm") {
8710         Val = parseDPPCtrlPerm();
8711       } else {
8712         Val = parseDPPCtrlSel(Ctrl);
8713       }
8714     }
8715   }
8716 
8717   if (Val == -1)
8718     return ParseStatus::Failure;
8719 
8720   Operands.push_back(
8721     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8722   return ParseStatus::Success;
8723 }
8724 
8725 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
8726                                  bool IsDPP8) {
8727   OptionalImmIndexMap OptionalIdx;
8728   unsigned Opc = Inst.getOpcode();
8729   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8730 
8731   // MAC instructions are special because they have 'old'
8732   // operand which is not tied to dst (but assumed to be).
8733   // They also have dummy unused src2_modifiers.
8734   int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
8735   int Src2ModIdx =
8736       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
8737   bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
8738                Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;
8739 
8740   unsigned I = 1;
8741   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8742     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8743   }
8744 
8745   int Fi = 0;
8746   for (unsigned E = Operands.size(); I != E; ++I) {
8747 
8748     if (IsMAC) {
8749       int NumOperands = Inst.getNumOperands();
8750       if (OldIdx == NumOperands) {
8751         // Handle old operand
8752         constexpr int DST_IDX = 0;
8753         Inst.addOperand(Inst.getOperand(DST_IDX));
8754       } else if (Src2ModIdx == NumOperands) {
8755         // Add unused dummy src2_modifiers
8756         Inst.addOperand(MCOperand::createImm(0));
8757       }
8758     }
8759 
8760     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8761                                             MCOI::TIED_TO);
8762     if (TiedTo != -1) {
8763       assert((unsigned)TiedTo < Inst.getNumOperands());
8764       // handle tied old or src2 for MAC instructions
8765       Inst.addOperand(Inst.getOperand(TiedTo));
8766     }
8767     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8768     // Add the register arguments
8769     if (IsDPP8 && Op.isDppFI()) {
8770       Fi = Op.getImm();
8771     } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8772       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8773     } else if (Op.isReg()) {
8774       Op.addRegOperands(Inst, 1);
8775     } else if (Op.isImm() &&
8776                Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
8777       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8778       Op.addImmOperands(Inst, 1);
8779     } else if (Op.isImm()) {
8780       OptionalIdx[Op.getImmTy()] = I;
8781     } else {
8782       llvm_unreachable("unhandled operand type");
8783     }
8784   }
8785   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
8786     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8787 
8788   if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
8789     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8790 
8791   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8792     cvtVOP3P(Inst, Operands, OptionalIdx);
8793   else if (Desc.TSFlags & SIInstrFlags::VOP3)
8794     cvtVOP3OpSel(Inst, Operands, OptionalIdx);
8795   else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
8796     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8797   }
8798 
8799   if (IsDPP8) {
8800     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8801     using namespace llvm::AMDGPU::DPP;
8802     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8803   } else {
8804     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8805     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8806     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8807     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8808 
8809     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
8810       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8811                             AMDGPUOperand::ImmTyDppFI);
8812   }
8813 }
8814 
8815 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8816   OptionalImmIndexMap OptionalIdx;
8817 
8818   unsigned I = 1;
8819   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8820   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8821     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8822   }
8823 
8824   int Fi = 0;
8825   for (unsigned E = Operands.size(); I != E; ++I) {
8826     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8827                                             MCOI::TIED_TO);
8828     if (TiedTo != -1) {
8829       assert((unsigned)TiedTo < Inst.getNumOperands());
8830       // handle tied old or src2 for MAC instructions
8831       Inst.addOperand(Inst.getOperand(TiedTo));
8832     }
8833     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8834     // Add the register arguments
8835     if (Op.isReg() && validateVccOperand(Op.getReg())) {
8836       // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8837       // Skip it.
8838       continue;
8839     }
8840 
8841     if (IsDPP8) {
8842       if (Op.isDPP8()) {
8843         Op.addImmOperands(Inst, 1);
8844       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8845         Op.addRegWithFPInputModsOperands(Inst, 2);
8846       } else if (Op.isDppFI()) {
8847         Fi = Op.getImm();
8848       } else if (Op.isReg()) {
8849         Op.addRegOperands(Inst, 1);
8850       } else {
8851         llvm_unreachable("Invalid operand type");
8852       }
8853     } else {
8854       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8855         Op.addRegWithFPInputModsOperands(Inst, 2);
8856       } else if (Op.isReg()) {
8857         Op.addRegOperands(Inst, 1);
8858       } else if (Op.isDPPCtrl()) {
8859         Op.addImmOperands(Inst, 1);
8860       } else if (Op.isImm()) {
8861         // Handle optional arguments
8862         OptionalIdx[Op.getImmTy()] = I;
8863       } else {
8864         llvm_unreachable("Invalid operand type");
8865       }
8866     }
8867   }
8868 
8869   if (IsDPP8) {
8870     using namespace llvm::AMDGPU::DPP;
8871     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8872   } else {
8873     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8874     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8875     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8876     if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
8877       addOptionalImmOperand(Inst, Operands, OptionalIdx,
8878                             AMDGPUOperand::ImmTyDppFI);
8879     }
8880   }
8881 }
8882 
8883 //===----------------------------------------------------------------------===//
8884 // sdwa
8885 //===----------------------------------------------------------------------===//
8886 
8887 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
8888                                           StringRef Prefix,
8889                                           AMDGPUOperand::ImmTy Type) {
8890   using namespace llvm::AMDGPU::SDWA;
8891 
8892   SMLoc S = getLoc();
8893   StringRef Value;
8894 
8895   SMLoc StringLoc;
8896   ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
8897   if (!Res.isSuccess())
8898     return Res;
8899 
8900   int64_t Int;
8901   Int = StringSwitch<int64_t>(Value)
8902         .Case("BYTE_0", SdwaSel::BYTE_0)
8903         .Case("BYTE_1", SdwaSel::BYTE_1)
8904         .Case("BYTE_2", SdwaSel::BYTE_2)
8905         .Case("BYTE_3", SdwaSel::BYTE_3)
8906         .Case("WORD_0", SdwaSel::WORD_0)
8907         .Case("WORD_1", SdwaSel::WORD_1)
8908         .Case("DWORD", SdwaSel::DWORD)
8909         .Default(0xffffffff);
8910 
8911   if (Int == 0xffffffff)
8912     return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8913 
8914   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8915   return ParseStatus::Success;
8916 }
8917 
8918 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8919   using namespace llvm::AMDGPU::SDWA;
8920 
8921   SMLoc S = getLoc();
8922   StringRef Value;
8923 
8924   SMLoc StringLoc;
8925   ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8926   if (!Res.isSuccess())
8927     return Res;
8928 
8929   int64_t Int;
8930   Int = StringSwitch<int64_t>(Value)
8931         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8932         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8933         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8934         .Default(0xffffffff);
8935 
8936   if (Int == 0xffffffff)
8937     return Error(StringLoc, "invalid dst_unused value");
8938 
8939   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
8940   return ParseStatus::Success;
8941 }
8942 
8943 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8944   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8945 }
8946 
8947 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8948   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8949 }
8950 
8951 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8952   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8953 }
8954 
8955 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8956   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8957 }
8958 
8959 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8960   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8961 }
8962 
8963 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8964                               uint64_t BasicInstType,
8965                               bool SkipDstVcc,
8966                               bool SkipSrcVcc) {
8967   using namespace llvm::AMDGPU::SDWA;
8968 
8969   OptionalImmIndexMap OptionalIdx;
8970   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8971   bool SkippedVcc = false;
8972 
8973   unsigned I = 1;
8974   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8975   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8976     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8977   }
8978 
8979   for (unsigned E = Operands.size(); I != E; ++I) {
8980     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8981     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8982         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8983       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8984       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8985       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8986       // Skip VCC only if we didn't skip it on previous iteration.
8987       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8988       if (BasicInstType == SIInstrFlags::VOP2 &&
8989           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8990            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8991         SkippedVcc = true;
8992         continue;
8993       } else if (BasicInstType == SIInstrFlags::VOPC &&
8994                  Inst.getNumOperands() == 0) {
8995         SkippedVcc = true;
8996         continue;
8997       }
8998     }
8999     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9000       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9001     } else if (Op.isImm()) {
9002       // Handle optional arguments
9003       OptionalIdx[Op.getImmTy()] = I;
9004     } else {
9005       llvm_unreachable("Invalid operand type");
9006     }
9007     SkippedVcc = false;
9008   }
9009 
9010   const unsigned Opc = Inst.getOpcode();
9011   if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
9012       Opc != AMDGPU::V_NOP_sdwa_vi) {
9013     // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
9014     switch (BasicInstType) {
9015     case SIInstrFlags::VOP1:
9016       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9017         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9018                               AMDGPUOperand::ImmTyClampSI, 0);
9019 
9020       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9021         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9022                               AMDGPUOperand::ImmTyOModSI, 0);
9023 
9024       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
9025         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9026                               AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9027 
9028       if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
9029         addOptionalImmOperand(Inst, Operands, OptionalIdx,
9030                               AMDGPUOperand::ImmTySDWADstUnused,
9031                               DstUnused::UNUSED_PRESERVE);
9032 
9033       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9034       break;
9035 
9036     case SIInstrFlags::VOP2:
9037       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9038 
9039       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
9040         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9041 
9042       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
9043       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
9044       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9045       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9046       break;
9047 
9048     case SIInstrFlags::VOPC:
9049       if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
9050         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9051       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
9052       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
9053       break;
9054 
9055     default:
9056       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9057     }
9058   }
9059 
9060   // special case v_mac_{f16, f32}:
9061   // it has src2 register operand that is tied to dst operand
9062   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9063       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9064     auto it = Inst.begin();
9065     std::advance(
9066       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9067     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9068   }
9069 }
9070 
9071 /// Force static initialization.
9072 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9073   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
9074   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9075 }
9076 
9077 #define GET_REGISTER_MATCHER
9078 #define GET_MATCHER_IMPLEMENTATION
9079 #define GET_MNEMONIC_SPELL_CHECKER
9080 #define GET_MNEMONIC_CHECKER
9081 #include "AMDGPUGenAsmMatcher.inc"
9082 
9083 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9084                                                 unsigned MCK) {
9085   switch (MCK) {
9086   case MCK_addr64:
9087     return parseTokenOp("addr64", Operands);
9088   case MCK_done:
9089     return parseTokenOp("done", Operands);
9090   case MCK_idxen:
9091     return parseTokenOp("idxen", Operands);
9092   case MCK_lds:
9093     return parseTokenOp("lds", Operands);
9094   case MCK_offen:
9095     return parseTokenOp("offen", Operands);
9096   case MCK_off:
9097     return parseTokenOp("off", Operands);
9098   case MCK_row_95_en:
9099     return parseTokenOp("row_en", Operands);
9100   case MCK_gds:
9101     return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9102   case MCK_tfe:
9103     return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9104   }
9105   return tryCustomParseOperand(Operands, MCK);
9106 }
9107 
9108 // This function should be defined after auto-generated include so that we have
9109 // MatchClassKind enum defined
9110 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9111                                                      unsigned Kind) {
9112   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9113   // But MatchInstructionImpl() expects to meet token and fails to validate
9114   // operand. This method checks if we are given immediate operand but expect to
9115   // get corresponding token.
9116   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9117   switch (Kind) {
9118   case MCK_addr64:
9119     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9120   case MCK_gds:
9121     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9122   case MCK_lds:
9123     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9124   case MCK_idxen:
9125     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9126   case MCK_offen:
9127     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9128   case MCK_tfe:
9129     return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9130   case MCK_SSrcB32:
9131     // When operands have expression values, they will return true for isToken,
9132     // because it is not possible to distinguish between a token and an
9133     // expression at parse time. MatchInstructionImpl() will always try to
9134     // match an operand as a token, when isToken returns true, and when the
9135     // name of the expression is not a valid token, the match will fail,
9136     // so we need to handle it here.
9137     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9138   case MCK_SSrcF32:
9139     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9140   case MCK_SOPPBrTarget:
9141     return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9142   case MCK_VReg32OrOff:
9143     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9144   case MCK_InterpSlot:
9145     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9146   case MCK_InterpAttr:
9147     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9148   case MCK_InterpAttrChan:
9149     return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9150   case MCK_SReg_64:
9151   case MCK_SReg_64_XEXEC:
9152     // Null is defined as a 32-bit register but
9153     // it should also be enabled with 64-bit operands.
9154     // The following code enables it for SReg_64 operands
9155     // used as source and destination. Remaining source
9156     // operands are handled in isInlinableImm.
9157     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9158   default:
9159     return Match_InvalidOperand;
9160   }
9161 }
9162 
9163 //===----------------------------------------------------------------------===//
9164 // endpgm
9165 //===----------------------------------------------------------------------===//
9166 
9167 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9168   SMLoc S = getLoc();
9169   int64_t Imm = 0;
9170 
9171   if (!parseExpr(Imm)) {
9172     // The operand is optional, if not present default to 0
9173     Imm = 0;
9174   }
9175 
9176   if (!isUInt<16>(Imm))
9177     return Error(S, "expected a 16-bit value");
9178 
9179   Operands.push_back(
9180       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9181   return ParseStatus::Success;
9182 }
9183 
9184 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9185 
9186 //===----------------------------------------------------------------------===//
9187 // LDSDIR
9188 //===----------------------------------------------------------------------===//
9189 
9190 bool AMDGPUOperand::isWaitVDST() const {
9191   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9192 }
9193 
9194 //===----------------------------------------------------------------------===//
9195 // VINTERP
9196 //===----------------------------------------------------------------------===//
9197 
9198 bool AMDGPUOperand::isWaitEXP() const {
9199   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9200 }
9201 
9202 //===----------------------------------------------------------------------===//
9203 // Split Barrier
9204 //===----------------------------------------------------------------------===//
9205 
9206 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9207