1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCExpr.h"
27 #include "llvm/MC/MCInst.h"
28 #include "llvm/MC/MCInstrDesc.h"
29 #include "llvm/MC/MCParser/MCAsmLexer.h"
30 #include "llvm/MC/MCParser/MCAsmParser.h"
31 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
32 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
33 #include "llvm/MC/MCSymbol.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/AMDGPUMetadata.h"
36 #include "llvm/Support/AMDHSAKernelDescriptor.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/MachineValueType.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/Support/TargetParser.h"
41 
42 using namespace llvm;
43 using namespace llvm::AMDGPU;
44 using namespace llvm::amdhsa;
45 
46 namespace {
47 
48 class AMDGPUAsmParser;
49 
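// Broad register categories assigned to operands while parsing register names.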
50 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
51 
52 //===----------------------------------------------------------------------===//
53 // Operand
54 //===----------------------------------------------------------------------===//
55 
56 class AMDGPUOperand : public MCParsedAsmOperand {
57   enum KindTy {
58     Token,
59     Immediate,
60     Register,
61     Expression
62   } Kind;
63 
64   SMLoc StartLoc, EndLoc;
65   const AMDGPUAsmParser *AsmParser;
66 
67 public:
68   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
69       : Kind(Kind_), AsmParser(AsmParser_) {}
70 
71   using Ptr = std::unique_ptr<AMDGPUOperand>;
72 
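  // Source-operand modifiers: Abs and Neg are floating-point modifiers, Sext is
  // an integer modifier; the two groups are mutually exclusive on an operand.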
73   struct Modifiers {
74     bool Abs = false;
75     bool Neg = false;
76     bool Sext = false;
77 
78     bool hasFPModifiers() const { return Abs || Neg; }
79     bool hasIntModifiers() const { return Sext; }
80     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
81 
82     int64_t getFPModifiersOperand() const {
83       int64_t Operand = 0;
84       Operand |= Abs ? SISrcMods::ABS : 0u;
85       Operand |= Neg ? SISrcMods::NEG : 0u;
86       return Operand;
87     }
88 
89     int64_t getIntModifiersOperand() const {
90       int64_t Operand = 0;
91       Operand |= Sext ? SISrcMods::SEXT : 0u;
92       return Operand;
93     }
94 
95     int64_t getModifiersOperand() const {
96       assert(!(hasFPModifiers() && hasIntModifiers())
97            && "fp and int modifiers should not be used simultaneously");
98       if (hasFPModifiers()) {
99         return getFPModifiersOperand();
100       } else if (hasIntModifiers()) {
101         return getIntModifiersOperand();
102       } else {
103         return 0;
104       }
105     }
106 
107     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
108   };
109 
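  // Named immediate operand types; used to route parsed modifiers, offsets and
  // similar fields to the correct instruction operand.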
110   enum ImmTy {
111     ImmTyNone,
112     ImmTyGDS,
113     ImmTyLDS,
114     ImmTyOffen,
115     ImmTyIdxen,
116     ImmTyAddr64,
117     ImmTyOffset,
118     ImmTyInstOffset,
119     ImmTyOffset0,
120     ImmTyOffset1,
121     ImmTyCPol,
122     ImmTySWZ,
123     ImmTyTFE,
124     ImmTyD16,
125     ImmTyClampSI,
126     ImmTyOModSI,
127     ImmTySdwaDstSel,
128     ImmTySdwaSrc0Sel,
129     ImmTySdwaSrc1Sel,
130     ImmTySdwaDstUnused,
131     ImmTyDMask,
132     ImmTyDim,
133     ImmTyUNorm,
134     ImmTyDA,
135     ImmTyR128A16,
136     ImmTyA16,
137     ImmTyLWE,
138     ImmTyExpTgt,
139     ImmTyExpCompr,
140     ImmTyExpVM,
141     ImmTyFORMAT,
142     ImmTyHwreg,
143     ImmTyOff,
144     ImmTySendMsg,
145     ImmTyInterpSlot,
146     ImmTyInterpAttr,
147     ImmTyAttrChan,
148     ImmTyOpSel,
149     ImmTyOpSelHi,
150     ImmTyNegLo,
151     ImmTyNegHi,
152     ImmTyDPP8,
153     ImmTyDppCtrl,
154     ImmTyDppRowMask,
155     ImmTyDppBankMask,
156     ImmTyDppBoundCtrl,
157     ImmTyDppFi,
158     ImmTySwizzle,
159     ImmTyGprIdxMode,
160     ImmTyHigh,
161     ImmTyBLGP,
162     ImmTyCBSZ,
163     ImmTyABID,
164     ImmTyEndpgm,
165     ImmTyWaitVDST,
166     ImmTyWaitEXP,
167   };
168 
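  // Records how an immediate operand is encoded: not yet classified, as a
  // literal, or as an inline constant.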
169   enum ImmKindTy {
170     ImmKindTyNone,
171     ImmKindTyLiteral,
172     ImmKindTyConst,
173   };
174 
175 private:
176   struct TokOp {
177     const char *Data;
178     unsigned Length;
179   };
180 
181   struct ImmOp {
182     int64_t Val;
183     ImmTy Type;
184     bool IsFPImm;
185     mutable ImmKindTy Kind;
186     Modifiers Mods;
187   };
188 
189   struct RegOp {
190     unsigned RegNo;
191     Modifiers Mods;
192   };
193 
194   union {
195     TokOp Tok;
196     ImmOp Imm;
197     RegOp Reg;
198     const MCExpr *Expr;
199   };
200 
201 public:
202   bool isToken() const override {
203     if (Kind == Token)
204       return true;
205 
206     // When parsing operands, we can't always tell if something was meant to be
207     // a token, like 'gds', or an expression that references a global variable.
208     // In this case, we assume the string is an expression, and if we need to
209     // interpret it as a token, then we treat the symbol name as the token.
210     return isSymbolRefExpr();
211   }
212 
213   bool isSymbolRefExpr() const {
214     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
215   }
216 
217   bool isImm() const override {
218     return Kind == Immediate;
219   }
220 
221   void setImmKindNone() const {
222     assert(isImm());
223     Imm.Kind = ImmKindTyNone;
224   }
225 
226   void setImmKindLiteral() const {
227     assert(isImm());
228     Imm.Kind = ImmKindTyLiteral;
229   }
230 
231   void setImmKindConst() const {
232     assert(isImm());
233     Imm.Kind = ImmKindTyConst;
234   }
235 
236   bool IsImmKindLiteral() const {
237     return isImm() && Imm.Kind == ImmKindTyLiteral;
238   }
239 
240   bool isImmKindConst() const {
241     return isImm() && Imm.Kind == ImmKindTyConst;
242   }
243 
244   bool isInlinableImm(MVT type) const;
245   bool isLiteralImm(MVT type) const;
246 
247   bool isRegKind() const {
248     return Kind == Register;
249   }
250 
251   bool isReg() const override {
252     return isRegKind() && !hasModifiers();
253   }
254 
255   bool isRegOrInline(unsigned RCID, MVT type) const {
256     return isRegClass(RCID) || isInlinableImm(type);
257   }
258 
259   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
260     return isRegOrInline(RCID, type) || isLiteralImm(type);
261   }
262 
263   bool isRegOrImmWithInt16InputMods() const {
264     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
265   }
266 
267   bool isRegOrImmWithInt32InputMods() const {
268     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
269   }
270 
271   bool isRegOrInlineImmWithInt16InputMods() const {
272     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
273   }
274 
275   bool isRegOrInlineImmWithInt32InputMods() const {
276     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
277   }
278 
279   bool isRegOrImmWithInt64InputMods() const {
280     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
281   }
282 
283   bool isRegOrImmWithFP16InputMods() const {
284     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
285   }
286 
287   bool isRegOrImmWithFP32InputMods() const {
288     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
289   }
290 
291   bool isRegOrImmWithFP64InputMods() const {
292     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
293   }
294 
295   bool isRegOrInlineImmWithFP16InputMods() const {
296     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
297   }
298 
299   bool isRegOrInlineImmWithFP32InputMods() const {
300     return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
301   }
302 
303 
304   bool isVReg() const {
305     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
306            isRegClass(AMDGPU::VReg_64RegClassID) ||
307            isRegClass(AMDGPU::VReg_96RegClassID) ||
308            isRegClass(AMDGPU::VReg_128RegClassID) ||
309            isRegClass(AMDGPU::VReg_160RegClassID) ||
310            isRegClass(AMDGPU::VReg_192RegClassID) ||
311            isRegClass(AMDGPU::VReg_256RegClassID) ||
312            isRegClass(AMDGPU::VReg_512RegClassID) ||
313            isRegClass(AMDGPU::VReg_1024RegClassID);
314   }
315 
316   bool isVReg32() const {
317     return isRegClass(AMDGPU::VGPR_32RegClassID);
318   }
319 
320   bool isVReg32OrOff() const {
321     return isOff() || isVReg32();
322   }
323 
324   bool isNull() const {
325     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
326   }
327 
328   bool isVRegWithInputMods() const;
329 
330   bool isSDWAOperand(MVT type) const;
331   bool isSDWAFP16Operand() const;
332   bool isSDWAFP32Operand() const;
333   bool isSDWAInt16Operand() const;
334   bool isSDWAInt32Operand() const;
335 
336   bool isImmTy(ImmTy ImmT) const {
337     return isImm() && Imm.Type == ImmT;
338   }
339 
340   bool isImmModifier() const {
341     return isImm() && Imm.Type != ImmTyNone;
342   }
343 
344   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
345   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
346   bool isDMask() const { return isImmTy(ImmTyDMask); }
347   bool isDim() const { return isImmTy(ImmTyDim); }
348   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
349   bool isDA() const { return isImmTy(ImmTyDA); }
350   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
351   bool isGFX10A16() const { return isImmTy(ImmTyA16); }
352   bool isLWE() const { return isImmTy(ImmTyLWE); }
353   bool isOff() const { return isImmTy(ImmTyOff); }
354   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
355   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
356   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
357   bool isOffen() const { return isImmTy(ImmTyOffen); }
358   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
359   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
360   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
361   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
362   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
363 
364   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
365   bool isGDS() const { return isImmTy(ImmTyGDS); }
366   bool isLDS() const { return isImmTy(ImmTyLDS); }
367   bool isCPol() const { return isImmTy(ImmTyCPol); }
368   bool isSWZ() const { return isImmTy(ImmTySWZ); }
369   bool isTFE() const { return isImmTy(ImmTyTFE); }
370   bool isD16() const { return isImmTy(ImmTyD16); }
371   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
372   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
373   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
374   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
375   bool isFI() const { return isImmTy(ImmTyDppFi); }
376   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
377   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
378   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
379   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
380   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
381   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
382   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
383   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
384   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
385   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
386   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
387   bool isHigh() const { return isImmTy(ImmTyHigh); }
388 
389   bool isMod() const {
390     return isClampSI() || isOModSI();
391   }
392 
393   bool isRegOrImm() const {
394     return isReg() || isImm();
395   }
396 
397   bool isRegClass(unsigned RCID) const;
398 
399   bool isInlineValue() const;
400 
401   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
402     return isRegOrInline(RCID, type) && !hasModifiers();
403   }
404 
405   bool isSCSrcB16() const {
406     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
407   }
408 
409   bool isSCSrcV2B16() const {
410     return isSCSrcB16();
411   }
412 
413   bool isSCSrcB32() const {
414     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
415   }
416 
417   bool isSCSrcB64() const {
418     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
419   }
420 
421   bool isBoolReg() const;
422 
423   bool isSCSrcF16() const {
424     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
425   }
426 
427   bool isSCSrcV2F16() const {
428     return isSCSrcF16();
429   }
430 
431   bool isSCSrcF32() const {
432     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
433   }
434 
435   bool isSCSrcF64() const {
436     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
437   }
438 
439   bool isSSrcB32() const {
440     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
441   }
442 
443   bool isSSrcB16() const {
444     return isSCSrcB16() || isLiteralImm(MVT::i16);
445   }
446 
447   bool isSSrcV2B16() const {
448     llvm_unreachable("cannot happen");
449     return isSSrcB16();
450   }
451 
452   bool isSSrcB64() const {
453     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
454     // See isVSrcB64().
455     return isSCSrcB64() || isLiteralImm(MVT::i64);
456   }
457 
458   bool isSSrcF32() const {
459     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
460   }
461 
462   bool isSSrcF64() const {
463     return isSCSrcB64() || isLiteralImm(MVT::f64);
464   }
465 
466   bool isSSrcF16() const {
467     return isSCSrcB16() || isLiteralImm(MVT::f16);
468   }
469 
470   bool isSSrcV2F16() const {
471     llvm_unreachable("cannot happen");
472     return isSSrcF16();
473   }
474 
475   bool isSSrcV2FP32() const {
476     llvm_unreachable("cannot happen");
477     return isSSrcF32();
478   }
479 
480   bool isSCSrcV2FP32() const {
481     llvm_unreachable("cannot happen");
482     return isSCSrcF32();
483   }
484 
485   bool isSSrcV2INT32() const {
486     llvm_unreachable("cannot happen");
487     return isSSrcB32();
488   }
489 
490   bool isSCSrcV2INT32() const {
491     llvm_unreachable("cannot happen");
492     return isSCSrcB32();
493   }
494 
495   bool isSSrcOrLdsB32() const {
496     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
497            isLiteralImm(MVT::i32) || isExpr();
498   }
499 
500   bool isVCSrcB32() const {
501     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
502   }
503 
504   bool isVCSrcB64() const {
505     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
506   }
507 
508   bool isVCSrcB16() const {
509     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
510   }
511 
512   bool isVCSrcV2B16() const {
513     return isVCSrcB16();
514   }
515 
516   bool isVCSrcF32() const {
517     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
518   }
519 
520   bool isVCSrcF64() const {
521     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
522   }
523 
524   bool isVCSrcF16() const {
525     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
526   }
527 
528   bool isVCSrcV2F16() const {
529     return isVCSrcF16();
530   }
531 
532   bool isVSrcB32() const {
533     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
534   }
535 
536   bool isVSrcB64() const {
537     return isVCSrcF64() || isLiteralImm(MVT::i64);
538   }
539 
540   bool isVSrcB16() const {
541     return isVCSrcB16() || isLiteralImm(MVT::i16);
542   }
543 
544   bool isVSrcV2B16() const {
545     return isVSrcB16() || isLiteralImm(MVT::v2i16);
546   }
547 
548   bool isVCSrcV2FP32() const {
549     return isVCSrcF64();
550   }
551 
552   bool isVSrcV2FP32() const {
553     return isVSrcF64() || isLiteralImm(MVT::v2f32);
554   }
555 
556   bool isVCSrcV2INT32() const {
557     return isVCSrcB64();
558   }
559 
560   bool isVSrcV2INT32() const {
561     return isVSrcB64() || isLiteralImm(MVT::v2i32);
562   }
563 
564   bool isVSrcF32() const {
565     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
566   }
567 
568   bool isVSrcF64() const {
569     return isVCSrcF64() || isLiteralImm(MVT::f64);
570   }
571 
572   bool isVSrcF16() const {
573     return isVCSrcF16() || isLiteralImm(MVT::f16);
574   }
575 
576   bool isVSrcV2F16() const {
577     return isVSrcF16() || isLiteralImm(MVT::v2f16);
578   }
579 
580   bool isVISrcB32() const {
581     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
582   }
583 
584   bool isVISrcB16() const {
585     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
586   }
587 
588   bool isVISrcV2B16() const {
589     return isVISrcB16();
590   }
591 
592   bool isVISrcF32() const {
593     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
594   }
595 
596   bool isVISrcF16() const {
597     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
598   }
599 
600   bool isVISrcV2F16() const {
601     return isVISrcF16() || isVISrcB32();
602   }
603 
604   bool isVISrc_64B64() const {
605     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
606   }
607 
608   bool isVISrc_64F64() const {
609     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
610   }
611 
612   bool isVISrc_64V2FP32() const {
613     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
614   }
615 
616   bool isVISrc_64V2INT32() const {
617     return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
618   }
619 
620   bool isVISrc_256B64() const {
621     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
622   }
623 
624   bool isVISrc_256F64() const {
625     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
626   }
627 
628   bool isVISrc_128B16() const {
629     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
630   }
631 
632   bool isVISrc_128V2B16() const {
633     return isVISrc_128B16();
634   }
635 
636   bool isVISrc_128B32() const {
637     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
638   }
639 
640   bool isVISrc_128F32() const {
641     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
642   }
643 
644   bool isVISrc_256V2FP32() const {
645     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
646   }
647 
648   bool isVISrc_256V2INT32() const {
649     return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
650   }
651 
652   bool isVISrc_512B32() const {
653     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
654   }
655 
656   bool isVISrc_512B16() const {
657     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
658   }
659 
660   bool isVISrc_512V2B16() const {
661     return isVISrc_512B16();
662   }
663 
664   bool isVISrc_512F32() const {
665     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
666   }
667 
668   bool isVISrc_512F16() const {
669     return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
670   }
671 
672   bool isVISrc_512V2F16() const {
673     return isVISrc_512F16() || isVISrc_512B32();
674   }
675 
676   bool isVISrc_1024B32() const {
677     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
678   }
679 
680   bool isVISrc_1024B16() const {
681     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
682   }
683 
684   bool isVISrc_1024V2B16() const {
685     return isVISrc_1024B16();
686   }
687 
688   bool isVISrc_1024F32() const {
689     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
690   }
691 
692   bool isVISrc_1024F16() const {
693     return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
694   }
695 
696   bool isVISrc_1024V2F16() const {
697     return isVISrc_1024F16() || isVISrc_1024B32();
698   }
699 
700   bool isAISrcB32() const {
701     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
702   }
703 
704   bool isAISrcB16() const {
705     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
706   }
707 
708   bool isAISrcV2B16() const {
709     return isAISrcB16();
710   }
711 
712   bool isAISrcF32() const {
713     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
714   }
715 
716   bool isAISrcF16() const {
717     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
718   }
719 
720   bool isAISrcV2F16() const {
721     return isAISrcF16() || isAISrcB32();
722   }
723 
724   bool isAISrc_64B64() const {
725     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
726   }
727 
728   bool isAISrc_64F64() const {
729     return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
730   }
731 
732   bool isAISrc_128B32() const {
733     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
734   }
735 
736   bool isAISrc_128B16() const {
737     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
738   }
739 
740   bool isAISrc_128V2B16() const {
741     return isAISrc_128B16();
742   }
743 
744   bool isAISrc_128F32() const {
745     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
746   }
747 
748   bool isAISrc_128F16() const {
749     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
750   }
751 
752   bool isAISrc_128V2F16() const {
753     return isAISrc_128F16() || isAISrc_128B32();
754   }
755 
756   bool isVISrc_128F16() const {
757     return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
758   }
759 
760   bool isVISrc_128V2F16() const {
761     return isVISrc_128F16() || isVISrc_128B32();
762   }
763 
764   bool isAISrc_256B64() const {
765     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
766   }
767 
768   bool isAISrc_256F64() const {
769     return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
770   }
771 
772   bool isAISrc_512B32() const {
773     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
774   }
775 
776   bool isAISrc_512B16() const {
777     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
778   }
779 
780   bool isAISrc_512V2B16() const {
781     return isAISrc_512B16();
782   }
783 
784   bool isAISrc_512F32() const {
785     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
786   }
787 
788   bool isAISrc_512F16() const {
789     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
790   }
791 
792   bool isAISrc_512V2F16() const {
793     return isAISrc_512F16() || isAISrc_512B32();
794   }
795 
796   bool isAISrc_1024B32() const {
797     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
798   }
799 
800   bool isAISrc_1024B16() const {
801     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
802   }
803 
804   bool isAISrc_1024V2B16() const {
805     return isAISrc_1024B16();
806   }
807 
808   bool isAISrc_1024F32() const {
809     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
810   }
811 
812   bool isAISrc_1024F16() const {
813     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
814   }
815 
816   bool isAISrc_1024V2F16() const {
817     return isAISrc_1024F16() || isAISrc_1024B32();
818   }
819 
820   bool isKImmFP32() const {
821     return isLiteralImm(MVT::f32);
822   }
823 
824   bool isKImmFP16() const {
825     return isLiteralImm(MVT::f16);
826   }
827 
828   bool isMem() const override {
829     return false;
830   }
831 
832   bool isExpr() const {
833     return Kind == Expression;
834   }
835 
836   bool isSoppBrTarget() const {
837     return isExpr() || isImm();
838   }
839 
840   bool isSWaitCnt() const;
841   bool isDepCtr() const;
842   bool isSDelayAlu() const;
843   bool isHwreg() const;
844   bool isSendMsg() const;
845   bool isSwizzle() const;
846   bool isSMRDOffset8() const;
847   bool isSMEMOffset() const;
848   bool isSMRDLiteralOffset() const;
849   bool isDPP8() const;
850   bool isDPPCtrl() const;
851   bool isBLGP() const;
852   bool isCBSZ() const;
853   bool isABID() const;
854   bool isGPRIdxMode() const;
855   bool isS16Imm() const;
856   bool isU16Imm() const;
857   bool isEndpgm() const;
858   bool isWaitVDST() const;
859   bool isWaitEXP() const;
860 
861   StringRef getExpressionAsToken() const {
862     assert(isExpr());
863     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
864     return S->getSymbol().getName();
865   }
866 
867   StringRef getToken() const {
868     assert(isToken());
869 
870     if (Kind == Expression)
871       return getExpressionAsToken();
872 
873     return StringRef(Tok.Data, Tok.Length);
874   }
875 
876   int64_t getImm() const {
877     assert(isImm());
878     return Imm.Val;
879   }
880 
881   void setImm(int64_t Val) {
882     assert(isImm());
883     Imm.Val = Val;
884   }
885 
886   ImmTy getImmTy() const {
887     assert(isImm());
888     return Imm.Type;
889   }
890 
891   unsigned getReg() const override {
892     assert(isRegKind());
893     return Reg.RegNo;
894   }
895 
896   SMLoc getStartLoc() const override {
897     return StartLoc;
898   }
899 
900   SMLoc getEndLoc() const override {
901     return EndLoc;
902   }
903 
904   SMRange getLocRange() const {
905     return SMRange(StartLoc, EndLoc);
906   }
907 
908   Modifiers getModifiers() const {
909     assert(isRegKind() || isImmTy(ImmTyNone));
910     return isRegKind() ? Reg.Mods : Imm.Mods;
911   }
912 
913   void setModifiers(Modifiers Mods) {
914     assert(isRegKind() || isImmTy(ImmTyNone));
915     if (isRegKind())
916       Reg.Mods = Mods;
917     else
918       Imm.Mods = Mods;
919   }
920 
921   bool hasModifiers() const {
922     return getModifiers().hasModifiers();
923   }
924 
925   bool hasFPModifiers() const {
926     return getModifiers().hasFPModifiers();
927   }
928 
929   bool hasIntModifiers() const {
930     return getModifiers().hasIntModifiers();
931   }
932 
933   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
934 
935   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
936 
937   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
938 
939   template <unsigned Bitwidth>
940   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
941 
942   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
943     addKImmFPOperands<16>(Inst, N);
944   }
945 
946   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
947     addKImmFPOperands<32>(Inst, N);
948   }
949 
950   void addRegOperands(MCInst &Inst, unsigned N) const;
951 
952   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
953     addRegOperands(Inst, N);
954   }
955 
956   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
957     if (isRegKind())
958       addRegOperands(Inst, N);
959     else if (isExpr())
960       Inst.addOperand(MCOperand::createExpr(Expr));
961     else
962       addImmOperands(Inst, N);
963   }
964 
965   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
966     Modifiers Mods = getModifiers();
967     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
968     if (isRegKind()) {
969       addRegOperands(Inst, N);
970     } else {
971       addImmOperands(Inst, N, false);
972     }
973   }
974 
975   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
976     assert(!hasIntModifiers());
977     addRegOrImmWithInputModsOperands(Inst, N);
978   }
979 
980   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
981     assert(!hasFPModifiers());
982     addRegOrImmWithInputModsOperands(Inst, N);
983   }
984 
985   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
986     Modifiers Mods = getModifiers();
987     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
988     assert(isRegKind());
989     addRegOperands(Inst, N);
990   }
991 
992   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
993     assert(!hasIntModifiers());
994     addRegWithInputModsOperands(Inst, N);
995   }
996 
997   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
998     assert(!hasFPModifiers());
999     addRegWithInputModsOperands(Inst, N);
1000   }
1001 
1002   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
1003     if (isImm())
1004       addImmOperands(Inst, N);
1005     else {
1006       assert(isExpr());
1007       Inst.addOperand(MCOperand::createExpr(Expr));
1008     }
1009   }
1010 
1011   static void printImmTy(raw_ostream& OS, ImmTy Type) {
1012     switch (Type) {
1013     case ImmTyNone: OS << "None"; break;
1014     case ImmTyGDS: OS << "GDS"; break;
1015     case ImmTyLDS: OS << "LDS"; break;
1016     case ImmTyOffen: OS << "Offen"; break;
1017     case ImmTyIdxen: OS << "Idxen"; break;
1018     case ImmTyAddr64: OS << "Addr64"; break;
1019     case ImmTyOffset: OS << "Offset"; break;
1020     case ImmTyInstOffset: OS << "InstOffset"; break;
1021     case ImmTyOffset0: OS << "Offset0"; break;
1022     case ImmTyOffset1: OS << "Offset1"; break;
1023     case ImmTyCPol: OS << "CPol"; break;
1024     case ImmTySWZ: OS << "SWZ"; break;
1025     case ImmTyTFE: OS << "TFE"; break;
1026     case ImmTyD16: OS << "D16"; break;
1027     case ImmTyFORMAT: OS << "FORMAT"; break;
1028     case ImmTyClampSI: OS << "ClampSI"; break;
1029     case ImmTyOModSI: OS << "OModSI"; break;
1030     case ImmTyDPP8: OS << "DPP8"; break;
1031     case ImmTyDppCtrl: OS << "DppCtrl"; break;
1032     case ImmTyDppRowMask: OS << "DppRowMask"; break;
1033     case ImmTyDppBankMask: OS << "DppBankMask"; break;
1034     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
1035     case ImmTyDppFi: OS << "FI"; break;
1036     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
1037     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
1038     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
1039     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
1040     case ImmTyDMask: OS << "DMask"; break;
1041     case ImmTyDim: OS << "Dim"; break;
1042     case ImmTyUNorm: OS << "UNorm"; break;
1043     case ImmTyDA: OS << "DA"; break;
1044     case ImmTyR128A16: OS << "R128A16"; break;
1045     case ImmTyA16: OS << "A16"; break;
1046     case ImmTyLWE: OS << "LWE"; break;
1047     case ImmTyOff: OS << "Off"; break;
1048     case ImmTyExpTgt: OS << "ExpTgt"; break;
1049     case ImmTyExpCompr: OS << "ExpCompr"; break;
1050     case ImmTyExpVM: OS << "ExpVM"; break;
1051     case ImmTyHwreg: OS << "Hwreg"; break;
1052     case ImmTySendMsg: OS << "SendMsg"; break;
1053     case ImmTyInterpSlot: OS << "InterpSlot"; break;
1054     case ImmTyInterpAttr: OS << "InterpAttr"; break;
1055     case ImmTyAttrChan: OS << "AttrChan"; break;
1056     case ImmTyOpSel: OS << "OpSel"; break;
1057     case ImmTyOpSelHi: OS << "OpSelHi"; break;
1058     case ImmTyNegLo: OS << "NegLo"; break;
1059     case ImmTyNegHi: OS << "NegHi"; break;
1060     case ImmTySwizzle: OS << "Swizzle"; break;
1061     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
1062     case ImmTyHigh: OS << "High"; break;
1063     case ImmTyBLGP: OS << "BLGP"; break;
1064     case ImmTyCBSZ: OS << "CBSZ"; break;
1065     case ImmTyABID: OS << "ABID"; break;
1066     case ImmTyEndpgm: OS << "Endpgm"; break;
1067     case ImmTyWaitVDST: OS << "WaitVDST"; break;
1068     case ImmTyWaitEXP: OS << "WaitEXP"; break;
1069     }
1070   }
1071 
1072   void print(raw_ostream &OS) const override {
1073     switch (Kind) {
1074     case Register:
1075       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
1076       break;
1077     case Immediate:
1078       OS << '<' << getImm();
1079       if (getImmTy() != ImmTyNone) {
1080         OS << " type: "; printImmTy(OS, getImmTy());
1081       }
1082       OS << " mods: " << Imm.Mods << '>';
1083       break;
1084     case Token:
1085       OS << '\'' << getToken() << '\'';
1086       break;
1087     case Expression:
1088       OS << "<expr " << *Expr << '>';
1089       break;
1090     }
1091   }
1092 
1093   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1094                                       int64_t Val, SMLoc Loc,
1095                                       ImmTy Type = ImmTyNone,
1096                                       bool IsFPImm = false) {
1097     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1098     Op->Imm.Val = Val;
1099     Op->Imm.IsFPImm = IsFPImm;
1100     Op->Imm.Kind = ImmKindTyNone;
1101     Op->Imm.Type = Type;
1102     Op->Imm.Mods = Modifiers();
1103     Op->StartLoc = Loc;
1104     Op->EndLoc = Loc;
1105     return Op;
1106   }
1107 
1108   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1109                                         StringRef Str, SMLoc Loc,
1110                                         bool HasExplicitEncodingSize = true) {
1111     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1112     Res->Tok.Data = Str.data();
1113     Res->Tok.Length = Str.size();
1114     Res->StartLoc = Loc;
1115     Res->EndLoc = Loc;
1116     return Res;
1117   }
1118 
1119   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1120                                       unsigned RegNo, SMLoc S,
1121                                       SMLoc E) {
1122     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1123     Op->Reg.RegNo = RegNo;
1124     Op->Reg.Mods = Modifiers();
1125     Op->StartLoc = S;
1126     Op->EndLoc = E;
1127     return Op;
1128   }
1129 
1130   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1131                                        const class MCExpr *Expr, SMLoc S) {
1132     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1133     Op->Expr = Expr;
1134     Op->StartLoc = S;
1135     Op->EndLoc = S;
1136     return Op;
1137   }
1138 };
1139 
1140 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1141   OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
1142   return OS;
1143 }
1144 
1145 //===----------------------------------------------------------------------===//
1146 // AsmParser
1147 //===----------------------------------------------------------------------===//
1148 
1149 // Holds information related to the current kernel, e.g. the count of SGPRs used.
1150 // A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the next
1151 // .amdgpu_hsa_kernel directive or at EOF.
1152 class KernelScopeInfo {
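  // One past the highest SGPR/VGPR/AGPR index used so far in the current kernel;
  // -1 until the kernel scope is initialized.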
1153   int SgprIndexUnusedMin = -1;
1154   int VgprIndexUnusedMin = -1;
1155   int AgprIndexUnusedMin = -1;
1156   MCContext *Ctx = nullptr;
1157   MCSubtargetInfo const *MSTI = nullptr;
1158 
1159   void usesSgprAt(int i) {
1160     if (i >= SgprIndexUnusedMin) {
1161       SgprIndexUnusedMin = ++i;
1162       if (Ctx) {
1163         MCSymbol* const Sym =
1164           Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1165         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1166       }
1167     }
1168   }
1169 
1170   void usesVgprAt(int i) {
1171     if (i >= VgprIndexUnusedMin) {
1172       VgprIndexUnusedMin = ++i;
1173       if (Ctx) {
1174         MCSymbol* const Sym =
1175           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1176         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1177                                          VgprIndexUnusedMin);
1178         Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1179       }
1180     }
1181   }
1182 
1183   void usesAgprAt(int i) {
1184     // Instructions that use AGPRs will be rejected in AMDGPUAsmParser::MatchAndEmitInstruction.
1185     if (!hasMAIInsts(*MSTI))
1186       return;
1187 
1188     if (i >= AgprIndexUnusedMin) {
1189       AgprIndexUnusedMin = ++i;
1190       if (Ctx) {
1191         MCSymbol* const Sym =
1192           Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1193         Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1194 
1195         // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1196         MCSymbol* const vSym =
1197           Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1198         int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1199                                          VgprIndexUnusedMin);
1200         vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1201       }
1202     }
1203   }
1204 
1205 public:
1206   KernelScopeInfo() = default;
1207 
1208   void initialize(MCContext &Context) {
1209     Ctx = &Context;
1210     MSTI = Ctx->getSubtargetInfo();
1211 
1212     usesSgprAt(SgprIndexUnusedMin = -1);
1213     usesVgprAt(VgprIndexUnusedMin = -1);
1214     if (hasMAIInsts(*MSTI)) {
1215       usesAgprAt(AgprIndexUnusedMin = -1);
1216     }
1217   }
1218 
1219   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1220                     unsigned RegWidth) {
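    // RegWidth is in bits; record the highest 32-bit register index touched by this use.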
1221     switch (RegKind) {
1222     case IS_SGPR:
1223       usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1224       break;
1225     case IS_AGPR:
1226       usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1227       break;
1228     case IS_VGPR:
1229       usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1230       break;
1231     default:
1232       break;
1233     }
1234   }
1235 };
1236 
1237 class AMDGPUAsmParser : public MCTargetAsmParser {
1238   MCAsmParser &Parser;
1239 
1240   // Maximum number of extra operands to parse after the first optional operand.
1241   // This lookahead may be needed to skip hardcoded mandatory operands.
1242   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1243 
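  // Encoding or variant forced by a mnemonic suffix such as _e32/_e64, _dpp or _sdwa.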
1244   unsigned ForcedEncodingSize = 0;
1245   bool ForcedDPP = false;
1246   bool ForcedSDWA = false;
1247   KernelScopeInfo KernelScope;
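  // Cache policy (cpol) modifier bits seen so far on the current instruction.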
1248   unsigned CPolSeen;
1249 
1250   /// @name Auto-generated Match Functions
1251   /// {
1252 
1253 #define GET_ASSEMBLER_HEADER
1254 #include "AMDGPUGenAsmMatcher.inc"
1255 
1256   /// }
1257 
1258 private:
1259   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1260   bool OutOfRangeError(SMRange Range);
1261   /// Calculate the VGPR/SGPR blocks required for the given target, reserved
1262   /// registers, and user-specified NextFreeXGPR values.
1263   ///
1264   /// \param Features [in] Target features, used for bug corrections.
1265   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1266   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1267   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1268   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1269   /// descriptor field, if valid.
1270   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1271   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1272   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1273   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1274   /// \param VGPRBlocks [out] Result VGPR block count.
1275   /// \param SGPRBlocks [out] Result SGPR block count.
1276   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1277                           bool FlatScrUsed, bool XNACKUsed,
1278                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1279                           SMRange VGPRRange, unsigned NextFreeSGPR,
1280                           SMRange SGPRRange, unsigned &VGPRBlocks,
1281                           unsigned &SGPRBlocks);
1282   bool ParseDirectiveAMDGCNTarget();
1283   bool ParseDirectiveAMDHSAKernel();
1284   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1285   bool ParseDirectiveHSACodeObjectVersion();
1286   bool ParseDirectiveHSACodeObjectISA();
1287   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1288   bool ParseDirectiveAMDKernelCodeT();
1289   // TODO: Possibly make subtargetHasRegister const.
1290   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1291   bool ParseDirectiveAMDGPUHsaKernel();
1292 
1293   bool ParseDirectiveISAVersion();
1294   bool ParseDirectiveHSAMetadata();
1295   bool ParseDirectivePALMetadataBegin();
1296   bool ParseDirectivePALMetadata();
1297   bool ParseDirectiveAMDGPULDS();
1298 
1299   /// Common code to parse out a block of text (typically YAML) between start and
1300   /// end directives.
1301   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1302                            const char *AssemblerDirectiveEnd,
1303                            std::string &CollectString);
1304 
1305   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1306                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1307   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1308                            unsigned &RegNum, unsigned &RegWidth,
1309                            bool RestoreOnFailure = false);
1310   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1311                            unsigned &RegNum, unsigned &RegWidth,
1312                            SmallVectorImpl<AsmToken> &Tokens);
1313   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1314                            unsigned &RegWidth,
1315                            SmallVectorImpl<AsmToken> &Tokens);
1316   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1317                            unsigned &RegWidth,
1318                            SmallVectorImpl<AsmToken> &Tokens);
1319   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1320                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1321   bool ParseRegRange(unsigned& Num, unsigned& Width);
1322   unsigned getRegularReg(RegisterKind RegKind,
1323                          unsigned RegNum,
1324                          unsigned RegWidth,
1325                          SMLoc Loc);
1326 
1327   bool isRegister();
1328   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1329   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1330   void initializeGprCountSymbol(RegisterKind RegKind);
1331   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1332                              unsigned RegWidth);
1333   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1334                     bool IsAtomic, bool IsLds = false);
1335   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1336                  bool IsGdsHardcoded);
1337 
1338 public:
1339   enum AMDGPUMatchResultTy {
1340     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1341   };
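  // Operand parsing mode; OperandMode_NSA selects the non-sequential address (NSA)
  // form used by image instructions.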
1342   enum OperandMode {
1343     OperandMode_Default,
1344     OperandMode_NSA,
1345   };
1346 
1347   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1348 
1349   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1350                const MCInstrInfo &MII,
1351                const MCTargetOptions &Options)
1352       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1353     MCAsmParserExtension::Initialize(Parser);
1354 
1355     if (getFeatureBits().none()) {
1356       // Set default features.
1357       copySTI().ToggleFeature("southern-islands");
1358     }
1359 
1360     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1361 
1362     {
1363       // TODO: make these pre-defined variables read-only.
1364       // Currently there is no suitable machinery in core llvm-mc for this:
1365       // MCSymbol::isRedefinable is intended for another purpose, and
1366       // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
1367       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1368       MCContext &Ctx = getContext();
1369       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1370         MCSymbol *Sym =
1371             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1372         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1373         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1374         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1375         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1376         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1377       } else {
1378         MCSymbol *Sym =
1379             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1380         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1381         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1382         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1383         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1384         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1385       }
1386       if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
1387         initializeGprCountSymbol(IS_VGPR);
1388         initializeGprCountSymbol(IS_SGPR);
1389       } else
1390         KernelScope.initialize(getContext());
1391     }
1392   }
1393 
1394   bool hasMIMG_R128() const {
1395     return AMDGPU::hasMIMG_R128(getSTI());
1396   }
1397 
1398   bool hasPackedD16() const {
1399     return AMDGPU::hasPackedD16(getSTI());
1400   }
1401 
1402   bool hasGFX10A16() const {
1403     return AMDGPU::hasGFX10A16(getSTI());
1404   }
1405 
1406   bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1407 
1408   bool isSI() const {
1409     return AMDGPU::isSI(getSTI());
1410   }
1411 
1412   bool isCI() const {
1413     return AMDGPU::isCI(getSTI());
1414   }
1415 
1416   bool isVI() const {
1417     return AMDGPU::isVI(getSTI());
1418   }
1419 
1420   bool isGFX9() const {
1421     return AMDGPU::isGFX9(getSTI());
1422   }
1423 
1424   // TODO: isGFX90A is also true for GFX940. We need to clean this up.
1425   bool isGFX90A() const {
1426     return AMDGPU::isGFX90A(getSTI());
1427   }
1428 
1429   bool isGFX940() const {
1430     return AMDGPU::isGFX940(getSTI());
1431   }
1432 
1433   bool isGFX9Plus() const {
1434     return AMDGPU::isGFX9Plus(getSTI());
1435   }
1436 
1437   bool isGFX10() const {
1438     return AMDGPU::isGFX10(getSTI());
1439   }
1440 
1441   bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1442 
1443   bool isGFX11() const {
1444     return AMDGPU::isGFX11(getSTI());
1445   }
1446 
1447   bool isGFX11Plus() const {
1448     return AMDGPU::isGFX11Plus(getSTI());
1449   }
1450 
1451   bool isGFX10_BEncoding() const {
1452     return AMDGPU::isGFX10_BEncoding(getSTI());
1453   }
1454 
1455   bool hasInv2PiInlineImm() const {
1456     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1457   }
1458 
1459   bool hasFlatOffsets() const {
1460     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1461   }
1462 
1463   bool hasArchitectedFlatScratch() const {
1464     return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1465   }
1466 
1467   bool hasSGPR102_SGPR103() const {
1468     return !isVI() && !isGFX9();
1469   }
1470 
1471   bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1472 
1473   bool hasIntClamp() const {
1474     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1475   }
1476 
1477   AMDGPUTargetStreamer &getTargetStreamer() {
1478     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1479     return static_cast<AMDGPUTargetStreamer &>(TS);
1480   }
1481 
1482   const MCRegisterInfo *getMRI() const {
1483     // We need this const_cast because for some reason getContext() is not const
1484     // in MCAsmParser.
1485     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1486   }
1487 
1488   const MCInstrInfo *getMII() const {
1489     return &MII;
1490   }
1491 
1492   const FeatureBitset &getFeatureBits() const {
1493     return getSTI().getFeatureBits();
1494   }
1495 
1496   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1497   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1498   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1499 
1500   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1501   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1502   bool isForcedDPP() const { return ForcedDPP; }
1503   bool isForcedSDWA() const { return ForcedSDWA; }
1504   ArrayRef<unsigned> getMatchedVariants() const;
1505   StringRef getMatchedVariantName() const;
1506 
1507   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1508   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1509                      bool RestoreOnFailure);
1510   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1511   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1512                                         SMLoc &EndLoc) override;
1513   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1514   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1515                                       unsigned Kind) override;
1516   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1517                                OperandVector &Operands, MCStreamer &Out,
1518                                uint64_t &ErrorInfo,
1519                                bool MatchingInlineAsm) override;
1520   bool ParseDirective(AsmToken DirectiveID) override;
1521   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1522                                     OperandMode Mode = OperandMode_Default);
1523   StringRef parseMnemonicSuffix(StringRef Name);
1524   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1525                         SMLoc NameLoc, OperandVector &Operands) override;
1526   //bool ProcessInstruction(MCInst &Inst);
1527 
1528   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1529 
1530   OperandMatchResultTy
1531   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1532                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1533                      bool (*ConvertResult)(int64_t &) = nullptr);
1534 
1535   OperandMatchResultTy
1536   parseOperandArrayWithPrefix(const char *Prefix,
1537                               OperandVector &Operands,
1538                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1539                               bool (*ConvertResult)(int64_t&) = nullptr);
1540 
1541   OperandMatchResultTy
1542   parseNamedBit(StringRef Name, OperandVector &Operands,
1543                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1544   OperandMatchResultTy parseCPol(OperandVector &Operands);
1545   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1546                                              StringRef &Value,
1547                                              SMLoc &StringLoc);
1548 
1549   bool isModifier();
1550   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1551   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1552   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1553   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1554   bool parseSP3NegModifier();
1555   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1556   OperandMatchResultTy parseReg(OperandVector &Operands);
1557   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1558   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1559   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1560   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1561   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1562   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1563   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
1564   OperandMatchResultTy parseUfmt(int64_t &Format);
1565   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1566   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
1567   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
1568   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
1569   OperandMatchResultTy parseNumericFormat(int64_t &Format);
1570   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1571   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1572 
1573   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1574   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1575   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1576   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1577 
1578   bool parseCnt(int64_t &IntVal);
1579   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1580 
1581   bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1582   void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1583   OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
1584 
1585   bool parseDelay(int64_t &Delay);
1586   OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
1587 
1588   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1589 
1590 private:
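  // One parsed field of a structured operand (e.g. a hwreg or sendmsg argument):
  // its value, source location, and whether it was given symbolically and was
  // actually specified.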
1591   struct OperandInfoTy {
1592     SMLoc Loc;
1593     int64_t Id;
1594     bool IsSymbolic = false;
1595     bool IsDefined = false;
1596 
1597     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1598   };
1599 
1600   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1601   bool validateSendMsg(const OperandInfoTy &Msg,
1602                        const OperandInfoTy &Op,
1603                        const OperandInfoTy &Stream);
1604 
1605   bool parseHwregBody(OperandInfoTy &HwReg,
1606                       OperandInfoTy &Offset,
1607                       OperandInfoTy &Width);
1608   bool validateHwreg(const OperandInfoTy &HwReg,
1609                      const OperandInfoTy &Offset,
1610                      const OperandInfoTy &Width);
1611 
1612   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1613   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1614   SMLoc getBLGPLoc(const OperandVector &Operands) const;
1615 
1616   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1617                       const OperandVector &Operands) const;
1618   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1619   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1620   SMLoc getLitLoc(const OperandVector &Operands) const;
1621   SMLoc getConstLoc(const OperandVector &Operands) const;
1622 
1623   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1624   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1625   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1626   bool validateSOPLiteral(const MCInst &Inst) const;
1627   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1628   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
1629   bool validateIntClampSupported(const MCInst &Inst);
1630   bool validateMIMGAtomicDMask(const MCInst &Inst);
1631   bool validateMIMGGatherDMask(const MCInst &Inst);
1632   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1633   Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
1634   bool validateMIMGAddrSize(const MCInst &Inst);
1635   bool validateMIMGD16(const MCInst &Inst);
1636   bool validateMIMGDim(const MCInst &Inst);
1637   bool validateMIMGMSAA(const MCInst &Inst);
1638   bool validateOpSel(const MCInst &Inst);
1639   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1640   bool validateVccOperand(unsigned Reg) const;
1641   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1642   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1643   bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1644   bool validateAGPRLdSt(const MCInst &Inst) const;
1645   bool validateVGPRAlign(const MCInst &Inst) const;
1646   bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1647   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1648   bool validateDivScale(const MCInst &Inst);
1649   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1650                              const SMLoc &IDLoc);
1651   bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
1652                           const SMLoc &IDLoc);
1653   bool validateExeczVcczOperands(const OperandVector &Operands);
1654   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
1655   unsigned getConstantBusLimit(unsigned Opcode) const;
1656   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1657   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1658   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1659 
1660   bool isSupportedMnemo(StringRef Mnemo,
1661                         const FeatureBitset &FBS);
1662   bool isSupportedMnemo(StringRef Mnemo,
1663                         const FeatureBitset &FBS,
1664                         ArrayRef<unsigned> Variants);
1665   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1666 
1667   bool isId(const StringRef Id) const;
1668   bool isId(const AsmToken &Token, const StringRef Id) const;
1669   bool isToken(const AsmToken::TokenKind Kind) const;
1670   bool trySkipId(const StringRef Id);
1671   bool trySkipId(const StringRef Pref, const StringRef Id);
1672   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1673   bool trySkipToken(const AsmToken::TokenKind Kind);
1674   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1675   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1676   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1677 
1678   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1679   AsmToken::TokenKind getTokenKind() const;
1680   bool parseExpr(int64_t &Imm, StringRef Expected = "");
1681   bool parseExpr(OperandVector &Operands);
1682   StringRef getTokenStr() const;
1683   AsmToken peekToken(bool ShouldSkipSpace = true);
1684   AsmToken getToken() const;
1685   SMLoc getLoc() const;
1686   void lex();
1687 
1688 public:
1689   void onBeginOfFile() override;
1690 
1691   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1692   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1693 
1694   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1695   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1696   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1697   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1698   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1699   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1700 
1701   bool parseSwizzleOperand(int64_t &Op,
1702                            const unsigned MinVal,
1703                            const unsigned MaxVal,
1704                            const StringRef ErrMsg,
1705                            SMLoc &Loc);
1706   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1707                             const unsigned MinVal,
1708                             const unsigned MaxVal,
1709                             const StringRef ErrMsg);
1710   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1711   bool parseSwizzleOffset(int64_t &Imm);
1712   bool parseSwizzleMacro(int64_t &Imm);
1713   bool parseSwizzleQuadPerm(int64_t &Imm);
1714   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1715   bool parseSwizzleBroadcast(int64_t &Imm);
1716   bool parseSwizzleSwap(int64_t &Imm);
1717   bool parseSwizzleReverse(int64_t &Imm);
1718 
1719   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1720   int64_t parseGPRIdxMacro();
1721 
1722   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
1723   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1724   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
1725   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1726 
1727   AMDGPUOperand::Ptr defaultCPol() const;
1728 
1729   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1730   AMDGPUOperand::Ptr defaultSMEMOffset() const;
1731   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1732   AMDGPUOperand::Ptr defaultFlatOffset() const;
1733 
1734   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1735 
1736   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1737                OptionalImmIndexMap &OptionalIdx);
1738   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1739   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1740   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1741   void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1742   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1743                 OptionalImmIndexMap &OptionalIdx);
1744 
1745   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1746   void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1747 
1748   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1749                bool IsAtomic = false);
1750   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1751   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
1752 
1753   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
1754 
1755   bool parseDimId(unsigned &Encoding);
1756   OperandMatchResultTy parseDim(OperandVector &Operands);
1757   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1758   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1759   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1760   int64_t parseDPPCtrlSel(StringRef Ctrl);
1761   int64_t parseDPPCtrlPerm();
1762   AMDGPUOperand::Ptr defaultRowMask() const;
1763   AMDGPUOperand::Ptr defaultBankMask() const;
1764   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1765   AMDGPUOperand::Ptr defaultFI() const;
1766   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1767   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1768     cvtDPP(Inst, Operands, true);
1769   }
1770   void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands,
1771                        bool IsDPP8 = false);
1772   void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1773     cvtVOPCNoDstDPP(Inst, Operands, true);
1774   }
1775   void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1776                   bool IsDPP8 = false);
1777   void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1778     cvtVOP3DPP(Inst, Operands, true);
1779   }
1780   void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands,
1781                          bool IsDPP8 = false);
1782   void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
1783     cvtVOPC64NoDstDPP(Inst, Operands, true);
1784   }
1785 
1786   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1787                                     AMDGPUOperand::ImmTy Type);
1788   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1789   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1790   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1791   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1792   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1793   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1794   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1795                uint64_t BasicInstType,
1796                bool SkipDstVcc = false,
1797                bool SkipSrcVcc = false);
1798 
1799   AMDGPUOperand::Ptr defaultBLGP() const;
1800   AMDGPUOperand::Ptr defaultCBSZ() const;
1801   AMDGPUOperand::Ptr defaultABID() const;
1802 
1803   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1804   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1805 
1806   AMDGPUOperand::Ptr defaultWaitVDST() const;
1807   AMDGPUOperand::Ptr defaultWaitEXP() const;
1808   OperandMatchResultTy parseVOPD(OperandVector &Operands);
1809 };
1810 
1811 struct OptionalOperand {
1812   const char *Name;
1813   AMDGPUOperand::ImmTy Type;
1814   bool IsBit;
1815   bool (*ConvertResult)(int64_t&);
1816 };
1817 
1818 } // end anonymous namespace
1819 
1820 // May be called with an integer type of equivalent bitwidth.
1821 static const fltSemantics *getFltSemantics(unsigned Size) {
1822   switch (Size) {
1823   case 4:
1824     return &APFloat::IEEEsingle();
1825   case 8:
1826     return &APFloat::IEEEdouble();
1827   case 2:
1828     return &APFloat::IEEEhalf();
1829   default:
1830     llvm_unreachable("unsupported fp type");
1831   }
1832 }
1833 
1834 static const fltSemantics *getFltSemantics(MVT VT) {
1835   return getFltSemantics(VT.getSizeInBits() / 8);
1836 }
1837 
1838 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1839   switch (OperandType) {
1840   case AMDGPU::OPERAND_REG_IMM_INT32:
1841   case AMDGPU::OPERAND_REG_IMM_FP32:
1842   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1843   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1844   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1845   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1846   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1847   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1848   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1849   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1850   case AMDGPU::OPERAND_REG_IMM_V2INT32:
1851   case AMDGPU::OPERAND_KIMM32:
1852     return &APFloat::IEEEsingle();
1853   case AMDGPU::OPERAND_REG_IMM_INT64:
1854   case AMDGPU::OPERAND_REG_IMM_FP64:
1855   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1856   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1857   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1858     return &APFloat::IEEEdouble();
1859   case AMDGPU::OPERAND_REG_IMM_INT16:
1860   case AMDGPU::OPERAND_REG_IMM_FP16:
1861   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1862   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1863   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1864   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1865   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1866   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1867   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1868   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1869   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1870   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1871   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1872   case AMDGPU::OPERAND_KIMM16:
1873     return &APFloat::IEEEhalf();
1874   default:
1875     llvm_unreachable("unsupported fp type");
1876   }
1877 }
1878 
1879 //===----------------------------------------------------------------------===//
1880 // Operand
1881 //===----------------------------------------------------------------------===//
1882 
1883 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1884   bool Lost;
1885 
1886   // Convert literal to the requested floating-point type
1887   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1888                                                APFloat::rmNearestTiesToEven,
1889                                                &Lost);
1890   // We allow precision loss but not overflow or underflow
1891   if (Status != APFloat::opOK &&
1892       Lost &&
1893       ((Status & APFloat::opOverflow)  != 0 ||
1894        (Status & APFloat::opUnderflow) != 0)) {
1895     return false;
1896   }
1897 
1898   return true;
1899 }
1900 
1901 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1902   return isUIntN(Size, Val) || isIntN(Size, Val);
1903 }
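// For illustration: with Size == 16, isSafeTruncation accepts any value
// representable as either a signed or an unsigned 16-bit integer, i.e.
// anything in [-0x8000, 0xFFFF]; values such as 0x10000 or -0x8001 are
// rejected.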
1904 
1905 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1906   if (VT.getScalarType() == MVT::i16) {
1907     // FP immediate values are broken for i16; accept only inlinable int literals.
1908     return isInlinableIntLiteral(Val);
1909   }
1910 
1911   // f16/v2f16 operands work correctly for all values.
1912   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1913 }
1914 
1915 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1916 
1917   // This is a hack to enable named inline values like
1918   // shared_base with both 32-bit and 64-bit operands.
1919   // Note that these values are defined as
1920   // 32-bit operands only.
1921   if (isInlineValue()) {
1922     return true;
1923   }
1924 
1925   if (!isImmTy(ImmTyNone)) {
1926     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1927     return false;
1928   }
1929   // TODO: We should avoid using host floats here. It would be better to
1930   // check the float bit values, which is what a few other places do.
1931   // We've had bot failures before due to weird NaN support on MIPS hosts.
1932 
1933   APInt Literal(64, Imm.Val);
1934 
1935   if (Imm.IsFPImm) { // We got fp literal token
1936     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1937       return AMDGPU::isInlinableLiteral64(Imm.Val,
1938                                           AsmParser->hasInv2PiInlineImm());
1939     }
1940 
1941     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1942     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1943       return false;
1944 
1945     if (type.getScalarSizeInBits() == 16) {
1946       return isInlineableLiteralOp16(
1947         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1948         type, AsmParser->hasInv2PiInlineImm());
1949     }
1950 
1951     // Check if single precision literal is inlinable
1952     return AMDGPU::isInlinableLiteral32(
1953       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1954       AsmParser->hasInv2PiInlineImm());
1955   }
1956 
1957   // We got int literal token.
1958   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1959     return AMDGPU::isInlinableLiteral64(Imm.Val,
1960                                         AsmParser->hasInv2PiInlineImm());
1961   }
1962 
1963   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1964     return false;
1965   }
1966 
1967   if (type.getScalarSizeInBits() == 16) {
1968     return isInlineableLiteralOp16(
1969       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1970       type, AsmParser->hasInv2PiInlineImm());
1971   }
1972 
1973   return AMDGPU::isInlinableLiteral32(
1974     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1975     AsmParser->hasInv2PiInlineImm());
1976 }
1977 
1978 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1979   // Check that this immediate can be added as a literal
1980   if (!isImmTy(ImmTyNone)) {
1981     return false;
1982   }
1983 
1984   if (!Imm.IsFPImm) {
1985     // We got int literal token.
1986 
1987     if (type == MVT::f64 && hasFPModifiers()) {
1988       // FP modifiers cannot be applied to int literals while preserving the same
1989       // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1990       // ambiguity, reject these cases.
1991       return false;
1992     }
1993 
1994     unsigned Size = type.getSizeInBits();
1995     if (Size == 64)
1996       Size = 32;
1997 
1998     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1999     // types.
2000     return isSafeTruncation(Imm.Val, Size);
2001   }
2002 
2003   // We got fp literal token
2004   if (type == MVT::f64) { // Expected 64-bit fp operand
2005     // We would set the low 32 bits of the literal to zero, but we accept such literals.
2006     return true;
2007   }
2008 
2009   if (type == MVT::i64) { // Expected 64-bit int operand
2010     // We don't allow fp literals in 64-bit integer instructions. It is
2011     // unclear how we should encode them.
2012     return false;
2013   }
2014 
2015   // We allow fp literals with f16x2 operands assuming that the specified
2016   // literal goes into the lower half and the upper half is zero. We also
2017   // require that the literal may be losslessly converted to f16.
2018   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2019                      (type == MVT::v2i16)? MVT::i16 :
2020                      (type == MVT::v2f32)? MVT::f32 : type;
2021 
2022   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2023   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2024 }
2025 
2026 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2027   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2028 }
2029 
2030 bool AMDGPUOperand::isVRegWithInputMods() const {
2031   return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2032          // GFX90A allows DPP on 64-bit operands.
2033          (isRegClass(AMDGPU::VReg_64RegClassID) &&
2034           AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
2035 }
2036 
2037 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2038   if (AsmParser->isVI())
2039     return isVReg32();
2040   else if (AsmParser->isGFX9Plus())
2041     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2042   else
2043     return false;
2044 }
2045 
2046 bool AMDGPUOperand::isSDWAFP16Operand() const {
2047   return isSDWAOperand(MVT::f16);
2048 }
2049 
2050 bool AMDGPUOperand::isSDWAFP32Operand() const {
2051   return isSDWAOperand(MVT::f32);
2052 }
2053 
2054 bool AMDGPUOperand::isSDWAInt16Operand() const {
2055   return isSDWAOperand(MVT::i16);
2056 }
2057 
2058 bool AMDGPUOperand::isSDWAInt32Operand() const {
2059   return isSDWAOperand(MVT::i32);
2060 }
2061 
2062 bool AMDGPUOperand::isBoolReg() const {
2063   auto FB = AsmParser->getFeatureBits();
2064   return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2065                      (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2066 }
2067 
2068 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2069 {
2070   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2071   assert(Size == 2 || Size == 4 || Size == 8);
2072 
2073   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2074 
2075   if (Imm.Mods.Abs) {
2076     Val &= ~FpSignMask;
2077   }
2078   if (Imm.Mods.Neg) {
2079     Val ^= FpSignMask;
2080   }
2081 
2082   return Val;
2083 }
2084 
2085 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2086   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2087                              Inst.getNumOperands())) {
2088     addLiteralImmOperand(Inst, Imm.Val,
2089                          ApplyModifiers &
2090                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2091   } else {
2092     assert(!isImmTy(ImmTyNone) || !hasModifiers());
2093     Inst.addOperand(MCOperand::createImm(Imm.Val));
2094     setImmKindNone();
2095   }
2096 }
2097 
2098 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2099   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2100   auto OpNum = Inst.getNumOperands();
2101   // Check that this operand accepts literals
2102   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2103 
2104   if (ApplyModifiers) {
2105     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
2106     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
2107     Val = applyInputFPModifiers(Val, Size);
2108   }
2109 
2110   APInt Literal(64, Val);
2111   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2112 
2113   if (Imm.IsFPImm) { // We got fp literal token
2114     switch (OpTy) {
2115     case AMDGPU::OPERAND_REG_IMM_INT64:
2116     case AMDGPU::OPERAND_REG_IMM_FP64:
2117     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2118     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2119     case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2120       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2121                                        AsmParser->hasInv2PiInlineImm())) {
2122         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2123         setImmKindConst();
2124         return;
2125       }
2126 
2127       // Non-inlineable
2128       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2129         // For fp operands we check if low 32 bits are zeros
2130         if (Literal.getLoBits(32) != 0) {
2131           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2132           "Can't encode literal as exact 64-bit floating-point operand. "
2133           "Low 32-bits will be set to zero");
2134         }
2135 
2136         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2137         setImmKindLiteral();
2138         return;
2139       }
2140 
2141       // We don't allow fp literals in 64-bit integer instructions. It is
2142       // unclear how we should encode them. This case should be checked earlier
2143       // in predicate methods (isLiteralImm())
2144       llvm_unreachable("fp literal in 64-bit integer instruction.");
2145 
2146     case AMDGPU::OPERAND_REG_IMM_INT32:
2147     case AMDGPU::OPERAND_REG_IMM_FP32:
2148     case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2149     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2150     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2151     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2152     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2153     case AMDGPU::OPERAND_REG_IMM_INT16:
2154     case AMDGPU::OPERAND_REG_IMM_FP16:
2155     case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2156     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2157     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2158     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2159     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2160     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2161     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2162     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2163     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2164     case AMDGPU::OPERAND_REG_IMM_V2INT16:
2165     case AMDGPU::OPERAND_REG_IMM_V2FP16:
2166     case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2167     case AMDGPU::OPERAND_REG_IMM_V2FP32:
2168     case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2169     case AMDGPU::OPERAND_REG_IMM_V2INT32:
2170     case AMDGPU::OPERAND_KIMM32:
2171     case AMDGPU::OPERAND_KIMM16: {
2172       bool lost;
2173       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2174       // Convert literal to the operand's floating-point type
2175       FPLiteral.convert(*getOpFltSemantics(OpTy),
2176                         APFloat::rmNearestTiesToEven, &lost);
2177       // We allow precision loss but not overflow or underflow. This should be
2178       // checked earlier in isLiteralImm().
2179 
2180       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2181       Inst.addOperand(MCOperand::createImm(ImmVal));
2182       setImmKindLiteral();
2183       return;
2184     }
2185     default:
2186       llvm_unreachable("invalid operand size");
2187     }
2188 
2189     return;
2190   }
2191 
2192   // We got int literal token.
2193   // Only sign extend inline immediates.
2194   switch (OpTy) {
2195   case AMDGPU::OPERAND_REG_IMM_INT32:
2196   case AMDGPU::OPERAND_REG_IMM_FP32:
2197   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2198   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2199   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2200   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2201   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2202   case AMDGPU::OPERAND_REG_IMM_V2INT16:
2203   case AMDGPU::OPERAND_REG_IMM_V2FP16:
2204   case AMDGPU::OPERAND_REG_IMM_V2FP32:
2205   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2206   case AMDGPU::OPERAND_REG_IMM_V2INT32:
2207   case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2208     if (isSafeTruncation(Val, 32) &&
2209         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
2210                                      AsmParser->hasInv2PiInlineImm())) {
2211       Inst.addOperand(MCOperand::createImm(Val));
2212       setImmKindConst();
2213       return;
2214     }
2215 
2216     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
2217     setImmKindLiteral();
2218     return;
2219 
2220   case AMDGPU::OPERAND_REG_IMM_INT64:
2221   case AMDGPU::OPERAND_REG_IMM_FP64:
2222   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2223   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2224   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2225     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
2226       Inst.addOperand(MCOperand::createImm(Val));
2227       setImmKindConst();
2228       return;
2229     }
2230 
2231     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
2232     setImmKindLiteral();
2233     return;
2234 
2235   case AMDGPU::OPERAND_REG_IMM_INT16:
2236   case AMDGPU::OPERAND_REG_IMM_FP16:
2237   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2238   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2239   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2240   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2241   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2242     if (isSafeTruncation(Val, 16) &&
2243         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2244                                      AsmParser->hasInv2PiInlineImm())) {
2245       Inst.addOperand(MCOperand::createImm(Val));
2246       setImmKindConst();
2247       return;
2248     }
2249 
2250     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
2251     setImmKindLiteral();
2252     return;
2253 
2254   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2255   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2256   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2257   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
2258     assert(isSafeTruncation(Val, 16));
2259     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
2260                                         AsmParser->hasInv2PiInlineImm()));
2261 
2262     Inst.addOperand(MCOperand::createImm(Val));
2263     return;
2264   }
2265   case AMDGPU::OPERAND_KIMM32:
2266     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
2267     setImmKindNone();
2268     return;
2269   case AMDGPU::OPERAND_KIMM16:
2270     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
2271     setImmKindNone();
2272     return;
2273   default:
2274     llvm_unreachable("invalid operand size");
2275   }
2276 }
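// For illustration: given a 64-bit fp operand, an inlinable fp literal such as
// 1.0 is emitted directly, while a non-inlinable one such as 1.1
// (0x3FF199999999999A) keeps only its high 32 bits (0x3FF19999); the low
// 32 bits are set to zero by the hardware, hence the warning above.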
2277 
2278 template <unsigned Bitwidth>
2279 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
2280   APInt Literal(64, Imm.Val);
2281   setImmKindNone();
2282 
2283   if (!Imm.IsFPImm) {
2284     // We got int literal token.
2285     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
2286     return;
2287   }
2288 
2289   bool Lost;
2290   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2291   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
2292                     APFloat::rmNearestTiesToEven, &Lost);
2293   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
2294 }
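// For illustration: with Bitwidth == 16, an integer token is truncated to its
// low 16 bits, while an fp token such as 1.0 is converted to IEEE half and
// encoded as 0x3C00.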
2295 
2296 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2297   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2298 }
2299 
2300 static bool isInlineValue(unsigned Reg) {
2301   switch (Reg) {
2302   case AMDGPU::SRC_SHARED_BASE:
2303   case AMDGPU::SRC_SHARED_LIMIT:
2304   case AMDGPU::SRC_PRIVATE_BASE:
2305   case AMDGPU::SRC_PRIVATE_LIMIT:
2306   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
2307     return true;
2308   case AMDGPU::SRC_VCCZ:
2309   case AMDGPU::SRC_EXECZ:
2310   case AMDGPU::SRC_SCC:
2311     return true;
2312   case AMDGPU::SGPR_NULL:
2313     return true;
2314   default:
2315     return false;
2316   }
2317 }
2318 
2319 bool AMDGPUOperand::isInlineValue() const {
2320   return isRegKind() && ::isInlineValue(getReg());
2321 }
2322 
2323 //===----------------------------------------------------------------------===//
2324 // AsmParser
2325 //===----------------------------------------------------------------------===//
2326 
2327 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2328   if (Is == IS_VGPR) {
2329     switch (RegWidth) {
2330       default: return -1;
2331       case 32:
2332         return AMDGPU::VGPR_32RegClassID;
2333       case 64:
2334         return AMDGPU::VReg_64RegClassID;
2335       case 96:
2336         return AMDGPU::VReg_96RegClassID;
2337       case 128:
2338         return AMDGPU::VReg_128RegClassID;
2339       case 160:
2340         return AMDGPU::VReg_160RegClassID;
2341       case 192:
2342         return AMDGPU::VReg_192RegClassID;
2343       case 224:
2344         return AMDGPU::VReg_224RegClassID;
2345       case 256:
2346         return AMDGPU::VReg_256RegClassID;
2347       case 512:
2348         return AMDGPU::VReg_512RegClassID;
2349       case 1024:
2350         return AMDGPU::VReg_1024RegClassID;
2351     }
2352   } else if (Is == IS_TTMP) {
2353     switch (RegWidth) {
2354       default: return -1;
2355       case 32:
2356         return AMDGPU::TTMP_32RegClassID;
2357       case 64:
2358         return AMDGPU::TTMP_64RegClassID;
2359       case 128:
2360         return AMDGPU::TTMP_128RegClassID;
2361       case 256:
2362         return AMDGPU::TTMP_256RegClassID;
2363       case 512:
2364         return AMDGPU::TTMP_512RegClassID;
2365     }
2366   } else if (Is == IS_SGPR) {
2367     switch (RegWidth) {
2368       default: return -1;
2369       case 32:
2370         return AMDGPU::SGPR_32RegClassID;
2371       case 64:
2372         return AMDGPU::SGPR_64RegClassID;
2373       case 96:
2374         return AMDGPU::SGPR_96RegClassID;
2375       case 128:
2376         return AMDGPU::SGPR_128RegClassID;
2377       case 160:
2378         return AMDGPU::SGPR_160RegClassID;
2379       case 192:
2380         return AMDGPU::SGPR_192RegClassID;
2381       case 224:
2382         return AMDGPU::SGPR_224RegClassID;
2383       case 256:
2384         return AMDGPU::SGPR_256RegClassID;
2385       case 512:
2386         return AMDGPU::SGPR_512RegClassID;
2387     }
2388   } else if (Is == IS_AGPR) {
2389     switch (RegWidth) {
2390       default: return -1;
2391       case 32:
2392         return AMDGPU::AGPR_32RegClassID;
2393       case 64:
2394         return AMDGPU::AReg_64RegClassID;
2395       case 96:
2396         return AMDGPU::AReg_96RegClassID;
2397       case 128:
2398         return AMDGPU::AReg_128RegClassID;
2399       case 160:
2400         return AMDGPU::AReg_160RegClassID;
2401       case 192:
2402         return AMDGPU::AReg_192RegClassID;
2403       case 224:
2404         return AMDGPU::AReg_224RegClassID;
2405       case 256:
2406         return AMDGPU::AReg_256RegClassID;
2407       case 512:
2408         return AMDGPU::AReg_512RegClassID;
2409       case 1024:
2410         return AMDGPU::AReg_1024RegClassID;
2411     }
2412   }
2413   return -1;
2414 }
2415 
2416 static unsigned getSpecialRegForName(StringRef RegName) {
2417   return StringSwitch<unsigned>(RegName)
2418     .Case("exec", AMDGPU::EXEC)
2419     .Case("vcc", AMDGPU::VCC)
2420     .Case("flat_scratch", AMDGPU::FLAT_SCR)
2421     .Case("xnack_mask", AMDGPU::XNACK_MASK)
2422     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
2423     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
2424     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2425     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
2426     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
2427     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
2428     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2429     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
2430     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2431     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
2432     .Case("lds_direct", AMDGPU::LDS_DIRECT)
2433     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
2434     .Case("m0", AMDGPU::M0)
2435     .Case("vccz", AMDGPU::SRC_VCCZ)
2436     .Case("src_vccz", AMDGPU::SRC_VCCZ)
2437     .Case("execz", AMDGPU::SRC_EXECZ)
2438     .Case("src_execz", AMDGPU::SRC_EXECZ)
2439     .Case("scc", AMDGPU::SRC_SCC)
2440     .Case("src_scc", AMDGPU::SRC_SCC)
2441     .Case("tba", AMDGPU::TBA)
2442     .Case("tma", AMDGPU::TMA)
2443     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
2444     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
2445     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
2446     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
2447     .Case("vcc_lo", AMDGPU::VCC_LO)
2448     .Case("vcc_hi", AMDGPU::VCC_HI)
2449     .Case("exec_lo", AMDGPU::EXEC_LO)
2450     .Case("exec_hi", AMDGPU::EXEC_HI)
2451     .Case("tma_lo", AMDGPU::TMA_LO)
2452     .Case("tma_hi", AMDGPU::TMA_HI)
2453     .Case("tba_lo", AMDGPU::TBA_LO)
2454     .Case("tba_hi", AMDGPU::TBA_HI)
2455     .Case("pc", AMDGPU::PC_REG)
2456     .Case("null", AMDGPU::SGPR_NULL)
2457     .Default(AMDGPU::NoRegister);
2458 }
2459 
2460 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2461                                     SMLoc &EndLoc, bool RestoreOnFailure) {
2462   auto R = parseRegister();
2463   if (!R) return true;
2464   assert(R->isReg());
2465   RegNo = R->getReg();
2466   StartLoc = R->getStartLoc();
2467   EndLoc = R->getEndLoc();
2468   return false;
2469 }
2470 
2471 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
2472                                     SMLoc &EndLoc) {
2473   return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2474 }
2475 
2476 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
2477                                                        SMLoc &StartLoc,
2478                                                        SMLoc &EndLoc) {
2479   bool Result =
2480       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2481   bool PendingErrors = getParser().hasPendingError();
2482   getParser().clearPendingErrors();
2483   if (PendingErrors)
2484     return MatchOperand_ParseFail;
2485   if (Result)
2486     return MatchOperand_NoMatch;
2487   return MatchOperand_Success;
2488 }
2489 
2490 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2491                                             RegisterKind RegKind, unsigned Reg1,
2492                                             SMLoc Loc) {
2493   switch (RegKind) {
2494   case IS_SPECIAL:
2495     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2496       Reg = AMDGPU::EXEC;
2497       RegWidth = 64;
2498       return true;
2499     }
2500     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2501       Reg = AMDGPU::FLAT_SCR;
2502       RegWidth = 64;
2503       return true;
2504     }
2505     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2506       Reg = AMDGPU::XNACK_MASK;
2507       RegWidth = 64;
2508       return true;
2509     }
2510     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2511       Reg = AMDGPU::VCC;
2512       RegWidth = 64;
2513       return true;
2514     }
2515     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2516       Reg = AMDGPU::TBA;
2517       RegWidth = 64;
2518       return true;
2519     }
2520     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2521       Reg = AMDGPU::TMA;
2522       RegWidth = 64;
2523       return true;
2524     }
2525     Error(Loc, "register does not fit in the list");
2526     return false;
2527   case IS_VGPR:
2528   case IS_SGPR:
2529   case IS_AGPR:
2530   case IS_TTMP:
2531     if (Reg1 != Reg + RegWidth / 32) {
2532       Error(Loc, "registers in a list must have consecutive indices");
2533       return false;
2534     }
2535     RegWidth += 32;
2536     return true;
2537   default:
2538     llvm_unreachable("unexpected register kind");
2539   }
2540 }
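// For illustration: parsing the list [s0,s1,s2,s3] grows RegWidth by 32 for
// each element, yielding a single 128-bit register starting at s0, while the
// special pair [exec_lo,exec_hi] is folded into exec.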
2541 
2542 struct RegInfo {
2543   StringLiteral Name;
2544   RegisterKind Kind;
2545 };
2546 
2547 static constexpr RegInfo RegularRegisters[] = {
2548   {{"v"},    IS_VGPR},
2549   {{"s"},    IS_SGPR},
2550   {{"ttmp"}, IS_TTMP},
2551   {{"acc"},  IS_AGPR},
2552   {{"a"},    IS_AGPR},
2553 };
2554 
2555 static bool isRegularReg(RegisterKind Kind) {
2556   return Kind == IS_VGPR ||
2557          Kind == IS_SGPR ||
2558          Kind == IS_TTMP ||
2559          Kind == IS_AGPR;
2560 }
2561 
2562 static const RegInfo* getRegularRegInfo(StringRef Str) {
2563   for (const RegInfo &Reg : RegularRegisters)
2564     if (Str.startswith(Reg.Name))
2565       return &Reg;
2566   return nullptr;
2567 }
2568 
2569 static bool getRegNum(StringRef Str, unsigned& Num) {
2570   return !Str.getAsInteger(10, Num);
2571 }
2572 
2573 bool
2574 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2575                             const AsmToken &NextToken) const {
2576 
2577   // A list of consecutive registers: [s0,s1,s2,s3]
2578   if (Token.is(AsmToken::LBrac))
2579     return true;
2580 
2581   if (!Token.is(AsmToken::Identifier))
2582     return false;
2583 
2584   // A single register like s0 or a range of registers like s[0:1]
2585 
2586   StringRef Str = Token.getString();
2587   const RegInfo *Reg = getRegularRegInfo(Str);
2588   if (Reg) {
2589     StringRef RegName = Reg->Name;
2590     StringRef RegSuffix = Str.substr(RegName.size());
2591     if (!RegSuffix.empty()) {
2592       unsigned Num;
2593       // A single register with an index: rXX
2594       if (getRegNum(RegSuffix, Num))
2595         return true;
2596     } else {
2597       // A range of registers: r[XX:YY].
2598       if (NextToken.is(AsmToken::LBrac))
2599         return true;
2600     }
2601   }
2602 
2603   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2604 }
2605 
2606 bool
2607 AMDGPUAsmParser::isRegister()
2608 {
2609   return isRegister(getToken(), peekToken());
2610 }
2611 
2612 unsigned
2613 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2614                                unsigned RegNum,
2615                                unsigned RegWidth,
2616                                SMLoc Loc) {
2617 
2618   assert(isRegularReg(RegKind));
2619 
2620   unsigned AlignSize = 1;
2621   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2622     // SGPR and TTMP registers must be aligned.
2623     // Max required alignment is 4 dwords.
2624     AlignSize = std::min(RegWidth / 32, 4u);
2625   }
2626 
2627   if (RegNum % AlignSize != 0) {
2628     Error(Loc, "invalid register alignment");
2629     return AMDGPU::NoRegister;
2630   }
2631 
2632   unsigned RegIdx = RegNum / AlignSize;
2633   int RCID = getRegClass(RegKind, RegWidth);
2634   if (RCID == -1) {
2635     Error(Loc, "invalid or unsupported register size");
2636     return AMDGPU::NoRegister;
2637   }
2638 
2639   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2640   const MCRegisterClass RC = TRI->getRegClass(RCID);
2641   if (RegIdx >= RC.getNumRegs()) {
2642     Error(Loc, "register index is out of range");
2643     return AMDGPU::NoRegister;
2644   }
2645 
2646   return RC.getRegister(RegIdx);
2647 }
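// For illustration: s[1:2] (RegWidth == 64) requires AlignSize == 2, so
// RegNum == 1 is rejected with "invalid register alignment", whereas v[1:2]
// is accepted because VGPRs have no alignment requirement.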
2648 
2649 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2650   int64_t RegLo, RegHi;
2651   if (!skipToken(AsmToken::LBrac, "missing register index"))
2652     return false;
2653 
2654   SMLoc FirstIdxLoc = getLoc();
2655   SMLoc SecondIdxLoc;
2656 
2657   if (!parseExpr(RegLo))
2658     return false;
2659 
2660   if (trySkipToken(AsmToken::Colon)) {
2661     SecondIdxLoc = getLoc();
2662     if (!parseExpr(RegHi))
2663       return false;
2664   } else {
2665     RegHi = RegLo;
2666   }
2667 
2668   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2669     return false;
2670 
2671   if (!isUInt<32>(RegLo)) {
2672     Error(FirstIdxLoc, "invalid register index");
2673     return false;
2674   }
2675 
2676   if (!isUInt<32>(RegHi)) {
2677     Error(SecondIdxLoc, "invalid register index");
2678     return false;
2679   }
2680 
2681   if (RegLo > RegHi) {
2682     Error(FirstIdxLoc, "first register index should not exceed second index");
2683     return false;
2684   }
2685 
2686   Num = static_cast<unsigned>(RegLo);
2687   RegWidth = 32 * ((RegHi - RegLo) + 1);
2688   return true;
2689 }
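// For illustration: for "v[0:3]" the caller has already consumed "v", and this
// routine parses "[0:3]" into Num == 0 and RegWidth == 128; a single index
// such as "[2]" yields Num == 2 and RegWidth == 32.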
2690 
2691 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2692                                           unsigned &RegNum, unsigned &RegWidth,
2693                                           SmallVectorImpl<AsmToken> &Tokens) {
2694   assert(isToken(AsmToken::Identifier));
2695   unsigned Reg = getSpecialRegForName(getTokenStr());
2696   if (Reg) {
2697     RegNum = 0;
2698     RegWidth = 32;
2699     RegKind = IS_SPECIAL;
2700     Tokens.push_back(getToken());
2701     lex(); // skip register name
2702   }
2703   return Reg;
2704 }
2705 
2706 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2707                                           unsigned &RegNum, unsigned &RegWidth,
2708                                           SmallVectorImpl<AsmToken> &Tokens) {
2709   assert(isToken(AsmToken::Identifier));
2710   StringRef RegName = getTokenStr();
2711   auto Loc = getLoc();
2712 
2713   const RegInfo *RI = getRegularRegInfo(RegName);
2714   if (!RI) {
2715     Error(Loc, "invalid register name");
2716     return AMDGPU::NoRegister;
2717   }
2718 
2719   Tokens.push_back(getToken());
2720   lex(); // skip register name
2721 
2722   RegKind = RI->Kind;
2723   StringRef RegSuffix = RegName.substr(RI->Name.size());
2724   if (!RegSuffix.empty()) {
2725     // Single 32-bit register: vXX.
2726     if (!getRegNum(RegSuffix, RegNum)) {
2727       Error(Loc, "invalid register index");
2728       return AMDGPU::NoRegister;
2729     }
2730     RegWidth = 32;
2731   } else {
2732     // Range of registers: v[XX:YY]. ":YY" is optional.
2733     if (!ParseRegRange(RegNum, RegWidth))
2734       return AMDGPU::NoRegister;
2735   }
2736 
2737   return getRegularReg(RegKind, RegNum, RegWidth, Loc);
2738 }
2739 
2740 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
2741                                        unsigned &RegWidth,
2742                                        SmallVectorImpl<AsmToken> &Tokens) {
2743   unsigned Reg = AMDGPU::NoRegister;
2744   auto ListLoc = getLoc();
2745 
2746   if (!skipToken(AsmToken::LBrac,
2747                  "expected a register or a list of registers")) {
2748     return AMDGPU::NoRegister;
2749   }
2750 
2751   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2752 
2753   auto Loc = getLoc();
2754   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2755     return AMDGPU::NoRegister;
2756   if (RegWidth != 32) {
2757     Error(Loc, "expected a single 32-bit register");
2758     return AMDGPU::NoRegister;
2759   }
2760 
2761   for (; trySkipToken(AsmToken::Comma); ) {
2762     RegisterKind NextRegKind;
2763     unsigned NextReg, NextRegNum, NextRegWidth;
2764     Loc = getLoc();
2765 
2766     if (!ParseAMDGPURegister(NextRegKind, NextReg,
2767                              NextRegNum, NextRegWidth,
2768                              Tokens)) {
2769       return AMDGPU::NoRegister;
2770     }
2771     if (NextRegWidth != 32) {
2772       Error(Loc, "expected a single 32-bit register");
2773       return AMDGPU::NoRegister;
2774     }
2775     if (NextRegKind != RegKind) {
2776       Error(Loc, "registers in a list must be of the same kind");
2777       return AMDGPU::NoRegister;
2778     }
2779     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
2780       return AMDGPU::NoRegister;
2781   }
2782 
2783   if (!skipToken(AsmToken::RBrac,
2784                  "expected a comma or a closing square bracket")) {
2785     return AMDGPU::NoRegister;
2786   }
2787 
2788   if (isRegularReg(RegKind))
2789     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
2790 
2791   return Reg;
2792 }
2793 
2794 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2795                                           unsigned &RegNum, unsigned &RegWidth,
2796                                           SmallVectorImpl<AsmToken> &Tokens) {
2797   auto Loc = getLoc();
2798   Reg = AMDGPU::NoRegister;
2799 
2800   if (isToken(AsmToken::Identifier)) {
2801     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
2802     if (Reg == AMDGPU::NoRegister)
2803       Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
2804   } else {
2805     Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
2806   }
2807 
2808   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2809   if (Reg == AMDGPU::NoRegister) {
2810     assert(Parser.hasPendingError());
2811     return false;
2812   }
2813 
2814   if (!subtargetHasRegister(*TRI, Reg)) {
2815     if (Reg == AMDGPU::SGPR_NULL) {
2816       Error(Loc, "'null' operand is not supported on this GPU");
2817     } else {
2818       Error(Loc, "register not available on this GPU");
2819     }
2820     return false;
2821   }
2822 
2823   return true;
2824 }
2825 
2826 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2827                                           unsigned &RegNum, unsigned &RegWidth,
2828                                           bool RestoreOnFailure /*=false*/) {
2829   Reg = AMDGPU::NoRegister;
2830 
2831   SmallVector<AsmToken, 1> Tokens;
2832   if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2833     if (RestoreOnFailure) {
2834       while (!Tokens.empty()) {
2835         getLexer().UnLex(Tokens.pop_back_val());
2836       }
2837     }
2838     return true;
2839   }
2840   return false;
2841 }
2842 
2843 Optional<StringRef>
2844 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2845   switch (RegKind) {
2846   case IS_VGPR:
2847     return StringRef(".amdgcn.next_free_vgpr");
2848   case IS_SGPR:
2849     return StringRef(".amdgcn.next_free_sgpr");
2850   default:
2851     return None;
2852   }
2853 }
2854 
2855 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2856   auto SymbolName = getGprCountSymbolName(RegKind);
2857   assert(SymbolName && "initializing invalid register kind");
2858   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2859   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2860 }
2861 
2862 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2863                                             unsigned DwordRegIndex,
2864                                             unsigned RegWidth) {
2865   // Symbols are only defined for GCN targets
2866   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2867     return true;
2868 
2869   auto SymbolName = getGprCountSymbolName(RegKind);
2870   if (!SymbolName)
2871     return true;
2872   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2873 
2874   int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
2875   int64_t OldCount;
2876 
2877   if (!Sym->isVariable())
2878     return !Error(getLoc(),
2879                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2880   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2881     return !Error(
2882         getLoc(),
2883         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2884 
2885   if (OldCount <= NewMax)
2886     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2887 
2888   return true;
2889 }
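// For illustration: after parsing v[4:7], NewMax is 4 + divideCeil(128, 32) - 1
// == 7, so .amdgcn.next_free_vgpr is set to 8 if its current value is lower.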
2890 
2891 std::unique_ptr<AMDGPUOperand>
2892 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
2893   const auto &Tok = getToken();
2894   SMLoc StartLoc = Tok.getLoc();
2895   SMLoc EndLoc = Tok.getEndLoc();
2896   RegisterKind RegKind;
2897   unsigned Reg, RegNum, RegWidth;
2898 
2899   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
2900     return nullptr;
2901   }
2902   if (isHsaAbiVersion3AndAbove(&getSTI())) {
2903     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2904       return nullptr;
2905   } else
2906     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2907   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2908 }
2909 
2910 OperandMatchResultTy
2911 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2912   // TODO: add syntactic sugar for 1/(2*PI)
2913 
2914   if (isRegister())
2915     return MatchOperand_NoMatch;
2916   assert(!isModifier());
2917 
2918   const auto& Tok = getToken();
2919   const auto& NextTok = peekToken();
2920   bool IsReal = Tok.is(AsmToken::Real);
2921   SMLoc S = getLoc();
2922   bool Negate = false;
2923 
2924   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2925     lex();
2926     IsReal = true;
2927     Negate = true;
2928   }
2929 
2930   if (IsReal) {
2931     // Floating-point expressions are not supported;
2932     // only floating-point literals with an optional
2933     // sign are allowed.
2934 
2935     StringRef Num = getTokenStr();
2936     lex();
2937 
2938     APFloat RealVal(APFloat::IEEEdouble());
2939     auto roundMode = APFloat::rmNearestTiesToEven;
2940     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2941       return MatchOperand_ParseFail;
2942     }
2943     if (Negate)
2944       RealVal.changeSign();
2945 
2946     Operands.push_back(
2947       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2948                                AMDGPUOperand::ImmTyNone, true));
2949 
2950     return MatchOperand_Success;
2951 
2952   } else {
2953     int64_t IntVal;
2954     const MCExpr *Expr;
2955     SMLoc S = getLoc();
2956 
2957     if (HasSP3AbsModifier) {
2958       // This is a workaround for handling expressions
2959       // as arguments of SP3 'abs' modifier, for example:
2960       //     |1.0|
2961       //     |-1|
2962       //     |1+x|
2963       // This syntax is not compatible with the syntax of standard
2964       // MC expressions (due to the trailing '|').
2965       SMLoc EndLoc;
2966       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
2967         return MatchOperand_ParseFail;
2968     } else {
2969       if (Parser.parseExpression(Expr))
2970         return MatchOperand_ParseFail;
2971     }
2972 
2973     if (Expr->evaluateAsAbsolute(IntVal)) {
2974       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2975     } else {
2976       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2977     }
2978 
2979     return MatchOperand_Success;
2980   }
2981 
2982   return MatchOperand_NoMatch;
2983 }
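// For illustration: a real literal with an optional leading '-' (e.g. "-1.5")
// is stored as an fp immediate, an absolute integer expression (e.g. "0x3c00"
// or "2+2") as an integer immediate, and any other expression is kept as an
// MCExpr operand.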
2984 
2985 OperandMatchResultTy
2986 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2987   if (!isRegister())
2988     return MatchOperand_NoMatch;
2989 
2990   if (auto R = parseRegister()) {
2991     assert(R->isReg());
2992     Operands.push_back(std::move(R));
2993     return MatchOperand_Success;
2994   }
2995   return MatchOperand_ParseFail;
2996 }
2997 
2998 OperandMatchResultTy
2999 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
3000   auto res = parseReg(Operands);
3001   if (res != MatchOperand_NoMatch) {
3002     return res;
3003   } else if (isModifier()) {
3004     return MatchOperand_NoMatch;
3005   } else {
3006     return parseImm(Operands, HasSP3AbsMod);
3007   }
3008 }
3009 
3010 bool
3011 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3012   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3013     const auto &str = Token.getString();
3014     return str == "abs" || str == "neg" || str == "sext";
3015   }
3016   return false;
3017 }
3018 
3019 bool
3020 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3021   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3022 }
3023 
3024 bool
3025 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3026   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3027 }
3028 
3029 bool
3030 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3031   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3032 }
3033 
3034 // Check if this is an operand modifier or an opcode modifier
3035 // which may look like an expression but is not. We should
3036 // avoid parsing these modifiers as expressions. Currently
3037 // recognized sequences are:
3038 //   |...|
3039 //   abs(...)
3040 //   neg(...)
3041 //   sext(...)
3042 //   -reg
3043 //   -|...|
3044 //   -abs(...)
3045 //   name:...
3046 // Note that simple opcode modifiers like 'gds' may be parsed as
3047 // expressions; this is a special case. See getExpressionAsToken.
3048 //
3049 bool
3050 AMDGPUAsmParser::isModifier() {
3051 
3052   AsmToken Tok = getToken();
3053   AsmToken NextToken[2];
3054   peekTokens(NextToken);
3055 
3056   return isOperandModifier(Tok, NextToken[0]) ||
3057          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3058          isOpcodeModifierWithVal(Tok, NextToken[0]);
3059 }
3060 
3061 // Check if the current token is an SP3 'neg' modifier.
3062 // Currently this modifier is allowed in the following contexts:
3063 //
3064 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3065 // 2. Before an 'abs' modifier: -abs(...)
3066 // 3. Before an SP3 'abs' modifier: -|...|
3067 //
3068 // In all other cases "-" is handled as a part
3069 // of an expression that follows the sign.
3070 //
3071 // Note: When "-" is followed by an integer literal,
3072 // it is interpreted as integer negation rather than
3073 // a floating-point NEG modifier applied to the literal.
3074 // Besides being counter-intuitive, such use of the floating-point
3075 // NEG modifier would result in different meanings
3076 // of integer literals used with VOP1/2/C and VOP3,
3077 // for example:
3078 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3079 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3080 // Negative fp literals with a preceding "-" are
3081 // handled likewise, for uniformity.
3082 //
3083 bool
3084 AMDGPUAsmParser::parseSP3NegModifier() {
3085 
3086   AsmToken NextToken[2];
3087   peekTokens(NextToken);
3088 
3089   if (isToken(AsmToken::Minus) &&
3090       (isRegister(NextToken[0], NextToken[1]) ||
3091        NextToken[0].is(AsmToken::Pipe) ||
3092        isId(NextToken[0], "abs"))) {
3093     lex();
3094     return true;
3095   }
3096 
3097   return false;
3098 }
3099 
3100 OperandMatchResultTy
3101 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3102                                               bool AllowImm) {
3103   bool Neg, SP3Neg;
3104   bool Abs, SP3Abs;
3105   SMLoc Loc;
3106 
3107   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3108   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3109     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3110     return MatchOperand_ParseFail;
3111   }
3112 
3113   SP3Neg = parseSP3NegModifier();
3114 
3115   Loc = getLoc();
3116   Neg = trySkipId("neg");
3117   if (Neg && SP3Neg) {
3118     Error(Loc, "expected register or immediate");
3119     return MatchOperand_ParseFail;
3120   }
3121   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3122     return MatchOperand_ParseFail;
3123 
3124   Abs = trySkipId("abs");
3125   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3126     return MatchOperand_ParseFail;
3127 
3128   Loc = getLoc();
3129   SP3Abs = trySkipToken(AsmToken::Pipe);
3130   if (Abs && SP3Abs) {
3131     Error(Loc, "expected register or immediate");
3132     return MatchOperand_ParseFail;
3133   }
3134 
3135   OperandMatchResultTy Res;
3136   if (AllowImm) {
3137     Res = parseRegOrImm(Operands, SP3Abs);
3138   } else {
3139     Res = parseReg(Operands);
3140   }
3141   if (Res != MatchOperand_Success) {
3142     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
3143   }
3144 
3145   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
3146     return MatchOperand_ParseFail;
3147   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3148     return MatchOperand_ParseFail;
3149   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3150     return MatchOperand_ParseFail;
3151 
3152   AMDGPUOperand::Modifiers Mods;
3153   Mods.Abs = Abs || SP3Abs;
3154   Mods.Neg = Neg || SP3Neg;
3155 
3156   if (Mods.hasFPModifiers()) {
3157     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3158     if (Op.isExpr()) {
3159       Error(Op.getStartLoc(), "expected an absolute expression");
3160       return MatchOperand_ParseFail;
3161     }
3162     Op.setModifiers(Mods);
3163   }
3164   return MatchOperand_Success;
3165 }
3166 
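// Parse a register or immediate operand with an optional integer input
// modifier, e.g. (illustrative) "sext(v0)".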
3167 OperandMatchResultTy
3168 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3169                                                bool AllowImm) {
3170   bool Sext = trySkipId("sext");
3171   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3172     return MatchOperand_ParseFail;
3173 
3174   OperandMatchResultTy Res;
3175   if (AllowImm) {
3176     Res = parseRegOrImm(Operands);
3177   } else {
3178     Res = parseReg(Operands);
3179   }
3180   if (Res != MatchOperand_Success) {
3181     return Sext? MatchOperand_ParseFail : Res;
3182   }
3183 
3184   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3185     return MatchOperand_ParseFail;
3186 
3187   AMDGPUOperand::Modifiers Mods;
3188   Mods.Sext = Sext;
3189 
3190   if (Mods.hasIntModifiers()) {
3191     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3192     if (Op.isExpr()) {
3193       Error(Op.getStartLoc(), "expected an absolute expression");
3194       return MatchOperand_ParseFail;
3195     }
3196     Op.setModifiers(Mods);
3197   }
3198 
3199   return MatchOperand_Success;
3200 }
3201 
3202 OperandMatchResultTy
3203 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
3204   return parseRegOrImmWithFPInputMods(Operands, false);
3205 }
3206 
3207 OperandMatchResultTy
3208 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
3209   return parseRegOrImmWithIntInputMods(Operands, false);
3210 }
3211 
3212 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3213   auto Loc = getLoc();
3214   if (trySkipId("off")) {
3215     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3216                                                 AMDGPUOperand::ImmTyOff, false));
3217     return MatchOperand_Success;
3218   }
3219 
3220   if (!isRegister())
3221     return MatchOperand_NoMatch;
3222 
3223   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3224   if (Reg) {
3225     Operands.push_back(std::move(Reg));
3226     return MatchOperand_Success;
3227   }
3228 
3229   return MatchOperand_ParseFail;
3230 
3231 }
3232 
3233 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3234   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3235 
3236   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3237       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3238       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3239       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3240     return Match_InvalidOperand;
3241 
3242   if ((TSFlags & SIInstrFlags::VOP3) &&
3243       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
3244       getForcedEncodingSize() != 64)
3245     return Match_PreferE32;
3246 
3247   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3248       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3249     // v_mac_f32/16 allow only dst_sel == DWORD;
3250     auto OpNum =
3251         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3252     const auto &Op = Inst.getOperand(OpNum);
3253     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3254       return Match_InvalidOperand;
3255     }
3256   }
3257 
3258   return Match_Success;
3259 }
3260 
3261 static ArrayRef<unsigned> getAllVariants() {
3262   static const unsigned Variants[] = {
3263     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3264     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3265     AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3266   };
3267 
3268   return makeArrayRef(Variants);
3269 }
3270 
3271 // Determine which asm variants we should check.
3272 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3273   if (isForcedDPP() && isForcedVOP3()) {
3274     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3275     return makeArrayRef(Variants);
3276   }
3277   if (getForcedEncodingSize() == 32) {
3278     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3279     return makeArrayRef(Variants);
3280   }
3281 
3282   if (isForcedVOP3()) {
3283     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3284     return makeArrayRef(Variants);
3285   }
3286 
3287   if (isForcedSDWA()) {
3288     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3289                                         AMDGPUAsmVariants::SDWA9};
3290     return makeArrayRef(Variants);
3291   }
3292 
3293   if (isForcedDPP()) {
3294     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3295     return makeArrayRef(Variants);
3296   }
3297 
3298   return getAllVariants();
3299 }
3300 
3301 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3302   if (isForcedDPP() && isForcedVOP3())
3303     return "e64_dpp";
3304 
3305   if (getForcedEncodingSize() == 32)
3306     return "e32";
3307 
3308   if (isForcedVOP3())
3309     return "e64";
3310 
3311   if (isForcedSDWA())
3312     return "sdwa";
3313 
3314   if (isForcedDPP())
3315     return "dpp";
3316 
3317   return "";
3318 }
3319 
3320 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3321   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3322   const unsigned Num = Desc.getNumImplicitUses();
3323   for (unsigned i = 0; i < Num; ++i) {
3324     unsigned Reg = Desc.ImplicitUses[i];
3325     switch (Reg) {
3326     case AMDGPU::FLAT_SCR:
3327     case AMDGPU::VCC:
3328     case AMDGPU::VCC_LO:
3329     case AMDGPU::VCC_HI:
3330     case AMDGPU::M0:
3331       return Reg;
3332     default:
3333       break;
3334     }
3335   }
3336   return AMDGPU::NoRegister;
3337 }
3338 
3339 // NB: This code is correct only when used to check constant
3340 // bus limitations because GFX7 supports no f16 inline constants.
3341 // Note that there are no cases when a GFX7 opcode violates
3342 // constant bus limitations due to the use of an f16 constant.
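// For reference (an informal summary, not a definitive list): inline constants
// cover small integers (-16..64) and a few FP values such as +/-0.5, +/-1.0,
// +/-2.0, +/-4.0 and 0.0 (plus 1/(2*pi) when hasInv2PiInlineImm() is true);
// any other immediate has to be encoded as a literal.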
3343 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3344                                        unsigned OpIdx) const {
3345   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3346 
3347   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3348     return false;
3349   }
3350 
3351   const MCOperand &MO = Inst.getOperand(OpIdx);
3352 
3353   int64_t Val = MO.getImm();
3354   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3355 
3356   switch (OpSize) { // expected operand size
3357   case 8:
3358     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3359   case 4:
3360     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3361   case 2: {
3362     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3363     if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3364         OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3365         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3366       return AMDGPU::isInlinableIntLiteral(Val);
3367 
3368     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3369         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3370         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3371       return AMDGPU::isInlinableIntLiteralV216(Val);
3372 
3373     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3374         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3375         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3376       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3377 
3378     return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3379   }
3380   default:
3381     llvm_unreachable("invalid operand size");
3382   }
3383 }
3384 
3385 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3386   if (!isGFX10Plus())
3387     return 1;
3388 
3389   switch (Opcode) {
3390   // 64-bit shift instructions can use only one scalar value input
3391   case AMDGPU::V_LSHLREV_B64_e64:
3392   case AMDGPU::V_LSHLREV_B64_gfx10:
3393   case AMDGPU::V_LSHLREV_B64_e64_gfx11:
3394   case AMDGPU::V_LSHRREV_B64_e64:
3395   case AMDGPU::V_LSHRREV_B64_gfx10:
3396   case AMDGPU::V_LSHRREV_B64_e64_gfx11:
3397   case AMDGPU::V_ASHRREV_I64_e64:
3398   case AMDGPU::V_ASHRREV_I64_gfx10:
3399   case AMDGPU::V_ASHRREV_I64_e64_gfx11:
3400   case AMDGPU::V_LSHL_B64_e64:
3401   case AMDGPU::V_LSHR_B64_e64:
3402   case AMDGPU::V_ASHR_I64_e64:
3403     return 1;
3404   default:
3405     return 2;
3406   }
3407 }
3408 
3409 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3410   const MCOperand &MO = Inst.getOperand(OpIdx);
3411   if (MO.isImm()) {
3412     return !isInlineConstant(Inst, OpIdx);
3413   } else if (MO.isReg()) {
3414     auto Reg = MO.getReg();
3415     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3416     auto PReg = mc2PseudoReg(Reg);
3417     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3418   } else {
3419     return true;
3420   }
3421 }
3422 
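// Check that the instruction does not read more scalar values through the
// constant bus than the target allows. Illustrative example (register names
// are arbitrary): with a limit of 1, "v_add_f32_e64 v0, s0, s1" reads two
// different SGPRs and is rejected, while "v_add_f32_e64 v0, s0, v1" is fine;
// GFX10+ raises the limit to 2 for most VALU opcodes.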
3423 bool
3424 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3425                                                 const OperandVector &Operands) {
3426   const unsigned Opcode = Inst.getOpcode();
3427   const MCInstrDesc &Desc = MII.get(Opcode);
3428   unsigned LastSGPR = AMDGPU::NoRegister;
3429   unsigned ConstantBusUseCount = 0;
3430   unsigned NumLiterals = 0;
3431   unsigned LiteralSize;
3432 
3433   if (Desc.TSFlags &
3434       (SIInstrFlags::VOPC |
3435        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3436        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3437        SIInstrFlags::SDWA)) {
3438     // Check special imm operands (used by madmk, etc)
3439     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3440       ++NumLiterals;
3441       LiteralSize = 4;
3442     }
3443 
3444     SmallDenseSet<unsigned> SGPRsUsed;
3445     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3446     if (SGPRUsed != AMDGPU::NoRegister) {
3447       SGPRsUsed.insert(SGPRUsed);
3448       ++ConstantBusUseCount;
3449     }
3450 
3451     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3452     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3453     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3454 
3455     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3456 
3457     for (int OpIdx : OpIndices) {
3458       if (OpIdx == -1) break;
3459 
3460       const MCOperand &MO = Inst.getOperand(OpIdx);
3461       if (usesConstantBus(Inst, OpIdx)) {
3462         if (MO.isReg()) {
3463           LastSGPR = mc2PseudoReg(MO.getReg());
3464           // Pairs of registers with partial intersections like these
3465           //   s0, s[0:1]
3466           //   flat_scratch_lo, flat_scratch
3467           //   flat_scratch_lo, flat_scratch_hi
3468           // are theoretically valid but they are disabled anyway.
3469           // Note that this code mimics SIInstrInfo::verifyInstruction.
3470           if (SGPRsUsed.insert(LastSGPR).second) {
3471             ++ConstantBusUseCount;
3472           }
3473         } else { // Expression or a literal
3474 
3475           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3476             continue; // special operand like VINTERP attr_chan
3477 
3478           // An instruction may use only one literal.
3479           // This has been validated on the previous step.
3480           // See validateVOPLiteral.
3481           // This literal may be used as more than one operand.
3482           // If all these operands are of the same size,
3483           // this literal counts as one scalar value.
3484           // Otherwise it counts as 2 scalar values.
3485           // See "GFX10 Shader Programming", section 3.6.2.3.
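          // Illustrative example: the same literal used for two 32-bit
          // operands counts as one scalar value; if it is also used for a
          // 64-bit operand, the sizes differ and it counts as two.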
3486 
3487           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3488           if (Size < 4) Size = 4;
3489 
3490           if (NumLiterals == 0) {
3491             NumLiterals = 1;
3492             LiteralSize = Size;
3493           } else if (LiteralSize != Size) {
3494             NumLiterals = 2;
3495           }
3496         }
3497       }
3498     }
3499   }
3500   ConstantBusUseCount += NumLiterals;
3501 
3502   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3503     return true;
3504 
3505   SMLoc LitLoc = getLitLoc(Operands);
3506   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3507   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3508   Error(Loc, "invalid operand (violates constant bus restrictions)");
3509   return false;
3510 }
3511 
3512 bool
3513 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3514                                                  const OperandVector &Operands) {
3515   const unsigned Opcode = Inst.getOpcode();
3516   const MCInstrDesc &Desc = MII.get(Opcode);
3517 
3518   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3519   if (DstIdx == -1 ||
3520       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3521     return true;
3522   }
3523 
3524   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3525 
3526   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3527   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3528   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3529 
3530   assert(DstIdx != -1);
3531   const MCOperand &Dst = Inst.getOperand(DstIdx);
3532   assert(Dst.isReg());
3533 
3534   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3535 
3536   for (int SrcIdx : SrcIndices) {
3537     if (SrcIdx == -1) break;
3538     const MCOperand &Src = Inst.getOperand(SrcIdx);
3539     if (Src.isReg()) {
3540       if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) {
3541         const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3542         Error(getRegLoc(SrcReg, Operands),
3543           "destination must be different than all sources");
3544         return false;
3545       }
3546     }
3547   }
3548 
3549   return true;
3550 }
3551 
3552 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3553 
3554   const unsigned Opc = Inst.getOpcode();
3555   const MCInstrDesc &Desc = MII.get(Opc);
3556 
3557   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3558     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3559     assert(ClampIdx != -1);
3560     return Inst.getOperand(ClampIdx).getImm() == 0;
3561   }
3562 
3563   return true;
3564 }
3565 
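// Check that the vdata register tuple size matches what dmask, tfe and d16
// imply. Illustrative example: dmask=0b0111 selects three channels, so vdata
// is expected to span 3 VGPRs; tfe adds one more, and packed d16 halves the
// count (rounded up).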
3566 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3567 
3568   const unsigned Opc = Inst.getOpcode();
3569   const MCInstrDesc &Desc = MII.get(Opc);
3570 
3571   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3572     return None;
3573 
3574   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3575   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3576   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3577 
3578   assert(VDataIdx != -1);
3579 
3580   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3581     return None;
3582 
3583   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3584   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3585   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3586   if (DMask == 0)
3587     DMask = 1;
3588 
3589   bool isPackedD16 = false;
3590   unsigned DataSize =
3591     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3592   if (hasPackedD16()) {
3593     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3594     isPackedD16 = D16Idx >= 0;
3595     if (isPackedD16 && Inst.getOperand(D16Idx).getImm())
3596       DataSize = (DataSize + 1) / 2;
3597   }
3598 
3599   if ((VDataSize / 4) == DataSize + TFESize)
3600     return None;
3601 
3602   return StringRef(isPackedD16
3603                        ? "image data size does not match dmask, d16 and tfe"
3604                        : "image data size does not match dmask and tfe");
3605 }
3606 
3607 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3608   const unsigned Opc = Inst.getOpcode();
3609   const MCInstrDesc &Desc = MII.get(Opc);
3610 
3611   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3612     return true;
3613 
3614   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3615 
3616   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3617       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3618   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3619   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3620   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3621   int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3622 
3623   assert(VAddr0Idx != -1);
3624   assert(SrsrcIdx != -1);
3625   assert(SrsrcIdx > VAddr0Idx);
3626 
3627   if (DimIdx == -1)
3628     return true; // intersect_ray
3629 
3630   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3631   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3632   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3633   unsigned ActualAddrSize =
3634       IsNSA ? SrsrcIdx - VAddr0Idx
3635             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3636   bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3637 
3638   unsigned ExpectedAddrSize =
3639       AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3640 
3641   if (!IsNSA) {
3642     if (ExpectedAddrSize > 8)
3643       ExpectedAddrSize = 16;
3644 
3645     // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3646     // This provides backward compatibility for assembly created
3647     // before 160b/192b/224b types were directly supported.
3648     if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3649       return true;
3650   }
3651 
3652   return ActualAddrSize == ExpectedAddrSize;
3653 }
3654 
3655 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3656 
3657   const unsigned Opc = Inst.getOpcode();
3658   const MCInstrDesc &Desc = MII.get(Opc);
3659 
3660   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3661     return true;
3662   if (!Desc.mayLoad() || !Desc.mayStore())
3663     return true; // Not atomic
3664 
3665   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3666   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3667 
3668   // This is an incomplete check because image_atomic_cmpswap
3669   // may only use 0x3 and 0xf while other atomic operations
3670   // may use 0x1 and 0x3. However these limitations are
3671   // verified when we check that dmask matches dst size.
3672   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3673 }
3674 
3675 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3676 
3677   const unsigned Opc = Inst.getOpcode();
3678   const MCInstrDesc &Desc = MII.get(Opc);
3679 
3680   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3681     return true;
3682 
3683   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3684   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3685 
3686   // GATHER4 instructions use dmask in a different fashion compared to
3687   // other MIMG instructions. The only useful DMASK values are
3688   // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3689   // (red,red,red,red) etc.) The ISA document doesn't mention
3690   // this.
3691   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3692 }
3693 
3694 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3695   const unsigned Opc = Inst.getOpcode();
3696   const MCInstrDesc &Desc = MII.get(Opc);
3697 
3698   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3699     return true;
3700 
3701   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3702   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3703       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3704 
3705   if (!BaseOpcode->MSAA)
3706     return true;
3707 
3708   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3709   assert(DimIdx != -1);
3710 
3711   unsigned Dim = Inst.getOperand(DimIdx).getImm();
3712   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3713 
3714   return DimInfo->MSAA;
3715 }
3716 
3717 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3718 {
3719   switch (Opcode) {
3720   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3721   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3722   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3723     return true;
3724   default:
3725     return false;
3726   }
3727 }
3728 
3729 // movrels* opcodes should only allow VGPRs as src0.
3730 // This is specified in the .td description for vop1/vop3,
3731 // but sdwa is handled differently. See isSDWAOperand.
3732 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3733                                       const OperandVector &Operands) {
3734 
3735   const unsigned Opc = Inst.getOpcode();
3736   const MCInstrDesc &Desc = MII.get(Opc);
3737 
3738   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3739     return true;
3740 
3741   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3742   assert(Src0Idx != -1);
3743 
3744   SMLoc ErrLoc;
3745   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3746   if (Src0.isReg()) {
3747     auto Reg = mc2PseudoReg(Src0.getReg());
3748     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3749     if (!isSGPR(Reg, TRI))
3750       return true;
3751     ErrLoc = getRegLoc(Reg, Operands);
3752   } else {
3753     ErrLoc = getConstLoc(Operands);
3754   }
3755 
3756   Error(ErrLoc, "source operand must be a VGPR");
3757   return false;
3758 }
3759 
3760 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3761                                           const OperandVector &Operands) {
3762 
3763   const unsigned Opc = Inst.getOpcode();
3764 
3765   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3766     return true;
3767 
3768   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3769   assert(Src0Idx != -1);
3770 
3771   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3772   if (!Src0.isReg())
3773     return true;
3774 
3775   auto Reg = mc2PseudoReg(Src0.getReg());
3776   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3777   if (!isGFX90A() && isSGPR(Reg, TRI)) {
3778     Error(getRegLoc(Reg, Operands),
3779           "source operand must be either a VGPR or an inline constant");
3780     return false;
3781   }
3782 
3783   return true;
3784 }
3785 
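// For wide MFMA results, src2 must not partially overlap the destination
// tuple. Hedged example (the exact mnemonic may vary by target):
//   v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[16:47]  // rejected: partial overlap
//   v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]   // accepted: full overlap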
3786 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
3787                                    const OperandVector &Operands) {
3788   const unsigned Opc = Inst.getOpcode();
3789   const MCInstrDesc &Desc = MII.get(Opc);
3790 
3791   if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
3792     return true;
3793 
3794   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
3795   if (Src2Idx == -1)
3796     return true;
3797 
3798   const MCOperand &Src2 = Inst.getOperand(Src2Idx);
3799   if (!Src2.isReg())
3800     return true;
3801 
3802   MCRegister Src2Reg = Src2.getReg();
3803   MCRegister DstReg = Inst.getOperand(0).getReg();
3804   if (Src2Reg == DstReg)
3805     return true;
3806 
3807   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3808   if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3809     return true;
3810 
3811   if (TRI->regsOverlap(Src2Reg, DstReg)) {
3812     Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3813           "source 2 operand must not partially overlap with dst");
3814     return false;
3815   }
3816 
3817   return true;
3818 }
3819 
3820 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3821   switch (Inst.getOpcode()) {
3822   default:
3823     return true;
3824   case V_DIV_SCALE_F32_gfx6_gfx7:
3825   case V_DIV_SCALE_F32_vi:
3826   case V_DIV_SCALE_F32_gfx10:
3827   case V_DIV_SCALE_F64_gfx6_gfx7:
3828   case V_DIV_SCALE_F64_vi:
3829   case V_DIV_SCALE_F64_gfx10:
3830     break;
3831   }
3832 
3833   // TODO: Check that src0 = src1 or src2.
3834 
3835   for (auto Name : {AMDGPU::OpName::src0_modifiers,
3836                     AMDGPU::OpName::src1_modifiers,
3837                     AMDGPU::OpName::src2_modifiers}) {
3838     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3839             .getImm() &
3840         SISrcMods::ABS) {
3841       return false;
3842     }
3843   }
3844 
3845   return true;
3846 }
3847 
3848 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3849 
3850   const unsigned Opc = Inst.getOpcode();
3851   const MCInstrDesc &Desc = MII.get(Opc);
3852 
3853   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3854     return true;
3855 
3856   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3857   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3858     if (isCI() || isSI())
3859       return false;
3860   }
3861 
3862   return true;
3863 }
3864 
3865 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3866   const unsigned Opc = Inst.getOpcode();
3867   const MCInstrDesc &Desc = MII.get(Opc);
3868 
3869   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3870     return true;
3871 
3872   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3873   if (DimIdx < 0)
3874     return true;
3875 
3876   long Imm = Inst.getOperand(DimIdx).getImm();
3877   if (Imm < 0 || Imm >= 8)
3878     return false;
3879 
3880   return true;
3881 }
3882 
3883 static bool IsRevOpcode(const unsigned Opcode)
3884 {
3885   switch (Opcode) {
3886   case AMDGPU::V_SUBREV_F32_e32:
3887   case AMDGPU::V_SUBREV_F32_e64:
3888   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3889   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3890   case AMDGPU::V_SUBREV_F32_e32_vi:
3891   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3892   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3893   case AMDGPU::V_SUBREV_F32_e64_vi:
3894 
3895   case AMDGPU::V_SUBREV_CO_U32_e32:
3896   case AMDGPU::V_SUBREV_CO_U32_e64:
3897   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3898   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3899 
3900   case AMDGPU::V_SUBBREV_U32_e32:
3901   case AMDGPU::V_SUBBREV_U32_e64:
3902   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3903   case AMDGPU::V_SUBBREV_U32_e32_vi:
3904   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3905   case AMDGPU::V_SUBBREV_U32_e64_vi:
3906 
3907   case AMDGPU::V_SUBREV_U32_e32:
3908   case AMDGPU::V_SUBREV_U32_e64:
3909   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3910   case AMDGPU::V_SUBREV_U32_e32_vi:
3911   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3912   case AMDGPU::V_SUBREV_U32_e64_vi:
3913 
3914   case AMDGPU::V_SUBREV_F16_e32:
3915   case AMDGPU::V_SUBREV_F16_e64:
3916   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3917   case AMDGPU::V_SUBREV_F16_e32_vi:
3918   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3919   case AMDGPU::V_SUBREV_F16_e64_vi:
3920 
3921   case AMDGPU::V_SUBREV_U16_e32:
3922   case AMDGPU::V_SUBREV_U16_e64:
3923   case AMDGPU::V_SUBREV_U16_e32_vi:
3924   case AMDGPU::V_SUBREV_U16_e64_vi:
3925 
3926   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3927   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3928   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3929 
3930   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3931   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3932 
3933   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3934   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3935 
3936   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3937   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3938 
3939   case AMDGPU::V_LSHRREV_B32_e32:
3940   case AMDGPU::V_LSHRREV_B32_e64:
3941   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3942   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3943   case AMDGPU::V_LSHRREV_B32_e32_vi:
3944   case AMDGPU::V_LSHRREV_B32_e64_vi:
3945   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3946   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3947 
3948   case AMDGPU::V_ASHRREV_I32_e32:
3949   case AMDGPU::V_ASHRREV_I32_e64:
3950   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3951   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3952   case AMDGPU::V_ASHRREV_I32_e32_vi:
3953   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3954   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3955   case AMDGPU::V_ASHRREV_I32_e64_vi:
3956 
3957   case AMDGPU::V_LSHLREV_B32_e32:
3958   case AMDGPU::V_LSHLREV_B32_e64:
3959   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3960   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3961   case AMDGPU::V_LSHLREV_B32_e32_vi:
3962   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3963   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3964   case AMDGPU::V_LSHLREV_B32_e64_vi:
3965 
3966   case AMDGPU::V_LSHLREV_B16_e32:
3967   case AMDGPU::V_LSHLREV_B16_e64:
3968   case AMDGPU::V_LSHLREV_B16_e32_vi:
3969   case AMDGPU::V_LSHLREV_B16_e64_vi:
3970   case AMDGPU::V_LSHLREV_B16_gfx10:
3971 
3972   case AMDGPU::V_LSHRREV_B16_e32:
3973   case AMDGPU::V_LSHRREV_B16_e64:
3974   case AMDGPU::V_LSHRREV_B16_e32_vi:
3975   case AMDGPU::V_LSHRREV_B16_e64_vi:
3976   case AMDGPU::V_LSHRREV_B16_gfx10:
3977 
3978   case AMDGPU::V_ASHRREV_I16_e32:
3979   case AMDGPU::V_ASHRREV_I16_e64:
3980   case AMDGPU::V_ASHRREV_I16_e32_vi:
3981   case AMDGPU::V_ASHRREV_I16_e64_vi:
3982   case AMDGPU::V_ASHRREV_I16_gfx10:
3983 
3984   case AMDGPU::V_LSHLREV_B64_e64:
3985   case AMDGPU::V_LSHLREV_B64_gfx10:
3986   case AMDGPU::V_LSHLREV_B64_vi:
3987 
3988   case AMDGPU::V_LSHRREV_B64_e64:
3989   case AMDGPU::V_LSHRREV_B64_gfx10:
3990   case AMDGPU::V_LSHRREV_B64_vi:
3991 
3992   case AMDGPU::V_ASHRREV_I64_e64:
3993   case AMDGPU::V_ASHRREV_I64_gfx10:
3994   case AMDGPU::V_ASHRREV_I64_vi:
3995 
3996   case AMDGPU::V_PK_LSHLREV_B16:
3997   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3998   case AMDGPU::V_PK_LSHLREV_B16_vi:
3999 
4000   case AMDGPU::V_PK_LSHRREV_B16:
4001   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
4002   case AMDGPU::V_PK_LSHRREV_B16_vi:
4003   case AMDGPU::V_PK_ASHRREV_I16:
4004   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
4005   case AMDGPU::V_PK_ASHRREV_I16_vi:
4006     return true;
4007   default:
4008     return false;
4009   }
4010 }
4011 
4012 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
4013 
4014   using namespace SIInstrFlags;
4015   const unsigned Opcode = Inst.getOpcode();
4016   const MCInstrDesc &Desc = MII.get(Opcode);
4017 
4018   // lds_direct register is defined so that it can be used
4019   // with 9-bit operands only. Ignore encodings which do not accept these.
4020   const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4021   if ((Desc.TSFlags & Enc) == 0)
4022     return None;
4023 
4024   for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4025     auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4026     if (SrcIdx == -1)
4027       break;
4028     const auto &Src = Inst.getOperand(SrcIdx);
4029     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4030 
4031       if (isGFX90A() || isGFX11Plus())
4032         return StringRef("lds_direct is not supported on this GPU");
4033 
4034       if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
4035         return StringRef("lds_direct cannot be used with this instruction");
4036 
4037       if (SrcName != OpName::src0)
4038         return StringRef("lds_direct may be used as src0 only");
4039     }
4040   }
4041 
4042   return None;
4043 }
4044 
4045 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4046   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4047     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4048     if (Op.isFlatOffset())
4049       return Op.getStartLoc();
4050   }
4051   return getLoc();
4052 }
4053 
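// Validate the FLAT/GLOBAL/SCRATCH "offset:" modifier. Illustrative example:
// "flat_load_dword v0, v[0:1] offset:8" is rejected on targets without flat
// instruction offsets; on newer targets the value must fit the bit width
// reported by AMDGPU::getNumFlatOffsetBits() (signed for GLOBAL/SCRATCH,
// unsigned for plain FLAT).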
4054 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
4055                                          const OperandVector &Operands) {
4056   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4057   if ((TSFlags & SIInstrFlags::FLAT) == 0)
4058     return true;
4059 
4060   auto Opcode = Inst.getOpcode();
4061   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4062   assert(OpNum != -1);
4063 
4064   const auto &Op = Inst.getOperand(OpNum);
4065   if (!hasFlatOffsets() && Op.getImm() != 0) {
4066     Error(getFlatOffsetLoc(Operands),
4067           "flat offset modifier is not supported on this GPU");
4068     return false;
4069   }
4070 
4071   // GLOBAL and SCRATCH instructions use a signed offset. For plain FLAT
4072   // the offset must be positive; the MSB is ignored and forced to zero.
4073   if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
4074     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
4075     if (!isIntN(OffsetSize, Op.getImm())) {
4076       Error(getFlatOffsetLoc(Operands),
4077             Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
4078       return false;
4079     }
4080   } else {
4081     unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
4082     if (!isUIntN(OffsetSize, Op.getImm())) {
4083       Error(getFlatOffsetLoc(Operands),
4084             Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4085       return false;
4086     }
4087   }
4088 
4089   return true;
4090 }
4091 
4092 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4093   // Start with second operand because SMEM Offset cannot be dst or src0.
4094   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4095     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4096     if (Op.isSMEMOffset())
4097       return Op.getStartLoc();
4098   }
4099   return getLoc();
4100 }
4101 
4102 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
4103                                          const OperandVector &Operands) {
4104   if (isCI() || isSI())
4105     return true;
4106 
4107   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4108   if ((TSFlags & SIInstrFlags::SMRD) == 0)
4109     return true;
4110 
4111   auto Opcode = Inst.getOpcode();
4112   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4113   if (OpNum == -1)
4114     return true;
4115 
4116   const auto &Op = Inst.getOperand(OpNum);
4117   if (!Op.isImm())
4118     return true;
4119 
4120   uint64_t Offset = Op.getImm();
4121   bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
4122   if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
4123       AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
4124     return true;
4125 
4126   Error(getSMEMOffsetLoc(Operands),
4127         (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
4128                                "expected a 21-bit signed offset");
4129 
4130   return false;
4131 }
4132 
4133 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
4134   unsigned Opcode = Inst.getOpcode();
4135   const MCInstrDesc &Desc = MII.get(Opcode);
4136   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
4137     return true;
4138 
4139   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4140   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4141 
4142   const int OpIndices[] = { Src0Idx, Src1Idx };
4143 
4144   unsigned NumExprs = 0;
4145   unsigned NumLiterals = 0;
4146   uint32_t LiteralValue;
4147 
4148   for (int OpIdx : OpIndices) {
4149     if (OpIdx == -1) break;
4150 
4151     const MCOperand &MO = Inst.getOperand(OpIdx);
4152     // Exclude special imm operands (like those used by s_set_gpr_idx_on)
4153     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
4154       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4155         uint32_t Value = static_cast<uint32_t>(MO.getImm());
4156         if (NumLiterals == 0 || LiteralValue != Value) {
4157           LiteralValue = Value;
4158           ++NumLiterals;
4159         }
4160       } else if (MO.isExpr()) {
4161         ++NumExprs;
4162       }
4163     }
4164   }
4165 
4166   return NumLiterals + NumExprs <= 1;
4167 }
4168 
4169 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
4170   const unsigned Opc = Inst.getOpcode();
4171   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
4172       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
4173     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4174     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
4175 
4176     if (OpSel & ~3)
4177       return false;
4178   }
4179 
4180   if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) {
4181     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4182     if (OpSelIdx != -1) {
4183       if (Inst.getOperand(OpSelIdx).getImm() != 0)
4184         return false;
4185     }
4186     int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
4187     if (OpSelHiIdx != -1) {
4188       if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
4189         return false;
4190     }
4191   }
4192 
4193   return true;
4194 }
4195 
4196 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
4197                                   const OperandVector &Operands) {
4198   const unsigned Opc = Inst.getOpcode();
4199   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
4200   if (DppCtrlIdx < 0)
4201     return true;
4202   unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
4203 
4204   if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
4205     // DPP64 is supported for row_newbcast only.
4206     int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4207     if (Src0Idx >= 0 &&
4208         getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
4209       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
4210       Error(S, "64 bit dpp only supports row_newbcast");
4211       return false;
4212     }
4213   }
4214 
4215   return true;
4216 }
4217 
4218 // Check if VCC register matches wavefront size
4219 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4220   auto FB = getFeatureBits();
4221   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4222     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4223 }
4224 
4225 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
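// Illustrative example: "v_fma_f32 v0, 0x12345678, v1, v2" needs the
// VOP3Literal feature (GFX10+), and no instruction may use two distinct
// literal values at once.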
4226 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4227                                          const OperandVector &Operands) {
4228   unsigned Opcode = Inst.getOpcode();
4229   const MCInstrDesc &Desc = MII.get(Opcode);
4230   const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4231   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4232       ImmIdx == -1)
4233     return true;
4234 
4235   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4236   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4237   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4238 
4239   const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4240 
4241   unsigned NumExprs = 0;
4242   unsigned NumLiterals = 0;
4243   uint32_t LiteralValue;
4244 
4245   for (int OpIdx : OpIndices) {
4246     if (OpIdx == -1)
4247       continue;
4248 
4249     const MCOperand &MO = Inst.getOperand(OpIdx);
4250     if (!MO.isImm() && !MO.isExpr())
4251       continue;
4252     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4253       continue;
4254 
4255     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4256         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4257       Error(getConstLoc(Operands),
4258             "inline constants are not allowed for this operand");
4259       return false;
4260     }
4261 
4262     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4263       uint32_t Value = static_cast<uint32_t>(MO.getImm());
4264       if (NumLiterals == 0 || LiteralValue != Value) {
4265         LiteralValue = Value;
4266         ++NumLiterals;
4267       }
4268     } else if (MO.isExpr()) {
4269       ++NumExprs;
4270     }
4271   }
4272   NumLiterals += NumExprs;
4273 
4274   if (!NumLiterals)
4275     return true;
4276 
4277   if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4278     Error(getLitLoc(Operands), "literal operands are not supported");
4279     return false;
4280   }
4281 
4282   if (NumLiterals > 1) {
4283     Error(getLitLoc(Operands), "only one literal operand is allowed");
4284     return false;
4285   }
4286 
4287   return true;
4288 }
4289 
4290 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4291 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4292                          const MCRegisterInfo *MRI) {
4293   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4294   if (OpIdx < 0)
4295     return -1;
4296 
4297   const MCOperand &Op = Inst.getOperand(OpIdx);
4298   if (!Op.isReg())
4299     return -1;
4300 
4301   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4302   auto Reg = Sub ? Sub : Op.getReg();
4303   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4304   return AGPR32.contains(Reg) ? 1 : 0;
4305 }
4306 
4307 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4308   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4309   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4310                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4311                   SIInstrFlags::DS)) == 0)
4312     return true;
4313 
4314   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4315                                                       : AMDGPU::OpName::vdata;
4316 
4317   const MCRegisterInfo *MRI = getMRI();
4318   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4319   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4320 
4321   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4322     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4323     if (Data2Areg >= 0 && Data2Areg != DataAreg)
4324       return false;
4325   }
4326 
4327   auto FB = getFeatureBits();
4328   if (FB[AMDGPU::FeatureGFX90AInsts]) {
4329     if (DataAreg < 0 || DstAreg < 0)
4330       return true;
4331     return DstAreg == DataAreg;
4332   }
4333 
4334   return DstAreg < 1 && DataAreg < 1;
4335 }
4336 
4337 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4338   auto FB = getFeatureBits();
4339   if (!FB[AMDGPU::FeatureGFX90AInsts])
4340     return true;
4341 
4342   const MCRegisterInfo *MRI = getMRI();
4343   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4344   const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4345   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4346     const MCOperand &Op = Inst.getOperand(I);
4347     if (!Op.isReg())
4348       continue;
4349 
4350     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4351     if (!Sub)
4352       continue;
4353 
4354     if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4355       return false;
4356     if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4357       return false;
4358   }
4359 
4360   return true;
4361 }
4362 
4363 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4364   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4365     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4366     if (Op.isBLGP())
4367       return Op.getStartLoc();
4368   }
4369   return SMLoc();
4370 }
4371 
4372 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4373                                    const OperandVector &Operands) {
4374   unsigned Opc = Inst.getOpcode();
4375   int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4376   if (BlgpIdx == -1)
4377     return true;
4378   SMLoc BLGPLoc = getBLGPLoc(Operands);
4379   if (!BLGPLoc.isValid())
4380     return true;
4381   bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:");
4382   auto FB = getFeatureBits();
4383   bool UsesNeg = false;
4384   if (FB[AMDGPU::FeatureGFX940Insts]) {
4385     switch (Opc) {
4386     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4387     case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4388     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4389     case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4390       UsesNeg = true;
4391     }
4392   }
4393 
4394   if (IsNeg == UsesNeg)
4395     return true;
4396 
4397   Error(BLGPLoc,
4398         UsesNeg ? "invalid modifier: blgp is not supported"
4399                 : "invalid modifier: neg is not supported");
4400 
4401   return false;
4402 }
4403 
4404 // gfx90a has an undocumented limitation:
4405 // DS_GWS opcodes must use even aligned registers.
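// Illustrative example: "ds_gws_init v1 gds" would be rejected on gfx90a
// because data0 (v1) is an odd-aligned VGPR; v0 or v2 would be accepted.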
4406 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4407                                   const OperandVector &Operands) {
4408   if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4409     return true;
4410 
4411   int Opc = Inst.getOpcode();
4412   if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4413       Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4414     return true;
4415 
4416   const MCRegisterInfo *MRI = getMRI();
4417   const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4418   int Data0Pos =
4419       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4420   assert(Data0Pos != -1);
4421   auto Reg = Inst.getOperand(Data0Pos).getReg();
4422   auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4423   if (RegIdx & 1) {
4424     SMLoc RegLoc = getRegLoc(Reg, Operands);
4425     Error(RegLoc, "vgpr must be even aligned");
4426     return false;
4427   }
4428 
4429   return true;
4430 }
4431 
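// Validate cache policy (cpol) bits. Illustrative example: a returning atomic
// such as "flat_atomic_add v0, v[0:1], v2 glc" must specify glc (sc0 on
// gfx940), while the non-returning form "flat_atomic_add v[0:1], v2" must
// omit it.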
4432 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4433                                             const OperandVector &Operands,
4434                                             const SMLoc &IDLoc) {
4435   int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4436                                            AMDGPU::OpName::cpol);
4437   if (CPolPos == -1)
4438     return true;
4439 
4440   unsigned CPol = Inst.getOperand(CPolPos).getImm();
4441 
4442   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4443   if (TSFlags & SIInstrFlags::SMRD) {
4444     if (CPol && (isSI() || isCI())) {
4445       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4446       Error(S, "cache policy is not supported for SMRD instructions");
4447       return false;
4448     }
4449     if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4450       Error(IDLoc, "invalid cache policy for SMEM instruction");
4451       return false;
4452     }
4453   }
4454 
4455   if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4456     SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4457     StringRef CStr(S.getPointer());
4458     S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4459     Error(S, "scc is not supported on this GPU");
4460     return false;
4461   }
4462 
4463   if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4464     return true;
4465 
4466   if (TSFlags & SIInstrFlags::IsAtomicRet) {
4467     if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4468       Error(IDLoc, isGFX940() ? "instruction must use sc0"
4469                               : "instruction must use glc");
4470       return false;
4471     }
4472   } else {
4473     if (CPol & CPol::GLC) {
4474       SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4475       StringRef CStr(S.getPointer());
4476       S = SMLoc::getFromPointer(
4477           &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4478       Error(S, isGFX940() ? "instruction must not use sc0"
4479                           : "instruction must not use glc");
4480       return false;
4481     }
4482   }
4483 
4484   return true;
4485 }
4486 
4487 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst,
4488                                          const OperandVector &Operands,
4489                                          const SMLoc &IDLoc) {
4490   if (isGFX940())
4491     return true;
4492 
4493   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4494   if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) !=
4495       (SIInstrFlags::VALU | SIInstrFlags::FLAT))
4496     return true;
4497   // This is FLAT LDS DMA.
4498 
4499   SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands);
4500   StringRef CStr(S.getPointer());
4501   if (!CStr.startswith("lds")) {
4502     // This is an incorrectly selected LDS DMA version of a FLAT load opcode.
4503     // The LDS version should have the 'lds' modifier, but it follows optional
4504     // operands, so its absence is ignored by the matcher.
4505     Error(IDLoc, "invalid operands for instruction");
4506     return false;
4507   }
4508 
4509   return true;
4510 }
4511 
4512 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4513   if (!isGFX11Plus())
4514     return true;
4515   for (auto &Operand : Operands) {
4516     if (!Operand->isReg())
4517       continue;
4518     unsigned Reg = Operand->getReg();
4519     if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4520       Error(getRegLoc(Reg, Operands),
4521             "execz and vccz are not supported on this GPU");
4522       return false;
4523     }
4524   }
4525   return true;
4526 }
4527 
4528 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4529                                           const SMLoc &IDLoc,
4530                                           const OperandVector &Operands) {
4531   if (auto ErrMsg = validateLdsDirect(Inst)) {
4532     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4533     return false;
4534   }
4535   if (!validateSOPLiteral(Inst)) {
4536     Error(getLitLoc(Operands),
4537       "only one literal operand is allowed");
4538     return false;
4539   }
4540   if (!validateVOPLiteral(Inst, Operands)) {
4541     return false;
4542   }
4543   if (!validateConstantBusLimitations(Inst, Operands)) {
4544     return false;
4545   }
4546   if (!validateEarlyClobberLimitations(Inst, Operands)) {
4547     return false;
4548   }
4549   if (!validateIntClampSupported(Inst)) {
4550     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4551       "integer clamping is not supported on this GPU");
4552     return false;
4553   }
4554   if (!validateOpSel(Inst)) {
4555     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4556       "invalid op_sel operand");
4557     return false;
4558   }
4559   if (!validateDPP(Inst, Operands)) {
4560     return false;
4561   }
4562   // For MUBUF/MTBUF d16 is a part of the opcode, so there is nothing to validate.
4563   if (!validateMIMGD16(Inst)) {
4564     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4565       "d16 modifier is not supported on this GPU");
4566     return false;
4567   }
4568   if (!validateMIMGDim(Inst)) {
4569     Error(IDLoc, "dim modifier is required on this GPU");
4570     return false;
4571   }
4572   if (!validateMIMGMSAA(Inst)) {
4573     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4574           "invalid dim; must be MSAA type");
4575     return false;
4576   }
4577   if (auto ErrMsg = validateMIMGDataSize(Inst)) {
4578     Error(IDLoc, *ErrMsg);
4579     return false;
4580   }
4581   if (!validateMIMGAddrSize(Inst)) {
4582     Error(IDLoc,
4583       "image address size does not match dim and a16");
4584     return false;
4585   }
4586   if (!validateMIMGAtomicDMask(Inst)) {
4587     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4588       "invalid atomic image dmask");
4589     return false;
4590   }
4591   if (!validateMIMGGatherDMask(Inst)) {
4592     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4593       "invalid image_gather dmask: only one bit must be set");
4594     return false;
4595   }
4596   if (!validateMovrels(Inst, Operands)) {
4597     return false;
4598   }
4599   if (!validateFlatOffset(Inst, Operands)) {
4600     return false;
4601   }
4602   if (!validateSMEMOffset(Inst, Operands)) {
4603     return false;
4604   }
4605   if (!validateMAIAccWrite(Inst, Operands)) {
4606     return false;
4607   }
4608   if (!validateMFMA(Inst, Operands)) {
4609     return false;
4610   }
4611   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4612     return false;
4613   }
4614 
4615   if (!validateAGPRLdSt(Inst)) {
4616     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4617     ? "invalid register class: data and dst should be all VGPR or AGPR"
4618     : "invalid register class: agpr loads and stores not supported on this GPU"
4619     );
4620     return false;
4621   }
4622   if (!validateVGPRAlign(Inst)) {
4623     Error(IDLoc,
4624       "invalid register class: vgpr tuples must be 64 bit aligned");
4625     return false;
4626   }
4627   if (!validateGWS(Inst, Operands)) {
4628     return false;
4629   }
4630 
4631   if (!validateBLGP(Inst, Operands)) {
4632     return false;
4633   }
4634 
4635   if (!validateDivScale(Inst)) {
4636     Error(IDLoc, "ABS not allowed in VOP3B instructions");
4637     return false;
4638   }
4642   if (!validateExeczVcczOperands(Operands)) {
4643     return false;
4644   }
4645 
4646   if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) {
4647     return false;
4648   }
4649 
4650   return true;
4651 }
4652 
4653 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4654                                             const FeatureBitset &FBS,
4655                                             unsigned VariantID = 0);
4656 
4657 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4658                                 const FeatureBitset &AvailableFeatures,
4659                                 unsigned VariantID);
4660 
4661 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4662                                        const FeatureBitset &FBS) {
4663   return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4664 }
4665 
4666 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4667                                        const FeatureBitset &FBS,
4668                                        ArrayRef<unsigned> Variants) {
4669   for (auto Variant : Variants) {
4670     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4671       return true;
4672   }
4673 
4674   return false;
4675 }
4676 
4677 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4678                                                   const SMLoc &IDLoc) {
4679   FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4680 
4681   // Check if requested instruction variant is supported.
4682   if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4683     return false;
4684 
4685   // This instruction is not supported.
4686   // Clear any other pending errors because they are no longer relevant.
4687   getParser().clearPendingErrors();
4688 
4689   // Requested instruction variant is not supported.
4690   // Check if any other variants are supported.
4691   StringRef VariantName = getMatchedVariantName();
4692   if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4693     return Error(IDLoc,
4694                  Twine(VariantName,
4695                        " variant of this instruction is not supported"));
4696   }
4697 
4698   // Finally check if this instruction is supported on any other GPU.
4699   if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4700     return Error(IDLoc, "instruction not supported on this GPU");
4701   }
4702 
4703   // Instruction not supported on any GPU. Probably a typo.
4704   std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4705   return Error(IDLoc, "invalid instruction" + Suggestion);
4706 }
4707 
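// Try to match the parsed operands against every enabled encoding variant and
// emit the first successful match; otherwise report the most specific failure
// status collected across the variants.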
4708 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4709                                               OperandVector &Operands,
4710                                               MCStreamer &Out,
4711                                               uint64_t &ErrorInfo,
4712                                               bool MatchingInlineAsm) {
4713   MCInst Inst;
4714   unsigned Result = Match_Success;
4715   for (auto Variant : getMatchedVariants()) {
4716     uint64_t EI;
4717     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4718                                   Variant);
4719     // Match statuses are ordered from least to most specific, and we keep the
4720     // most specific status seen so far as the result:
4721     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4722     if ((R == Match_Success) ||
4723         (R == Match_PreferE32) ||
4724         (R == Match_MissingFeature && Result != Match_PreferE32) ||
4725         (R == Match_InvalidOperand && Result != Match_MissingFeature
4726                                    && Result != Match_PreferE32) ||
4727         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
4728                                    && Result != Match_MissingFeature
4729                                    && Result != Match_PreferE32)) {
4730       Result = R;
4731       ErrorInfo = EI;
4732     }
4733     if (R == Match_Success)
4734       break;
4735   }
4736 
4737   if (Result == Match_Success) {
4738     if (!validateInstruction(Inst, IDLoc, Operands)) {
4739       return true;
4740     }
4741     Inst.setLoc(IDLoc);
4742     Out.emitInstruction(Inst, getSTI());
4743     return false;
4744   }
4745 
4746   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4747   if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4748     return true;
4749   }
4750 
4751   switch (Result) {
4752   default: break;
4753   case Match_MissingFeature:
4754     // It has been verified that the specified instruction
4755     // mnemonic is valid. A match was found but it requires
4756     // features which are not supported on this GPU.
4757     return Error(IDLoc, "operands are not valid for this GPU or mode");
4758 
4759   case Match_InvalidOperand: {
4760     SMLoc ErrorLoc = IDLoc;
4761     if (ErrorInfo != ~0ULL) {
4762       if (ErrorInfo >= Operands.size()) {
4763         return Error(IDLoc, "too few operands for instruction");
4764       }
4765       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4766       if (ErrorLoc == SMLoc())
4767         ErrorLoc = IDLoc;
4768     }
4769     return Error(ErrorLoc, "invalid operand for instruction");
4770   }
4771 
4772   case Match_PreferE32:
4773     return Error(IDLoc, "internal error: instruction without _e64 suffix "
4774                         "should be encoded as e32");
4775   case Match_MnemonicFail:
4776     llvm_unreachable("Invalid instructions should have been handled already");
4777   }
4778   llvm_unreachable("Implement any new match types added!");
4779 }
4780 
4781 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4782   int64_t Tmp = -1;
4783   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4784     return true;
4785   }
4786   if (getParser().parseAbsoluteExpression(Tmp)) {
4787     return true;
4788   }
4789   Ret = static_cast<uint32_t>(Tmp);
4790   return false;
4791 }
4792 
4793 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4794                                                uint32_t &Minor) {
4795   if (ParseAsAbsoluteExpression(Major))
4796     return TokError("invalid major version");
4797 
4798   if (!trySkipToken(AsmToken::Comma))
4799     return TokError("minor version number required, comma expected");
4800 
4801   if (ParseAsAbsoluteExpression(Minor))
4802     return TokError("invalid minor version");
4803 
4804   return false;
4805 }
4806 
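/// Parse the ".amdgcn_target" directive and check that its quoted target id
/// matches the one the assembler was configured with, e.g.:
///   .amdgcn_target "amdgcn-amd-amdhsa--gfx90a"
/// (The target id shown above is illustrative only.)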
4807 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4808   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4809     return TokError("directive only supported for amdgcn architecture");
4810 
4811   std::string TargetIDDirective;
4812   SMLoc TargetStart = getTok().getLoc();
4813   if (getParser().parseEscapedString(TargetIDDirective))
4814     return true;
4815 
4816   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4817   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4818     return getParser().Error(TargetRange.Start,
4819         (Twine(".amdgcn_target directive's target id ") +
4820          Twine(TargetIDDirective) +
4821          Twine(" does not match the specified target id ") +
4822          Twine(getTargetStreamer().getTargetID()->toString())).str());
4823 
4824   return false;
4825 }
4826 
4827 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4828   return Error(Range.Start, "value out of range", Range);
4829 }
4830 
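// Translate the raw VGPR/SGPR counts supplied by .amdhsa_next_free_* into the
// granulated register block counts encoded in compute_pgm_rsrc1, accounting
// for VCC, flat scratch, XNACK and the SGPR init bug.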
4831 bool AMDGPUAsmParser::calculateGPRBlocks(
4832     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4833     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4834     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4835     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4836   // TODO(scott.linder): These calculations are duplicated from
4837   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4838   IsaVersion Version = getIsaVersion(getSTI().getCPU());
4839 
4840   unsigned NumVGPRs = NextFreeVGPR;
4841   unsigned NumSGPRs = NextFreeSGPR;
4842 
4843   if (Version.Major >= 10)
4844     NumSGPRs = 0;
4845   else {
4846     unsigned MaxAddressableNumSGPRs =
4847         IsaInfo::getAddressableNumSGPRs(&getSTI());
4848 
4849     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4850         NumSGPRs > MaxAddressableNumSGPRs)
4851       return OutOfRangeError(SGPRRange);
4852 
4853     NumSGPRs +=
4854         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4855 
4856     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4857         NumSGPRs > MaxAddressableNumSGPRs)
4858       return OutOfRangeError(SGPRRange);
4859 
4860     if (Features.test(FeatureSGPRInitBug))
4861       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4862   }
4863 
4864   VGPRBlocks =
4865       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4866   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4867 
4868   return false;
4869 }
4870 
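/// Parse an ".amdhsa_kernel <name> ... .end_amdhsa_kernel" block. Each
/// ".amdhsa_*" entry overrides one field of the default kernel descriptor,
/// e.g.:
///   .amdhsa_kernel my_kernel
///     .amdhsa_next_free_vgpr 8
///     .amdhsa_next_free_sgpr 16
///   .end_amdhsa_kernel
/// (The kernel name and register counts above are illustrative only.)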
4871 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4872   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4873     return TokError("directive only supported for amdgcn architecture");
4874 
4875   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4876     return TokError("directive only supported for amdhsa OS");
4877 
4878   StringRef KernelName;
4879   if (getParser().parseIdentifier(KernelName))
4880     return true;
4881 
4882   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4883 
4884   StringSet<> Seen;
4885 
4886   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4887 
4888   SMRange VGPRRange;
4889   uint64_t NextFreeVGPR = 0;
4890   uint64_t AccumOffset = 0;
4891   uint64_t SharedVGPRCount = 0;
4892   SMRange SGPRRange;
4893   uint64_t NextFreeSGPR = 0;
4894 
4895   // Count the number of user SGPRs implied from the enabled feature bits.
4896   unsigned ImpliedUserSGPRCount = 0;
4897 
4898   // Track if the asm explicitly contains the directive for the user SGPR
4899   // count.
4900   Optional<unsigned> ExplicitUserSGPRCount;
4901   bool ReserveVCC = true;
4902   bool ReserveFlatScr = true;
4903   Optional<bool> EnableWavefrontSize32;
4904 
4905   while (true) {
4906     while (trySkipToken(AsmToken::EndOfStatement));
4907 
4908     StringRef ID;
4909     SMRange IDRange = getTok().getLocRange();
4910     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4911       return true;
4912 
4913     if (ID == ".end_amdhsa_kernel")
4914       break;
4915 
4916     if (!Seen.insert(ID).second)
4917       return TokError(".amdhsa_ directives cannot be repeated");
4918 
4919     SMLoc ValStart = getLoc();
4920     int64_t IVal;
4921     if (getParser().parseAbsoluteExpression(IVal))
4922       return true;
4923     SMLoc ValEnd = getLoc();
4924     SMRange ValRange = SMRange(ValStart, ValEnd);
4925 
4926     if (IVal < 0)
4927       return OutOfRangeError(ValRange);
4928 
4929     uint64_t Val = IVal;
4930 
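// Helper for the directives below: range-check VALUE against the bit width of
// ENTRY, bailing out of the enclosing parser on overflow, and otherwise pack
// the value into the corresponding field of the kernel descriptor.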
4931 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
4932   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
4933     return OutOfRangeError(RANGE);                                             \
4934   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4935 
4936     if (ID == ".amdhsa_group_segment_fixed_size") {
4937       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4938         return OutOfRangeError(ValRange);
4939       KD.group_segment_fixed_size = Val;
4940     } else if (ID == ".amdhsa_private_segment_fixed_size") {
4941       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4942         return OutOfRangeError(ValRange);
4943       KD.private_segment_fixed_size = Val;
4944     } else if (ID == ".amdhsa_kernarg_size") {
4945       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4946         return OutOfRangeError(ValRange);
4947       KD.kernarg_size = Val;
4948     } else if (ID == ".amdhsa_user_sgpr_count") {
4949       ExplicitUserSGPRCount = Val;
4950     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4951       if (hasArchitectedFlatScratch())
4952         return Error(IDRange.Start,
4953                      "directive is not supported with architected flat scratch",
4954                      IDRange);
4955       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4956                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4957                        Val, ValRange);
4958       if (Val)
4959         ImpliedUserSGPRCount += 4;
4960     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4961       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4962                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4963                        ValRange);
4964       if (Val)
4965         ImpliedUserSGPRCount += 2;
4966     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4967       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4968                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4969                        ValRange);
4970       if (Val)
4971         ImpliedUserSGPRCount += 2;
4972     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4973       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4974                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4975                        Val, ValRange);
4976       if (Val)
4977         ImpliedUserSGPRCount += 2;
4978     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4979       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4980                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4981                        ValRange);
4982       if (Val)
4983         ImpliedUserSGPRCount += 2;
4984     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4985       if (hasArchitectedFlatScratch())
4986         return Error(IDRange.Start,
4987                      "directive is not supported with architected flat scratch",
4988                      IDRange);
4989       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4990                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4991                        ValRange);
4992       if (Val)
4993         ImpliedUserSGPRCount += 2;
4994     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4995       PARSE_BITS_ENTRY(KD.kernel_code_properties,
4996                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4997                        Val, ValRange);
4998       if (Val)
4999         ImpliedUserSGPRCount += 1;
5000     } else if (ID == ".amdhsa_wavefront_size32") {
5001       if (IVersion.Major < 10)
5002         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5003       EnableWavefrontSize32 = Val;
5004       PARSE_BITS_ENTRY(KD.kernel_code_properties,
5005                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5006                        Val, ValRange);
5007     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5008       if (hasArchitectedFlatScratch())
5009         return Error(IDRange.Start,
5010                      "directive is not supported with architected flat scratch",
5011                      IDRange);
5012       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5013                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5014     } else if (ID == ".amdhsa_enable_private_segment") {
5015       if (!hasArchitectedFlatScratch())
5016         return Error(
5017             IDRange.Start,
5018             "directive is not supported without architected flat scratch",
5019             IDRange);
5020       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5021                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5022     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5023       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5024                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5025                        ValRange);
5026     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5027       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5028                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5029                        ValRange);
5030     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5031       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5032                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5033                        ValRange);
5034     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5035       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5036                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5037                        ValRange);
5038     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5039       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5040                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5041                        ValRange);
5042     } else if (ID == ".amdhsa_next_free_vgpr") {
5043       VGPRRange = ValRange;
5044       NextFreeVGPR = Val;
5045     } else if (ID == ".amdhsa_next_free_sgpr") {
5046       SGPRRange = ValRange;
5047       NextFreeSGPR = Val;
5048     } else if (ID == ".amdhsa_accum_offset") {
5049       if (!isGFX90A())
5050         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5051       AccumOffset = Val;
5052     } else if (ID == ".amdhsa_reserve_vcc") {
5053       if (!isUInt<1>(Val))
5054         return OutOfRangeError(ValRange);
5055       ReserveVCC = Val;
5056     } else if (ID == ".amdhsa_reserve_flat_scratch") {
5057       if (IVersion.Major < 7)
5058         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5059       if (hasArchitectedFlatScratch())
5060         return Error(IDRange.Start,
5061                      "directive is not supported with architected flat scratch",
5062                      IDRange);
5063       if (!isUInt<1>(Val))
5064         return OutOfRangeError(ValRange);
5065       ReserveFlatScr = Val;
5066     } else if (ID == ".amdhsa_reserve_xnack_mask") {
5067       if (IVersion.Major < 8)
5068         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5069       if (!isUInt<1>(Val))
5070         return OutOfRangeError(ValRange);
5071       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5072         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5073                                  IDRange);
5074     } else if (ID == ".amdhsa_float_round_mode_32") {
5075       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5076                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5077     } else if (ID == ".amdhsa_float_round_mode_16_64") {
5078       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5079                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5080     } else if (ID == ".amdhsa_float_denorm_mode_32") {
5081       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5082                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5083     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5084       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5085                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5086                        ValRange);
5087     } else if (ID == ".amdhsa_dx10_clamp") {
5088       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5089                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
5090     } else if (ID == ".amdhsa_ieee_mode") {
5091       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
5092                        Val, ValRange);
5093     } else if (ID == ".amdhsa_fp16_overflow") {
5094       if (IVersion.Major < 9)
5095         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5096       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
5097                        ValRange);
5098     } else if (ID == ".amdhsa_tg_split") {
5099       if (!isGFX90A())
5100         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5101       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5102                        ValRange);
5103     } else if (ID == ".amdhsa_workgroup_processor_mode") {
5104       if (IVersion.Major < 10)
5105         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5106       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
5107                        ValRange);
5108     } else if (ID == ".amdhsa_memory_ordered") {
5109       if (IVersion.Major < 10)
5110         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5111       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
5112                        ValRange);
5113     } else if (ID == ".amdhsa_forward_progress") {
5114       if (IVersion.Major < 10)
5115         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5116       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
5117                        ValRange);
5118     } else if (ID == ".amdhsa_shared_vgpr_count") {
5119       if (IVersion.Major < 10)
5120         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5121       SharedVGPRCount = Val;
5122       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5123                        COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val,
5124                        ValRange);
5125     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5126       PARSE_BITS_ENTRY(
5127           KD.compute_pgm_rsrc2,
5128           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5129           ValRange);
5130     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5131       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5132                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5133                        Val, ValRange);
5134     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5135       PARSE_BITS_ENTRY(
5136           KD.compute_pgm_rsrc2,
5137           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5138           ValRange);
5139     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5140       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5141                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5142                        Val, ValRange);
5143     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5144       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5145                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5146                        Val, ValRange);
5147     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5148       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5149                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5150                        Val, ValRange);
5151     } else if (ID == ".amdhsa_exception_int_div_zero") {
5152       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5153                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5154                        Val, ValRange);
5155     } else {
5156       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5157     }
5158 
5159 #undef PARSE_BITS_ENTRY
5160   }
5161 
5162   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5163     return TokError(".amdhsa_next_free_vgpr directive is required");
5164 
5165   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5166     return TokError(".amdhsa_next_free_sgpr directive is required");
5167 
5168   unsigned VGPRBlocks;
5169   unsigned SGPRBlocks;
5170   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5171                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5172                          EnableWavefrontSize32, NextFreeVGPR,
5173                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5174                          SGPRBlocks))
5175     return true;
5176 
5177   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5178           VGPRBlocks))
5179     return OutOfRangeError(VGPRRange);
5180   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5181                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5182 
5183   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5184           SGPRBlocks))
5185     return OutOfRangeError(SGPRRange);
5186   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5187                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5188                   SGPRBlocks);
5189 
5190   if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5191     return TokError("amdgpu_user_sgpr_count smaller than implied by "
5192                     "enabled user SGPRs");
5193 
5194   unsigned UserSGPRCount =
5195       ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5196 
5197   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5198     return TokError("too many user SGPRs enabled");
5199   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5200                   UserSGPRCount);
5201 
5202   if (isGFX90A()) {
5203     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5204       return TokError(".amdhsa_accum_offset directive is required");
5205     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5206       return TokError("accum_offset should be in range [4..256] in "
5207                       "increments of 4");
5208     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5209       return TokError("accum_offset exceeds total VGPR allocation");
5210     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5211                     (AccumOffset / 4 - 1));
5212   }
5213 
5214   if (IVersion.Major == 10) {
5215     // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5216     if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5217       return TokError("shared_vgpr_count directive not valid on "
5218                       "wavefront size 32");
5219     }
5220     if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5221       return TokError("shared_vgpr_count*2 + "
5222                       "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5223                       "exceed 63");
5224     }
5225   }
5226 
5227   getTargetStreamer().EmitAmdhsaKernelDescriptor(
5228       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5229       ReserveFlatScr);
5230   return false;
5231 }
5232 
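/// Parse ".hsa_code_object_version major, minor", e.g.
///   .hsa_code_object_version 2,1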
5233 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5234   uint32_t Major;
5235   uint32_t Minor;
5236 
5237   if (ParseDirectiveMajorMinor(Major, Minor))
5238     return true;
5239 
5240   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5241   return false;
5242 }
5243 
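/// Parse ".hsa_code_object_isa [major, minor, stepping, "vendor", "arch"]".
/// With no arguments, the ISA version of the targeted GPU is used.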
5244 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5245   uint32_t Major;
5246   uint32_t Minor;
5247   uint32_t Stepping;
5248   StringRef VendorName;
5249   StringRef ArchName;
5250 
5251   // If this directive has no arguments, then use the ISA version for the
5252   // targeted GPU.
5253   if (isToken(AsmToken::EndOfStatement)) {
5254     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5255     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
5256                                                         ISA.Stepping,
5257                                                         "AMD", "AMDGPU");
5258     return false;
5259   }
5260 
5261   if (ParseDirectiveMajorMinor(Major, Minor))
5262     return true;
5263 
5264   if (!trySkipToken(AsmToken::Comma))
5265     return TokError("stepping version number required, comma expected");
5266 
5267   if (ParseAsAbsoluteExpression(Stepping))
5268     return TokError("invalid stepping version");
5269 
5270   if (!trySkipToken(AsmToken::Comma))
5271     return TokError("vendor name required, comma expected");
5272 
5273   if (!parseString(VendorName, "invalid vendor name"))
5274     return true;
5275 
5276   if (!trySkipToken(AsmToken::Comma))
5277     return TokError("arch name required, comma expected");
5278 
5279   if (!parseString(ArchName, "invalid arch name"))
5280     return true;
5281 
5282   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
5283                                                       VendorName, ArchName);
5284   return false;
5285 }
5286 
5287 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
5288                                                amd_kernel_code_t &Header) {
5289   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
5290   // assembly for backwards compatibility.
5291   if (ID == "max_scratch_backing_memory_byte_size") {
5292     Parser.eatToEndOfStatement();
5293     return false;
5294   }
5295 
5296   SmallString<40> ErrStr;
5297   raw_svector_ostream Err(ErrStr);
5298   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
5299     return TokError(Err.str());
5300   }
5301   Lex();
5302 
5303   if (ID == "enable_wavefront_size32") {
5304     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
5305       if (!isGFX10Plus())
5306         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
5307       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5308         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
5309     } else {
5310       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5311         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
5312     }
5313   }
5314 
5315   if (ID == "wavefront_size") {
5316     if (Header.wavefront_size == 5) {
5317       if (!isGFX10Plus())
5318         return TokError("wavefront_size=5 is only allowed on GFX10+");
5319       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
5320         return TokError("wavefront_size=5 requires +WavefrontSize32");
5321     } else if (Header.wavefront_size == 6) {
5322       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
5323         return TokError("wavefront_size=6 requires +WavefrontSize64");
5324     }
5325   }
5326 
5327   if (ID == "enable_wgp_mode") {
5328     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
5329         !isGFX10Plus())
5330       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
5331   }
5332 
5333   if (ID == "enable_mem_ordered") {
5334     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
5335         !isGFX10Plus())
5336       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
5337   }
5338 
5339   if (ID == "enable_fwd_progress") {
5340     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
5341         !isGFX10Plus())
5342       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
5343   }
5344 
5345   return false;
5346 }
5347 
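// Parse an ".amd_kernel_code_t ... .end_amd_kernel_code_t" block containing
// one "key = value" field per statement into an amd_kernel_code_t header.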
5348 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5349   amd_kernel_code_t Header;
5350   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5351 
5352   while (true) {
5353     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
5354     // will set the current token to EndOfStatement.
5355     while (trySkipToken(AsmToken::EndOfStatement));
5356 
5357     StringRef ID;
5358     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5359       return true;
5360 
5361     if (ID == ".end_amd_kernel_code_t")
5362       break;
5363 
5364     if (ParseAMDKernelCodeTValue(ID, Header))
5365       return true;
5366   }
5367 
5368   getTargetStreamer().EmitAMDKernelCodeT(Header);
5369 
5370   return false;
5371 }
5372 
5373 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5374   StringRef KernelName;
5375   if (!parseId(KernelName, "expected symbol name"))
5376     return true;
5377 
5378   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5379                                            ELF::STT_AMDGPU_HSA_KERNEL);
5380 
5381   KernelScope.initialize(getContext());
5382   return false;
5383 }
5384 
5385 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5386   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5387     return Error(getLoc(),
5388                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
5389                  "architectures");
5390   }
5391 
5392   auto TargetIDDirective = getLexer().getTok().getStringContents();
5393   if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5394     return Error(getParser().getTok().getLoc(), "target id must match options");
5395 
5396   getTargetStreamer().EmitISAVersion();
5397   Lex();
5398 
5399   return false;
5400 }
5401 
5402 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5403   const char *AssemblerDirectiveBegin;
5404   const char *AssemblerDirectiveEnd;
5405   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
5406       isHsaAbiVersion3AndAbove(&getSTI())
5407           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
5408                             HSAMD::V3::AssemblerDirectiveEnd)
5409           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
5410                             HSAMD::AssemblerDirectiveEnd);
5411 
5412   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
5413     return Error(getLoc(),
5414                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
5415                  "not available on non-amdhsa OSes")).str());
5416   }
5417 
5418   std::string HSAMetadataString;
5419   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
5420                           HSAMetadataString))
5421     return true;
5422 
5423   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5424     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5425       return Error(getLoc(), "invalid HSA metadata");
5426   } else {
5427     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
5428       return Error(getLoc(), "invalid HSA metadata");
5429   }
5430 
5431   return false;
5432 }
5433 
5434 /// Common code to parse out a block of text (typically YAML) between start and
5435 /// end directives.
5436 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5437                                           const char *AssemblerDirectiveEnd,
5438                                           std::string &CollectString) {
5439 
5440   raw_string_ostream CollectStream(CollectString);
5441 
5442   getLexer().setSkipSpace(false);
5443 
5444   bool FoundEnd = false;
5445   while (!isToken(AsmToken::Eof)) {
5446     while (isToken(AsmToken::Space)) {
5447       CollectStream << getTokenStr();
5448       Lex();
5449     }
5450 
5451     if (trySkipId(AssemblerDirectiveEnd)) {
5452       FoundEnd = true;
5453       break;
5454     }
5455 
5456     CollectStream << Parser.parseStringToEndOfStatement()
5457                   << getContext().getAsmInfo()->getSeparatorString();
5458 
5459     Parser.eatToEndOfStatement();
5460   }
5461 
5462   getLexer().setSkipSpace(true);
5463 
5464   if (isToken(AsmToken::Eof) && !FoundEnd) {
5465     return TokError(Twine("expected directive ") +
5466                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5467   }
5468 
5469   CollectStream.flush();
5470   return false;
5471 }
5472 
5473 /// Parse the assembler directive for new MsgPack-format PAL metadata.
5474 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5475   std::string String;
5476   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5477                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5478     return true;
5479 
5480   auto PALMetadata = getTargetStreamer().getPALMetadata();
5481   if (!PALMetadata->setFromString(String))
5482     return Error(getLoc(), "invalid PAL metadata");
5483   return false;
5484 }
5485 
5486 /// Parse the assembler directive for old linear-format PAL metadata.
5487 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5488   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5489     return Error(getLoc(),
5490                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5491                  "not available on non-amdpal OSes")).str());
5492   }
5493 
5494   auto PALMetadata = getTargetStreamer().getPALMetadata();
5495   PALMetadata->setLegacy();
5496   for (;;) {
5497     uint32_t Key, Value;
5498     if (ParseAsAbsoluteExpression(Key)) {
5499       return TokError(Twine("invalid value in ") +
5500                       Twine(PALMD::AssemblerDirective));
5501     }
5502     if (!trySkipToken(AsmToken::Comma)) {
5503       return TokError(Twine("expected an even number of values in ") +
5504                       Twine(PALMD::AssemblerDirective));
5505     }
5506     if (ParseAsAbsoluteExpression(Value)) {
5507       return TokError(Twine("invalid value in ") +
5508                       Twine(PALMD::AssemblerDirective));
5509     }
5510     PALMetadata->setRegister(Key, Value);
5511     if (!trySkipToken(AsmToken::Comma))
5512       break;
5513   }
5514   return false;
5515 }
5516 
5517 /// ParseDirectiveAMDGPULDS
5518 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
5519 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5520   if (getParser().checkForValidSection())
5521     return true;
5522 
5523   StringRef Name;
5524   SMLoc NameLoc = getLoc();
5525   if (getParser().parseIdentifier(Name))
5526     return TokError("expected identifier in directive");
5527 
5528   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5529   if (parseToken(AsmToken::Comma, "expected ','"))
5530     return true;
5531 
5532   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5533 
5534   int64_t Size;
5535   SMLoc SizeLoc = getLoc();
5536   if (getParser().parseAbsoluteExpression(Size))
5537     return true;
5538   if (Size < 0)
5539     return Error(SizeLoc, "size must be non-negative");
5540   if (Size > LocalMemorySize)
5541     return Error(SizeLoc, "size is too large");
5542 
5543   int64_t Alignment = 4;
5544   if (trySkipToken(AsmToken::Comma)) {
5545     SMLoc AlignLoc = getLoc();
5546     if (getParser().parseAbsoluteExpression(Alignment))
5547       return true;
5548     if (Alignment < 0 || !isPowerOf2_64(Alignment))
5549       return Error(AlignLoc, "alignment must be a power of two");
5550 
5551     // Alignment larger than the size of LDS is possible in theory, as long
5552     // as the linker manages to place the symbol at address 0, but we do want
5553     // to make sure the alignment fits nicely into a 32-bit integer.
5554     if (Alignment >= 1u << 31)
5555       return Error(AlignLoc, "alignment is too large");
5556   }
5557 
5558   if (parseEOL())
5559     return true;
5560 
5561   Symbol->redefineIfPossible();
5562   if (!Symbol->isUndefined())
5563     return Error(NameLoc, "invalid symbol redefinition");
5564 
5565   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5566   return false;
5567 }
5568 
5569 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5570   StringRef IDVal = DirectiveID.getString();
5571 
5572   if (isHsaAbiVersion3AndAbove(&getSTI())) {
5573     if (IDVal == ".amdhsa_kernel")
5574      return ParseDirectiveAMDHSAKernel();
5575 
5576     // TODO: Restructure/combine with PAL metadata directive.
5577     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5578       return ParseDirectiveHSAMetadata();
5579   } else {
5580     if (IDVal == ".hsa_code_object_version")
5581       return ParseDirectiveHSACodeObjectVersion();
5582 
5583     if (IDVal == ".hsa_code_object_isa")
5584       return ParseDirectiveHSACodeObjectISA();
5585 
5586     if (IDVal == ".amd_kernel_code_t")
5587       return ParseDirectiveAMDKernelCodeT();
5588 
5589     if (IDVal == ".amdgpu_hsa_kernel")
5590       return ParseDirectiveAMDGPUHsaKernel();
5591 
5592     if (IDVal == ".amd_amdgpu_isa")
5593       return ParseDirectiveISAVersion();
5594 
5595     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
5596       return ParseDirectiveHSAMetadata();
5597   }
5598 
5599   if (IDVal == ".amdgcn_target")
5600     return ParseDirectiveAMDGCNTarget();
5601 
5602   if (IDVal == ".amdgpu_lds")
5603     return ParseDirectiveAMDGPULDS();
5604 
5605   if (IDVal == PALMD::AssemblerDirectiveBegin)
5606     return ParseDirectivePALMetadataBegin();
5607 
5608   if (IDVal == PALMD::AssemblerDirective)
5609     return ParseDirectivePALMetadata();
5610 
5611   return true;
5612 }
5613 
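// Return true if the given register exists on the current subtarget. Several
// special registers (e.g. XNACK_MASK, SGPR_NULL, TBA/TMA) are only defined on
// particular GPU generations.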
5614 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
5615                                            unsigned RegNo) {
5616 
5617   if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
5618     return isGFX9Plus();
5619 
5620   // GFX10+ has 2 more SGPRs, 104 and 105.
5621   if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
5622     return hasSGPR104_SGPR105();
5623 
5624   switch (RegNo) {
5625   case AMDGPU::SRC_SHARED_BASE:
5626   case AMDGPU::SRC_SHARED_LIMIT:
5627   case AMDGPU::SRC_PRIVATE_BASE:
5628   case AMDGPU::SRC_PRIVATE_LIMIT:
5629     return isGFX9Plus();
5630   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
5631     return isGFX9Plus() && !isGFX11Plus();
5632   case AMDGPU::TBA:
5633   case AMDGPU::TBA_LO:
5634   case AMDGPU::TBA_HI:
5635   case AMDGPU::TMA:
5636   case AMDGPU::TMA_LO:
5637   case AMDGPU::TMA_HI:
5638     return !isGFX9Plus();
5639   case AMDGPU::XNACK_MASK:
5640   case AMDGPU::XNACK_MASK_LO:
5641   case AMDGPU::XNACK_MASK_HI:
5642     return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
5643   case AMDGPU::SGPR_NULL:
5644     return isGFX10Plus();
5645   default:
5646     break;
5647   }
5648 
5649   if (isCI())
5650     return true;
5651 
5652   if (isSI() || isGFX10Plus()) {
5653     // No flat_scr on SI.
5654     // On GFX10Plus flat scratch is not a valid register operand and can only be
5655     // accessed with s_setreg/s_getreg.
5656     switch (RegNo) {
5657     case AMDGPU::FLAT_SCR:
5658     case AMDGPU::FLAT_SCR_LO:
5659     case AMDGPU::FLAT_SCR_HI:
5660       return false;
5661     default:
5662       return true;
5663     }
5664   }
5665 
5666   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
5667   // SI/CI have.
5668   if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
5669     return hasSGPR102_SGPR103();
5670 
5671   return true;
5672 }
5673 
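// Parse a single instruction operand. In NSA mode (gfx10+ MIMG addresses) a
// bracketed register list such as "[v4, v5, v6]" is also accepted.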
5674 OperandMatchResultTy
5675 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
5676                               OperandMode Mode) {
5677   OperandMatchResultTy ResTy = parseVOPD(Operands);
5678   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5679       isToken(AsmToken::EndOfStatement))
5680     return ResTy;
5681 
5682   // Try to parse with a custom parser
5683   ResTy = MatchOperandParserImpl(Operands, Mnemonic);
5684 
5685   // If we successfully parsed the operand, or if there was an error parsing,
5686   // we are done.
5687   //
5688   // If we are still parsing after we reach EndOfStatement, this means we are
5689   // appending default values to the Operands list.  This is only done by the
5690   // custom parser, so we shouldn't continue on to the generic parsing.
5691   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
5692       isToken(AsmToken::EndOfStatement))
5693     return ResTy;
5694 
5695   SMLoc RBraceLoc;
5696   SMLoc LBraceLoc = getLoc();
5697   if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
5698     unsigned Prefix = Operands.size();
5699 
5700     for (;;) {
5701       auto Loc = getLoc();
5702       ResTy = parseReg(Operands);
5703       if (ResTy == MatchOperand_NoMatch)
5704         Error(Loc, "expected a register");
5705       if (ResTy != MatchOperand_Success)
5706         return MatchOperand_ParseFail;
5707 
5708       RBraceLoc = getLoc();
5709       if (trySkipToken(AsmToken::RBrac))
5710         break;
5711 
5712       if (!skipToken(AsmToken::Comma,
5713                      "expected a comma or a closing square bracket")) {
5714         return MatchOperand_ParseFail;
5715       }
5716     }
5717 
5718     if (Operands.size() - Prefix > 1) {
5719       Operands.insert(Operands.begin() + Prefix,
5720                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
5721       Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
5722     }
5723 
5724     return MatchOperand_Success;
5725   }
5726 
5727   return parseRegOrImm(Operands);
5728 }
5729 
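// Strip a forced-encoding suffix (_e32, _e64, _dpp, _sdwa, _e64_dpp) from the
// mnemonic and record the requested encoding, so that e.g. "v_add_f32_e64" is
// matched as "v_add_f32" with a forced 64-bit encoding.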
5730 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
5731   // Clear any forced encodings from the previous instruction.
5732   setForcedEncodingSize(0);
5733   setForcedDPP(false);
5734   setForcedSDWA(false);
5735 
5736   if (Name.endswith("_e64_dpp")) {
5737     setForcedDPP(true);
5738     setForcedEncodingSize(64);
5739     return Name.substr(0, Name.size() - 8);
5740   } else if (Name.endswith("_e64")) {
5741     setForcedEncodingSize(64);
5742     return Name.substr(0, Name.size() - 4);
5743   } else if (Name.endswith("_e32")) {
5744     setForcedEncodingSize(32);
5745     return Name.substr(0, Name.size() - 4);
5746   } else if (Name.endswith("_dpp")) {
5747     setForcedDPP(true);
5748     return Name.substr(0, Name.size() - 4);
5749   } else if (Name.endswith("_sdwa")) {
5750     setForcedSDWA(true);
5751     return Name.substr(0, Name.size() - 5);
5752   }
5753   return Name;
5754 }
5755 
5756 static void applyMnemonicAliases(StringRef &Mnemonic,
5757                                  const FeatureBitset &Features,
5758                                  unsigned VariantID);
5759 
5760 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
5761                                        StringRef Name,
5762                                        SMLoc NameLoc, OperandVector &Operands) {
5763   // Add the instruction mnemonic
5764   Name = parseMnemonicSuffix(Name);
5765 
5766   // If the target architecture uses MnemonicAlias, call it here to parse
5767   // operands correctly.
5768   applyMnemonicAliases(Name, getAvailableFeatures(), 0);
5769 
5770   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
5771 
5772   bool IsMIMG = Name.startswith("image_");
5773 
5774   while (!trySkipToken(AsmToken::EndOfStatement)) {
5775     OperandMode Mode = OperandMode_Default;
5776     if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
5777       Mode = OperandMode_NSA;
5778     CPolSeen = 0;
5779     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
5780 
5781     if (Res != MatchOperand_Success) {
5782       checkUnsupportedInstruction(Name, NameLoc);
5783       if (!Parser.hasPendingError()) {
5784         // FIXME: use real operand location rather than the current location.
5785         StringRef Msg =
5786           (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
5787                                             "not a valid operand.";
5788         Error(getLoc(), Msg);
5789       }
5790       while (!trySkipToken(AsmToken::EndOfStatement)) {
5791         lex();
5792       }
5793       return true;
5794     }
5795 
5796     // Eat the comma or space if there is one.
5797     trySkipToken(AsmToken::Comma);
5798   }
5799 
5800   return false;
5801 }
5802 
5803 //===----------------------------------------------------------------------===//
5804 // Utility functions
5805 //===----------------------------------------------------------------------===//
5806 
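// Parse an integer operand of the form "<Prefix>:<expr>", e.g. "offset:16".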
5807 OperandMatchResultTy
5808 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
5809 
5810   if (!trySkipId(Prefix, AsmToken::Colon))
5811     return MatchOperand_NoMatch;
5812 
5813   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
5814 }
5815 
5816 OperandMatchResultTy
5817 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
5818                                     AMDGPUOperand::ImmTy ImmTy,
5819                                     bool (*ConvertResult)(int64_t&)) {
5820   SMLoc S = getLoc();
5821   int64_t Value = 0;
5822 
5823   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
5824   if (Res != MatchOperand_Success)
5825     return Res;
5826 
5827   if (ConvertResult && !ConvertResult(Value)) {
5828     Error(S, "invalid " + StringRef(Prefix) + " value.");
5829   }
5830 
5831   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
5832   return MatchOperand_Success;
5833 }
5834 
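// Parse a bit-array operand of the form "<Prefix>:[b0,b1,...]" with at most
// four 0/1 elements (e.g. op_sel-style modifiers) and pack the bits into a
// single immediate.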
5835 OperandMatchResultTy
5836 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5837                                              OperandVector &Operands,
5838                                              AMDGPUOperand::ImmTy ImmTy,
5839                                              bool (*ConvertResult)(int64_t&)) {
5840   SMLoc S = getLoc();
5841   if (!trySkipId(Prefix, AsmToken::Colon))
5842     return MatchOperand_NoMatch;
5843 
5844   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5845     return MatchOperand_ParseFail;
5846 
5847   unsigned Val = 0;
5848   const unsigned MaxSize = 4;
5849 
5850   // FIXME: How to verify the number of elements matches the number of src
5851   // operands?
5852   for (int I = 0; ; ++I) {
5853     int64_t Op;
5854     SMLoc Loc = getLoc();
5855     if (!parseExpr(Op))
5856       return MatchOperand_ParseFail;
5857 
5858     if (Op != 0 && Op != 1) {
5859       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5860       return MatchOperand_ParseFail;
5861     }
5862 
5863     Val |= (Op << I);
5864 
5865     if (trySkipToken(AsmToken::RBrac))
5866       break;
5867 
5868     if (I + 1 == MaxSize) {
5869       Error(getLoc(), "expected a closing square bracket");
5870       return MatchOperand_ParseFail;
5871     }
5872 
5873     if (!skipToken(AsmToken::Comma, "expected a comma"))
5874       return MatchOperand_ParseFail;
5875   }
5876 
5877   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5878   return MatchOperand_Success;
5879 }
5880 
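// Parse a named boolean modifier: "<Name>" sets the bit and "no<Name>" clears
// it.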
5881 OperandMatchResultTy
5882 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
5883                                AMDGPUOperand::ImmTy ImmTy) {
5884   int64_t Bit;
5885   SMLoc S = getLoc();
5886 
5887   if (trySkipId(Name)) {
5888     Bit = 1;
5889   } else if (trySkipId("no", Name)) {
5890     Bit = 0;
5891   } else {
5892     return MatchOperand_NoMatch;
5893   }
5894 
5895   if (Name == "r128" && !hasMIMG_R128()) {
5896     Error(S, "r128 modifier is not supported on this GPU");
5897     return MatchOperand_ParseFail;
5898   }
5899   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
5900     Error(S, "a16 modifier is not supported on this GPU");
5901     return MatchOperand_ParseFail;
5902   }
5903 
5904   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5905     ImmTy = AMDGPUOperand::ImmTyR128A16;
5906 
5907   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5908   return MatchOperand_Success;
5909 }
5910 
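// Parse cache policy modifiers (glc/slc/dlc/scc and their "no" forms, or
// sc0/sc1/nt on gfx940) and fold them into a single CPol immediate operand.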
5911 OperandMatchResultTy
5912 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
5913   unsigned CPolOn = 0;
5914   unsigned CPolOff = 0;
5915   SMLoc S = getLoc();
5916 
5917   StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5918   if (isGFX940() && !Mnemo.startswith("s_")) {
5919     if (trySkipId("sc0"))
5920       CPolOn = AMDGPU::CPol::SC0;
5921     else if (trySkipId("nosc0"))
5922       CPolOff = AMDGPU::CPol::SC0;
5923     else if (trySkipId("nt"))
5924       CPolOn = AMDGPU::CPol::NT;
5925     else if (trySkipId("nont"))
5926       CPolOff = AMDGPU::CPol::NT;
5927     else if (trySkipId("sc1"))
5928       CPolOn = AMDGPU::CPol::SC1;
5929     else if (trySkipId("nosc1"))
5930       CPolOff = AMDGPU::CPol::SC1;
5931     else
5932       return MatchOperand_NoMatch;
5933   } else if (trySkipId("glc"))
5935     CPolOn = AMDGPU::CPol::GLC;
5936   else if (trySkipId("noglc"))
5937     CPolOff = AMDGPU::CPol::GLC;
5938   else if (trySkipId("slc"))
5939     CPolOn = AMDGPU::CPol::SLC;
5940   else if (trySkipId("noslc"))
5941     CPolOff = AMDGPU::CPol::SLC;
5942   else if (trySkipId("dlc"))
5943     CPolOn = AMDGPU::CPol::DLC;
5944   else if (trySkipId("nodlc"))
5945     CPolOff = AMDGPU::CPol::DLC;
5946   else if (trySkipId("scc"))
5947     CPolOn = AMDGPU::CPol::SCC;
5948   else if (trySkipId("noscc"))
5949     CPolOff = AMDGPU::CPol::SCC;
5950   else
5951     return MatchOperand_NoMatch;
5952 
5953   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
5954     Error(S, "dlc modifier is not supported on this GPU");
5955     return MatchOperand_ParseFail;
5956   }
5957 
5958   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
5959     Error(S, "scc modifier is not supported on this GPU");
5960     return MatchOperand_ParseFail;
5961   }
5962 
5963   if (CPolSeen & (CPolOn | CPolOff)) {
5964     Error(S, "duplicate cache policy modifier");
5965     return MatchOperand_ParseFail;
5966   }
5967 
5968   CPolSeen |= (CPolOn | CPolOff);
5969 
5970   for (unsigned I = 1; I != Operands.size(); ++I) {
5971     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5972     if (Op.isCPol()) {
5973       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
5974       return MatchOperand_Success;
5975     }
5976   }
5977 
5978   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
5979                                               AMDGPUOperand::ImmTyCPol));
5980 
5981   return MatchOperand_Success;
5982 }
5983 
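// Append an optional immediate operand to Inst: use the operand recorded in
// OptionalIdx if it was present in the source, otherwise add Default.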
5984 static void addOptionalImmOperand(
5985   MCInst& Inst, const OperandVector& Operands,
5986   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5987   AMDGPUOperand::ImmTy ImmT,
5988   int64_t Default = 0) {
5989   auto i = OptionalIdx.find(ImmT);
5990   if (i != OptionalIdx.end()) {
5991     unsigned Idx = i->second;
5992     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5993   } else {
5994     Inst.addOperand(MCOperand::createImm(Default));
5995   }
5996 }
5997 
5998 OperandMatchResultTy
5999 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6000                                        StringRef &Value,
6001                                        SMLoc &StringLoc) {
6002   if (!trySkipId(Prefix, AsmToken::Colon))
6003     return MatchOperand_NoMatch;
6004 
6005   StringLoc = getLoc();
6006   return parseId(Value, "expected an identifier") ? MatchOperand_Success
6007                                                   : MatchOperand_ParseFail;
6008 }
6009 
6010 //===----------------------------------------------------------------------===//
6011 // MTBUF format
6012 //===----------------------------------------------------------------------===//
6013 
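// Try to parse "<Pref>:<value>" and range-check the value against MaxVal.
// Returns false on a hard parse or range error; if the prefix is absent, Fmt
// is left unchanged and true is returned.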
6014 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6015                                   int64_t MaxVal,
6016                                   int64_t &Fmt) {
6017   int64_t Val;
6018   SMLoc Loc = getLoc();
6019 
6020   auto Res = parseIntWithPrefix(Pref, Val);
6021   if (Res == MatchOperand_ParseFail)
6022     return false;
6023   if (Res == MatchOperand_NoMatch)
6024     return true;
6025 
6026   if (Val < 0 || Val > MaxVal) {
6027     Error(Loc, Twine("out of range ", StringRef(Pref)));
6028     return false;
6029   }
6030 
6031   Fmt = Val;
6032   return true;
6033 }
6034 
6035 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6036 // values to live in a joint format operand in the MCInst encoding.
6037 OperandMatchResultTy
6038 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
6039   using namespace llvm::AMDGPU::MTBUFFormat;
6040 
6041   int64_t Dfmt = DFMT_UNDEF;
6042   int64_t Nfmt = NFMT_UNDEF;
6043 
6044   // dfmt and nfmt can appear in either order, and each is optional.
6045   for (int I = 0; I < 2; ++I) {
6046     if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
6047       return MatchOperand_ParseFail;
6048 
6049     if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
6050       return MatchOperand_ParseFail;
6051     }
6052     // Skip optional comma between dfmt/nfmt
6053     // but guard against 2 commas following each other.
6054     if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
6055         !peekToken().is(AsmToken::Comma)) {
6056       trySkipToken(AsmToken::Comma);
6057     }
6058   }
6059 
6060   if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
6061     return MatchOperand_NoMatch;
6062 
6063   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6064   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6065 
6066   Format = encodeDfmtNfmt(Dfmt, Nfmt);
6067   return MatchOperand_Success;
6068 }
6069 
6070 OperandMatchResultTy
6071 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6072   using namespace llvm::AMDGPU::MTBUFFormat;
6073 
6074   int64_t Fmt = UFMT_UNDEF;
6075 
6076   if (!tryParseFmt("format", UFMT_MAX, Fmt))
6077     return MatchOperand_ParseFail;
6078 
6079   if (Fmt == UFMT_UNDEF)
6080     return MatchOperand_NoMatch;
6081 
6082   Format = Fmt;
6083   return MatchOperand_Success;
6084 }
6085 
6086 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6087                                     int64_t &Nfmt,
6088                                     StringRef FormatStr,
6089                                     SMLoc Loc) {
6090   using namespace llvm::AMDGPU::MTBUFFormat;
6091   int64_t Format;
6092 
6093   Format = getDfmt(FormatStr);
6094   if (Format != DFMT_UNDEF) {
6095     Dfmt = Format;
6096     return true;
6097   }
6098 
6099   Format = getNfmt(FormatStr, getSTI());
6100   if (Format != NFMT_UNDEF) {
6101     Nfmt = Format;
6102     return true;
6103   }
6104 
6105   Error(Loc, "unsupported format");
6106   return false;
6107 }
6108 
6109 OperandMatchResultTy
6110 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
6111                                           SMLoc FormatLoc,
6112                                           int64_t &Format) {
6113   using namespace llvm::AMDGPU::MTBUFFormat;
6114 
6115   int64_t Dfmt = DFMT_UNDEF;
6116   int64_t Nfmt = NFMT_UNDEF;
6117   if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
6118     return MatchOperand_ParseFail;
6119 
6120   if (trySkipToken(AsmToken::Comma)) {
6121     StringRef Str;
6122     SMLoc Loc = getLoc();
6123     if (!parseId(Str, "expected a format string") ||
6124         !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
6125       return MatchOperand_ParseFail;
6126     }
6127     if (Dfmt == DFMT_UNDEF) {
6128       Error(Loc, "duplicate numeric format");
6129       return MatchOperand_ParseFail;
6130     } else if (Nfmt == NFMT_UNDEF) {
6131       Error(Loc, "duplicate data format");
6132       return MatchOperand_ParseFail;
6133     }
6134   }
6135 
6136   Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
6137   Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
6138 
6139   if (isGFX10Plus()) {
6140     auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
6141     if (Ufmt == UFMT_UNDEF) {
6142       Error(FormatLoc, "unsupported format");
6143       return MatchOperand_ParseFail;
6144     }
6145     Format = Ufmt;
6146   } else {
6147     Format = encodeDfmtNfmt(Dfmt, Nfmt);
6148   }
6149 
6150   return MatchOperand_Success;
6151 }
6152 
6153 OperandMatchResultTy
6154 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6155                                             SMLoc Loc,
6156                                             int64_t &Format) {
6157   using namespace llvm::AMDGPU::MTBUFFormat;
6158 
6159   auto Id = getUnifiedFormat(FormatStr, getSTI());
6160   if (Id == UFMT_UNDEF)
6161     return MatchOperand_NoMatch;
6162 
6163   if (!isGFX10Plus()) {
6164     Error(Loc, "unified format is not supported on this GPU");
6165     return MatchOperand_ParseFail;
6166   }
6167 
6168   Format = Id;
6169   return MatchOperand_Success;
6170 }
6171 
6172 OperandMatchResultTy
6173 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6174   using namespace llvm::AMDGPU::MTBUFFormat;
6175   SMLoc Loc = getLoc();
6176 
6177   if (!parseExpr(Format))
6178     return MatchOperand_ParseFail;
6179   if (!isValidFormatEncoding(Format, getSTI())) {
6180     Error(Loc, "out of range format");
6181     return MatchOperand_ParseFail;
6182   }
6183 
6184   return MatchOperand_Success;
6185 }
6186 
6187 OperandMatchResultTy
6188 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6189   using namespace llvm::AMDGPU::MTBUFFormat;
6190 
6191   if (!trySkipId("format", AsmToken::Colon))
6192     return MatchOperand_NoMatch;
6193 
6194   if (trySkipToken(AsmToken::LBrac)) {
6195     StringRef FormatStr;
6196     SMLoc Loc = getLoc();
6197     if (!parseId(FormatStr, "expected a format string"))
6198       return MatchOperand_ParseFail;
6199 
6200     auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6201     if (Res == MatchOperand_NoMatch)
6202       Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6203     if (Res != MatchOperand_Success)
6204       return Res;
6205 
6206     if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6207       return MatchOperand_ParseFail;
6208 
6209     return MatchOperand_Success;
6210   }
6211 
6212   return parseNumericFormat(Format);
6213 }
6214 
6215 OperandMatchResultTy
6216 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
6217   using namespace llvm::AMDGPU::MTBUFFormat;
6218 
6219   int64_t Format = getDefaultFormatEncoding(getSTI());
6220   OperandMatchResultTy Res;
6221   SMLoc Loc = getLoc();
6222 
6223   // Parse legacy format syntax.
6224   Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
6225   if (Res == MatchOperand_ParseFail)
6226     return Res;
6227 
6228   bool FormatFound = (Res == MatchOperand_Success);
6229 
6230   Operands.push_back(
6231     AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
6232 
6233   if (FormatFound)
6234     trySkipToken(AsmToken::Comma);
6235 
6236   if (isToken(AsmToken::EndOfStatement)) {
6237     // We are expecting an soffset operand,
6238     // but let the matcher handle the error.
6239     return MatchOperand_Success;
6240   }
6241 
6242   // Parse soffset.
6243   Res = parseRegOrImm(Operands);
6244   if (Res != MatchOperand_Success)
6245     return Res;
6246 
6247   trySkipToken(AsmToken::Comma);
6248 
6249   if (!FormatFound) {
6250     Res = parseSymbolicOrNumericFormat(Format);
6251     if (Res == MatchOperand_ParseFail)
6252       return Res;
6253     if (Res == MatchOperand_Success) {
6254       auto Size = Operands.size();
6255       AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
6256       assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
6257       Op.setImm(Format);
6258     }
6259     return MatchOperand_Success;
6260   }
6261 
6262   if (isId("format") && peekToken().is(AsmToken::Colon)) {
6263     Error(getLoc(), "duplicate format");
6264     return MatchOperand_ParseFail;
6265   }
6266   return MatchOperand_Success;
6267 }
6268 
6269 //===----------------------------------------------------------------------===//
6270 // ds
6271 //===----------------------------------------------------------------------===//
6272 
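// Convert parsed DS operands for instructions that take a pair of 8-bit
// offsets (offset0/offset1) plus an optional gds modifier; the M0 register
// is appended as the final operand.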
6273 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
6274                                     const OperandVector &Operands) {
6275   OptionalImmIndexMap OptionalIdx;
6276 
6277   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6278     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6279 
6280     // Add the register arguments
6281     if (Op.isReg()) {
6282       Op.addRegOperands(Inst, 1);
6283       continue;
6284     }
6285 
6286     // Handle optional arguments
6287     OptionalIdx[Op.getImmTy()] = i;
6288   }
6289 
6290   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
6291   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
6292   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6293 
6294   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6295 }
6296 
6297 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
6298                                 bool IsGdsHardcoded) {
6299   OptionalImmIndexMap OptionalIdx;
6300   AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
6301 
6302   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6303     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6304 
6305     // Add the register arguments
6306     if (Op.isReg()) {
6307       Op.addRegOperands(Inst, 1);
6308       continue;
6309     }
6310 
6311     if (Op.isToken() && Op.getToken() == "gds") {
6312       IsGdsHardcoded = true;
6313       continue;
6314     }
6315 
6316     // Handle optional arguments
6317     OptionalIdx[Op.getImmTy()] = i;
6318 
6319     if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
6320       OffsetType = AMDGPUOperand::ImmTySwizzle;
6321   }
6322 
6323   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
6324 
6325   if (!IsGdsHardcoded) {
6326     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
6327   }
6328   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
6329 }
6330 
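// Convert parsed export operands into an MCInst. The trailing "en" mask is
// derived from which sources are actual registers rather than "off": one bit
// per enabled source, or two bits per enabled source pair when the compr
// modifier is present.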
6331 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
6332   OptionalImmIndexMap OptionalIdx;
6333 
6334   unsigned OperandIdx[4];
6335   unsigned EnMask = 0;
6336   int SrcIdx = 0;
6337 
6338   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6339     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6340 
6341     // Add the register arguments
6342     if (Op.isReg()) {
6343       assert(SrcIdx < 4);
6344       OperandIdx[SrcIdx] = Inst.size();
6345       Op.addRegOperands(Inst, 1);
6346       ++SrcIdx;
6347       continue;
6348     }
6349 
6350     if (Op.isOff()) {
6351       assert(SrcIdx < 4);
6352       OperandIdx[SrcIdx] = Inst.size();
6353       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
6354       ++SrcIdx;
6355       continue;
6356     }
6357 
6358     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
6359       Op.addImmOperands(Inst, 1);
6360       continue;
6361     }
6362 
6363     if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
6364       continue;
6365 
6366     // Handle optional arguments
6367     OptionalIdx[Op.getImmTy()] = i;
6368   }
6369 
6370   assert(SrcIdx == 4);
6371 
6372   bool Compr = false;
6373   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
6374     Compr = true;
6375     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
6376     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
6377     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
6378   }
6379 
6380   for (auto i = 0; i < SrcIdx; ++i) {
6381     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
6382       EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
6383     }
6384   }
6385 
6386   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
6387   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
6388 
6389   Inst.addOperand(MCOperand::createImm(EnMask));
6390 }
6391 
6392 //===----------------------------------------------------------------------===//
6393 // s_waitcnt
6394 //===----------------------------------------------------------------------===//
6395 
6396 static bool
6397 encodeCnt(
6398   const AMDGPU::IsaVersion ISA,
6399   int64_t &IntVal,
6400   int64_t CntVal,
6401   bool Saturate,
6402   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6403   unsigned (*decode)(const IsaVersion &Version, unsigned))
6404 {
6405   bool Failed = false;
6406 
6407   IntVal = encode(ISA, IntVal, CntVal);
6408   if (CntVal != decode(ISA, IntVal)) {
6409     if (Saturate) {
6410       IntVal = encode(ISA, IntVal, -1);
6411     } else {
6412       Failed = true;
6413     }
6414   }
6415   return Failed;
6416 }
6417 
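// Parse a single counter specification of the form "<name>(<value>)", e.g.
// "vmcnt(0)" (illustrative). A counter name with the "_sat" suffix saturates
// an oversized value to its maximum instead of reporting an error. The parsed
// value is merged into IntVal, which holds the full waitcnt mask.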
6418 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
6419 
6420   SMLoc CntLoc = getLoc();
6421   StringRef CntName = getTokenStr();
6422 
6423   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6424       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6425     return false;
6426 
6427   int64_t CntVal;
6428   SMLoc ValLoc = getLoc();
6429   if (!parseExpr(CntVal))
6430     return false;
6431 
6432   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6433 
6434   bool Failed = true;
6435   bool Sat = CntName.endswith("_sat");
6436 
6437   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
6438     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
6439   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
6440     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
6441   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
6442     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
6443   } else {
6444     Error(CntLoc, "invalid counter name " + CntName);
6445     return false;
6446   }
6447 
6448   if (Failed) {
6449     Error(ValLoc, "value is too large for " + CntName);
6450     return false;
6451   }
6452 
6453   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6454     return false;
6455 
6456   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6457     if (isToken(AsmToken::EndOfStatement)) {
6458       Error(getLoc(), "expected a counter name");
6459       return false;
6460     }
6461   }
6462 
6463   return true;
6464 }
6465 
6466 OperandMatchResultTy
6467 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
6468   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6469   int64_t Waitcnt = getWaitcntBitMask(ISA);
6470   SMLoc S = getLoc();
6471 
6472   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6473     while (!isToken(AsmToken::EndOfStatement)) {
6474       if (!parseCnt(Waitcnt))
6475         return MatchOperand_ParseFail;
6476     }
6477   } else {
6478     if (!parseExpr(Waitcnt))
6479       return MatchOperand_ParseFail;
6480   }
6481 
6482   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6483   return MatchOperand_Success;
6484 }
6485 
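// Parse a single s_delay_alu field of the form "<field>(<value>)", e.g.
// "instid0(VALU_DEP_1)" or "instskip(NEXT)" (illustrative). Field values are
// packed into Delay at the bit offsets given below (instid0 at bit 0,
// instskip at bit 4, instid1 at bit 7).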
6486 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
6487   SMLoc FieldLoc = getLoc();
6488   StringRef FieldName = getTokenStr();
6489   if (!skipToken(AsmToken::Identifier, "expected a field name") ||
6490       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6491     return false;
6492 
6493   SMLoc ValueLoc = getLoc();
6494   StringRef ValueName = getTokenStr();
6495   if (!skipToken(AsmToken::Identifier, "expected a value name") ||
6496       !skipToken(AsmToken::RParen, "expected a right parenthesis"))
6497     return false;
6498 
6499   unsigned Shift;
6500   if (FieldName == "instid0") {
6501     Shift = 0;
6502   } else if (FieldName == "instskip") {
6503     Shift = 4;
6504   } else if (FieldName == "instid1") {
6505     Shift = 7;
6506   } else {
6507     Error(FieldLoc, "invalid field name " + FieldName);
6508     return false;
6509   }
6510 
6511   int Value;
6512   if (Shift == 4) {
6513     // Parse values for instskip.
6514     Value = StringSwitch<int>(ValueName)
6515                 .Case("SAME", 0)
6516                 .Case("NEXT", 1)
6517                 .Case("SKIP_1", 2)
6518                 .Case("SKIP_2", 3)
6519                 .Case("SKIP_3", 4)
6520                 .Case("SKIP_4", 5)
6521                 .Default(-1);
6522   } else {
6523     // Parse values for instid0 and instid1.
6524     Value = StringSwitch<int>(ValueName)
6525                 .Case("NO_DEP", 0)
6526                 .Case("VALU_DEP_1", 1)
6527                 .Case("VALU_DEP_2", 2)
6528                 .Case("VALU_DEP_3", 3)
6529                 .Case("VALU_DEP_4", 4)
6530                 .Case("TRANS32_DEP_1", 5)
6531                 .Case("TRANS32_DEP_2", 6)
6532                 .Case("TRANS32_DEP_3", 7)
6533                 .Case("FMA_ACCUM_CYCLE_1", 8)
6534                 .Case("SALU_CYCLE_1", 9)
6535                 .Case("SALU_CYCLE_2", 10)
6536                 .Case("SALU_CYCLE_3", 11)
6537                 .Default(-1);
6538   }
6539   if (Value < 0) {
6540     Error(ValueLoc, "invalid value name " + ValueName);
6541     return false;
6542   }
6543 
6544   Delay |= Value << Shift;
6545   return true;
6546 }
6547 
6548 OperandMatchResultTy
6549 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
6550   int64_t Delay = 0;
6551   SMLoc S = getLoc();
6552 
6553   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6554     do {
6555       if (!parseDelay(Delay))
6556         return MatchOperand_ParseFail;
6557     } while (trySkipToken(AsmToken::Pipe));
6558   } else {
6559     if (!parseExpr(Delay))
6560       return MatchOperand_ParseFail;
6561   }
6562 
6563   Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
6564   return MatchOperand_Success;
6565 }
6566 
6567 bool
6568 AMDGPUOperand::isSWaitCnt() const {
6569   return isImm();
6570 }
6571 
6572 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
6573 
6574 //===----------------------------------------------------------------------===//
6575 // DepCtr
6576 //===----------------------------------------------------------------------===//
6577 
6578 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
6579                                   StringRef DepCtrName) {
6580   switch (ErrorId) {
6581   case OPR_ID_UNKNOWN:
6582     Error(Loc, Twine("invalid counter name ", DepCtrName));
6583     return;
6584   case OPR_ID_UNSUPPORTED:
6585     Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
6586     return;
6587   case OPR_ID_DUPLICATE:
6588     Error(Loc, Twine("duplicate counter name ", DepCtrName));
6589     return;
6590   case OPR_VAL_INVALID:
6591     Error(Loc, Twine("invalid value for ", DepCtrName));
6592     return;
6593   default:
6594     assert(false);
6595   }
6596 }
6597 
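// Parse a single depctr counter specification of the form "<name>(<value>)".
// Counter names and bit positions are target-dependent and are resolved by
// encodeDepCtr(); UsedOprMask tracks which counters have already been seen
// so that duplicates can be diagnosed.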
6598 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
6599 
6600   using namespace llvm::AMDGPU::DepCtr;
6601 
6602   SMLoc DepCtrLoc = getLoc();
6603   StringRef DepCtrName = getTokenStr();
6604 
6605   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
6606       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
6607     return false;
6608 
6609   int64_t ExprVal;
6610   if (!parseExpr(ExprVal))
6611     return false;
6612 
6613   unsigned PrevOprMask = UsedOprMask;
6614   int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());
6615 
6616   if (CntVal < 0) {
6617     depCtrError(DepCtrLoc, CntVal, DepCtrName);
6618     return false;
6619   }
6620 
6621   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
6622     return false;
6623 
6624   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
6625     if (isToken(AsmToken::EndOfStatement)) {
6626       Error(getLoc(), "expected a counter name");
6627       return false;
6628     }
6629   }
6630 
6631   unsigned CntValMask = PrevOprMask ^ UsedOprMask;
6632   DepCtr = (DepCtr & ~CntValMask) | CntVal;
6633   return true;
6634 }
6635 
6636 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
6637   using namespace llvm::AMDGPU::DepCtr;
6638 
6639   int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
6640   SMLoc Loc = getLoc();
6641 
6642   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6643     unsigned UsedOprMask = 0;
6644     while (!isToken(AsmToken::EndOfStatement)) {
6645       if (!parseDepCtr(DepCtr, UsedOprMask))
6646         return MatchOperand_ParseFail;
6647     }
6648   } else {
6649     if (!parseExpr(DepCtr))
6650       return MatchOperand_ParseFail;
6651   }
6652 
6653   Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
6654   return MatchOperand_Success;
6655 }
6656 
6657 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
6658 
6659 //===----------------------------------------------------------------------===//
6660 // hwreg
6661 //===----------------------------------------------------------------------===//
6662 
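// Parse the body of a hwreg() operand: a register given by symbolic name or
// numeric code, optionally followed by a bit offset and a bitfield width,
// e.g. "hwreg(HW_REG_MODE, 0, 32)" (illustrative). The opening parenthesis
// has already been consumed by the caller.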
6663 bool
6664 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
6665                                 OperandInfoTy &Offset,
6666                                 OperandInfoTy &Width) {
6667   using namespace llvm::AMDGPU::Hwreg;
6668 
6669   // The register may be specified by name or by a numeric code.
6670   HwReg.Loc = getLoc();
6671   if (isToken(AsmToken::Identifier) &&
6672       (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6673     HwReg.IsSymbolic = true;
6674     lex(); // skip register name
6675   } else if (!parseExpr(HwReg.Id, "a register name")) {
6676     return false;
6677   }
6678 
6679   if (trySkipToken(AsmToken::RParen))
6680     return true;
6681 
6682   // parse optional params
6683   if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
6684     return false;
6685 
6686   Offset.Loc = getLoc();
6687   if (!parseExpr(Offset.Id))
6688     return false;
6689 
6690   if (!skipToken(AsmToken::Comma, "expected a comma"))
6691     return false;
6692 
6693   Width.Loc = getLoc();
6694   return parseExpr(Width.Id) &&
6695          skipToken(AsmToken::RParen, "expected a closing parenthesis");
6696 }
6697 
6698 bool
6699 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
6700                                const OperandInfoTy &Offset,
6701                                const OperandInfoTy &Width) {
6702 
6703   using namespace llvm::AMDGPU::Hwreg;
6704 
6705   if (HwReg.IsSymbolic) {
6706     if (HwReg.Id == OPR_ID_UNSUPPORTED) {
6707       Error(HwReg.Loc,
6708             "specified hardware register is not supported on this GPU");
6709       return false;
6710     }
6711   } else {
6712     if (!isValidHwreg(HwReg.Id)) {
6713       Error(HwReg.Loc,
6714             "invalid code of hardware register: only 6-bit values are legal");
6715       return false;
6716     }
6717   }
6718   if (!isValidHwregOffset(Offset.Id)) {
6719     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
6720     return false;
6721   }
6722   if (!isValidHwregWidth(Width.Id)) {
6723     Error(Width.Loc,
6724           "invalid bitfield width: only values from 1 to 32 are legal");
6725     return false;
6726   }
6727   return true;
6728 }
6729 
6730 OperandMatchResultTy
6731 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
6732   using namespace llvm::AMDGPU::Hwreg;
6733 
6734   int64_t ImmVal = 0;
6735   SMLoc Loc = getLoc();
6736 
6737   if (trySkipId("hwreg", AsmToken::LParen)) {
6738     OperandInfoTy HwReg(OPR_ID_UNKNOWN);
6739     OperandInfoTy Offset(OFFSET_DEFAULT_);
6740     OperandInfoTy Width(WIDTH_DEFAULT_);
6741     if (parseHwregBody(HwReg, Offset, Width) &&
6742         validateHwreg(HwReg, Offset, Width)) {
6743       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
6744     } else {
6745       return MatchOperand_ParseFail;
6746     }
6747   } else if (parseExpr(ImmVal, "a hwreg macro")) {
6748     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6749       Error(Loc, "invalid immediate: only 16-bit values are legal");
6750       return MatchOperand_ParseFail;
6751     }
6752   } else {
6753     return MatchOperand_ParseFail;
6754   }
6755 
6756   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
6757   return MatchOperand_Success;
6758 }
6759 
6760 bool AMDGPUOperand::isHwreg() const {
6761   return isImmTy(ImmTyHwreg);
6762 }
6763 
6764 //===----------------------------------------------------------------------===//
6765 // sendmsg
6766 //===----------------------------------------------------------------------===//
6767 
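// Parse the body of a sendmsg() operand: a message id given by symbolic name
// or numeric value, optionally followed by an operation and a stream id, e.g.
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)" (illustrative). The opening parenthesis
// has already been consumed by the caller.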
6768 bool
6769 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
6770                                   OperandInfoTy &Op,
6771                                   OperandInfoTy &Stream) {
6772   using namespace llvm::AMDGPU::SendMsg;
6773 
6774   Msg.Loc = getLoc();
6775   if (isToken(AsmToken::Identifier) &&
6776       (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
6777     Msg.IsSymbolic = true;
6778     lex(); // skip message name
6779   } else if (!parseExpr(Msg.Id, "a message name")) {
6780     return false;
6781   }
6782 
6783   if (trySkipToken(AsmToken::Comma)) {
6784     Op.IsDefined = true;
6785     Op.Loc = getLoc();
6786     if (isToken(AsmToken::Identifier) &&
6787         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6788       lex(); // skip operation name
6789     } else if (!parseExpr(Op.Id, "an operation name")) {
6790       return false;
6791     }
6792 
6793     if (trySkipToken(AsmToken::Comma)) {
6794       Stream.IsDefined = true;
6795       Stream.Loc = getLoc();
6796       if (!parseExpr(Stream.Id))
6797         return false;
6798     }
6799   }
6800 
6801   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6802 }
6803 
6804 bool
6805 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6806                                  const OperandInfoTy &Op,
6807                                  const OperandInfoTy &Stream) {
6808   using namespace llvm::AMDGPU::SendMsg;
6809 
6810   // Validation strictness depends on whether the message is specified
6811   // in a symbolic or in a numeric form. In the latter case,
6812   // only the encoding possibility is checked.
6813   bool Strict = Msg.IsSymbolic;
6814 
6815   if (Strict) {
6816     if (Msg.Id == OPR_ID_UNSUPPORTED) {
6817       Error(Msg.Loc, "specified message id is not supported on this GPU");
6818       return false;
6819     }
6820   } else {
6821     if (!isValidMsgId(Msg.Id, getSTI())) {
6822       Error(Msg.Loc, "invalid message id");
6823       return false;
6824     }
6825   }
6826   if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
6827     if (Op.IsDefined) {
6828       Error(Op.Loc, "message does not support operations");
6829     } else {
6830       Error(Msg.Loc, "missing message operation");
6831     }
6832     return false;
6833   }
6834   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6835     Error(Op.Loc, "invalid operation id");
6836     return false;
6837   }
6838   if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
6839       Stream.IsDefined) {
6840     Error(Stream.Loc, "message operation does not support streams");
6841     return false;
6842   }
6843   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6844     Error(Stream.Loc, "invalid message stream id");
6845     return false;
6846   }
6847   return true;
6848 }
6849 
6850 OperandMatchResultTy
6851 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6852   using namespace llvm::AMDGPU::SendMsg;
6853 
6854   int64_t ImmVal = 0;
6855   SMLoc Loc = getLoc();
6856 
6857   if (trySkipId("sendmsg", AsmToken::LParen)) {
6858     OperandInfoTy Msg(OPR_ID_UNKNOWN);
6859     OperandInfoTy Op(OP_NONE_);
6860     OperandInfoTy Stream(STREAM_ID_NONE_);
6861     if (parseSendMsgBody(Msg, Op, Stream) &&
6862         validateSendMsg(Msg, Op, Stream)) {
6863       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6864     } else {
6865       return MatchOperand_ParseFail;
6866     }
6867   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6868     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6869       Error(Loc, "invalid immediate: only 16-bit values are legal");
6870       return MatchOperand_ParseFail;
6871     }
6872   } else {
6873     return MatchOperand_ParseFail;
6874   }
6875 
6876   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6877   return MatchOperand_Success;
6878 }
6879 
6880 bool AMDGPUOperand::isSendMsg() const {
6881   return isImmTy(ImmTySendMsg);
6882 }
6883 
6884 //===----------------------------------------------------------------------===//
6885 // v_interp
6886 //===----------------------------------------------------------------------===//
6887 
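// Parse the parameter slot of a VINTRP instruction: one of "p10", "p20" or
// "p0", encoded as 0, 1 and 2 respectively.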
6888 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6889   StringRef Str;
6890   SMLoc S = getLoc();
6891 
6892   if (!parseId(Str))
6893     return MatchOperand_NoMatch;
6894 
6895   int Slot = StringSwitch<int>(Str)
6896     .Case("p10", 0)
6897     .Case("p20", 1)
6898     .Case("p0", 2)
6899     .Default(-1);
6900 
6901   if (Slot == -1) {
6902     Error(S, "invalid interpolation slot");
6903     return MatchOperand_ParseFail;
6904   }
6905 
6906   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6907                                               AMDGPUOperand::ImmTyInterpSlot));
6908   return MatchOperand_Success;
6909 }
6910 
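// Parse an interpolation attribute of the form "attr<N>.<chan>", e.g.
// "attr0.x" (illustrative), where N is in the range [0,63] and the channel
// is one of x, y, z or w.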
6911 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
6912   StringRef Str;
6913   SMLoc S = getLoc();
6914 
6915   if (!parseId(Str))
6916     return MatchOperand_NoMatch;
6917 
6918   if (!Str.startswith("attr")) {
6919     Error(S, "invalid interpolation attribute");
6920     return MatchOperand_ParseFail;
6921   }
6922 
6923   StringRef Chan = Str.take_back(2);
6924   int AttrChan = StringSwitch<int>(Chan)
6925     .Case(".x", 0)
6926     .Case(".y", 1)
6927     .Case(".z", 2)
6928     .Case(".w", 3)
6929     .Default(-1);
6930   if (AttrChan == -1) {
6931     Error(S, "invalid or missing interpolation attribute channel");
6932     return MatchOperand_ParseFail;
6933   }
6934 
6935   Str = Str.drop_back(2).drop_front(4);
6936 
6937   uint8_t Attr;
6938   if (Str.getAsInteger(10, Attr)) {
6939     Error(S, "invalid or missing interpolation attribute number");
6940     return MatchOperand_ParseFail;
6941   }
6942 
6943   if (Attr > 63) {
6944     Error(S, "out of bounds interpolation attribute number");
6945     return MatchOperand_ParseFail;
6946   }
6947 
6948   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
6949 
6950   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
6951                                               AMDGPUOperand::ImmTyInterpAttr));
6952   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
6953                                               AMDGPUOperand::ImmTyAttrChan));
6954   return MatchOperand_Success;
6955 }
6956 
6957 //===----------------------------------------------------------------------===//
6958 // exp
6959 //===----------------------------------------------------------------------===//
6960 
6961 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
6962   using namespace llvm::AMDGPU::Exp;
6963 
6964   StringRef Str;
6965   SMLoc S = getLoc();
6966 
6967   if (!parseId(Str))
6968     return MatchOperand_NoMatch;
6969 
6970   unsigned Id = getTgtId(Str);
6971   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
6972     Error(S, (Id == ET_INVALID) ?
6973                 "invalid exp target" :
6974                 "exp target is not supported on this GPU");
6975     return MatchOperand_ParseFail;
6976   }
6977 
6978   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
6979                                               AMDGPUOperand::ImmTyExpTgt));
6980   return MatchOperand_Success;
6981 }
6982 
6983 //===----------------------------------------------------------------------===//
6984 // parser helpers
6985 //===----------------------------------------------------------------------===//
6986 
6987 bool
6988 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
6989   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
6990 }
6991 
6992 bool
6993 AMDGPUAsmParser::isId(const StringRef Id) const {
6994   return isId(getToken(), Id);
6995 }
6996 
6997 bool
6998 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
6999   return getTokenKind() == Kind;
7000 }
7001 
7002 bool
7003 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7004   if (isId(Id)) {
7005     lex();
7006     return true;
7007   }
7008   return false;
7009 }
7010 
7011 bool
7012 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7013   if (isToken(AsmToken::Identifier)) {
7014     StringRef Tok = getTokenStr();
7015     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
7016       lex();
7017       return true;
7018     }
7019   }
7020   return false;
7021 }
7022 
7023 bool
7024 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7025   if (isId(Id) && peekToken().is(Kind)) {
7026     lex();
7027     lex();
7028     return true;
7029   }
7030   return false;
7031 }
7032 
7033 bool
7034 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7035   if (isToken(Kind)) {
7036     lex();
7037     return true;
7038   }
7039   return false;
7040 }
7041 
7042 bool
7043 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7044                            const StringRef ErrMsg) {
7045   if (!trySkipToken(Kind)) {
7046     Error(getLoc(), ErrMsg);
7047     return false;
7048   }
7049   return true;
7050 }
7051 
7052 bool
7053 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7054   SMLoc S = getLoc();
7055 
7056   const MCExpr *Expr;
7057   if (Parser.parseExpression(Expr))
7058     return false;
7059 
7060   if (Expr->evaluateAsAbsolute(Imm))
7061     return true;
7062 
7063   if (Expected.empty()) {
7064     Error(S, "expected absolute expression");
7065   } else {
7066     Error(S, Twine("expected ", Expected) +
7067              Twine(" or an absolute expression"));
7068   }
7069   return false;
7070 }
7071 
7072 bool
7073 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7074   SMLoc S = getLoc();
7075 
7076   const MCExpr *Expr;
7077   if (Parser.parseExpression(Expr))
7078     return false;
7079 
7080   int64_t IntVal;
7081   if (Expr->evaluateAsAbsolute(IntVal)) {
7082     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7083   } else {
7084     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7085   }
7086   return true;
7087 }
7088 
7089 bool
7090 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7091   if (isToken(AsmToken::String)) {
7092     Val = getToken().getStringContents();
7093     lex();
7094     return true;
7095   } else {
7096     Error(getLoc(), ErrMsg);
7097     return false;
7098   }
7099 }
7100 
7101 bool
7102 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7103   if (isToken(AsmToken::Identifier)) {
7104     Val = getTokenStr();
7105     lex();
7106     return true;
7107   } else {
7108     if (!ErrMsg.empty())
7109       Error(getLoc(), ErrMsg);
7110     return false;
7111   }
7112 }
7113 
7114 AsmToken
7115 AMDGPUAsmParser::getToken() const {
7116   return Parser.getTok();
7117 }
7118 
7119 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7120   return isToken(AsmToken::EndOfStatement)
7121              ? getToken()
7122              : getLexer().peekTok(ShouldSkipSpace);
7123 }
7124 
7125 void
7126 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7127   auto TokCount = getLexer().peekTokens(Tokens);
7128 
7129   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7130     Tokens[Idx] = AsmToken(AsmToken::Error, "");
7131 }
7132 
7133 AsmToken::TokenKind
7134 AMDGPUAsmParser::getTokenKind() const {
7135   return getLexer().getKind();
7136 }
7137 
7138 SMLoc
7139 AMDGPUAsmParser::getLoc() const {
7140   return getToken().getLoc();
7141 }
7142 
7143 StringRef
7144 AMDGPUAsmParser::getTokenStr() const {
7145   return getToken().getString();
7146 }
7147 
7148 void
7149 AMDGPUAsmParser::lex() {
7150   Parser.Lex();
7151 }
7152 
7153 SMLoc
7154 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7155                                const OperandVector &Operands) const {
7156   for (unsigned i = Operands.size() - 1; i > 0; --i) {
7157     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7158     if (Test(Op))
7159       return Op.getStartLoc();
7160   }
7161   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7162 }
7163 
7164 SMLoc
7165 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7166                            const OperandVector &Operands) const {
7167   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7168   return getOperandLoc(Test, Operands);
7169 }
7170 
7171 SMLoc
7172 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7173                            const OperandVector &Operands) const {
7174   auto Test = [=](const AMDGPUOperand& Op) {
7175     return Op.isRegKind() && Op.getReg() == Reg;
7176   };
7177   return getOperandLoc(Test, Operands);
7178 }
7179 
7180 SMLoc
7181 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
7182   auto Test = [](const AMDGPUOperand& Op) {
7183     return Op.IsImmKindLiteral() || Op.isExpr();
7184   };
7185   return getOperandLoc(Test, Operands);
7186 }
7187 
7188 SMLoc
7189 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7190   auto Test = [](const AMDGPUOperand& Op) {
7191     return Op.isImmKindConst();
7192   };
7193   return getOperandLoc(Test, Operands);
7194 }
7195 
7196 //===----------------------------------------------------------------------===//
7197 // swizzle
7198 //===----------------------------------------------------------------------===//
7199 
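// Encode a BITMASK_PERM swizzle from its three masks. For a thread i within
// a group of 32 lanes, the swizzled source lane is effectively
// ((i & AndMask) | OrMask) ^ XorMask; the broadcast, swap and reverse macros
// below are all expressed in terms of this encoding.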
7200 LLVM_READNONE
7201 static unsigned
7202 encodeBitmaskPerm(const unsigned AndMask,
7203                   const unsigned OrMask,
7204                   const unsigned XorMask) {
7205   using namespace llvm::AMDGPU::Swizzle;
7206 
7207   return BITMASK_PERM_ENC |
7208          (AndMask << BITMASK_AND_SHIFT) |
7209          (OrMask  << BITMASK_OR_SHIFT)  |
7210          (XorMask << BITMASK_XOR_SHIFT);
7211 }
7212 
7213 bool
7214 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7215                                      const unsigned MinVal,
7216                                      const unsigned MaxVal,
7217                                      const StringRef ErrMsg,
7218                                      SMLoc &Loc) {
7219   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7220     return false;
7221   }
7222   Loc = getLoc();
7223   if (!parseExpr(Op)) {
7224     return false;
7225   }
7226   if (Op < MinVal || Op > MaxVal) {
7227     Error(Loc, ErrMsg);
7228     return false;
7229   }
7230 
7231   return true;
7232 }
7233 
7234 bool
7235 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7236                                       const unsigned MinVal,
7237                                       const unsigned MaxVal,
7238                                       const StringRef ErrMsg) {
7239   SMLoc Loc;
7240   for (unsigned i = 0; i < OpNum; ++i) {
7241     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7242       return false;
7243   }
7244 
7245   return true;
7246 }
7247 
7248 bool
7249 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7250   using namespace llvm::AMDGPU::Swizzle;
7251 
7252   int64_t Lane[LANE_NUM];
7253   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7254                            "expected a 2-bit lane id")) {
7255     Imm = QUAD_PERM_ENC;
7256     for (unsigned I = 0; I < LANE_NUM; ++I) {
7257       Imm |= Lane[I] << (LANE_SHIFT * I);
7258     }
7259     return true;
7260   }
7261   return false;
7262 }
7263 
7264 bool
7265 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7266   using namespace llvm::AMDGPU::Swizzle;
7267 
7268   SMLoc Loc;
7269   int64_t GroupSize;
7270   int64_t LaneIdx;
7271 
7272   if (!parseSwizzleOperand(GroupSize,
7273                            2, 32,
7274                            "group size must be in the interval [2,32]",
7275                            Loc)) {
7276     return false;
7277   }
7278   if (!isPowerOf2_64(GroupSize)) {
7279     Error(Loc, "group size must be a power of two");
7280     return false;
7281   }
7282   if (parseSwizzleOperand(LaneIdx,
7283                           0, GroupSize - 1,
7284                           "lane id must be in the interval [0,group size - 1]",
7285                           Loc)) {
7286     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7287     return true;
7288   }
7289   return false;
7290 }
7291 
7292 bool
7293 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7294   using namespace llvm::AMDGPU::Swizzle;
7295 
7296   SMLoc Loc;
7297   int64_t GroupSize;
7298 
7299   if (!parseSwizzleOperand(GroupSize,
7300                            2, 32,
7301                            "group size must be in the interval [2,32]",
7302                            Loc)) {
7303     return false;
7304   }
7305   if (!isPowerOf2_64(GroupSize)) {
7306     Error(Loc, "group size must be a power of two");
7307     return false;
7308   }
7309 
7310   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7311   return true;
7312 }
7313 
7314 bool
7315 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7316   using namespace llvm::AMDGPU::Swizzle;
7317 
7318   SMLoc Loc;
7319   int64_t GroupSize;
7320 
7321   if (!parseSwizzleOperand(GroupSize,
7322                            1, 16,
7323                            "group size must be in the interval [1,16]",
7324                            Loc)) {
7325     return false;
7326   }
7327   if (!isPowerOf2_64(GroupSize)) {
7328     Error(Loc, "group size must be a power of two");
7329     return false;
7330   }
7331 
7332   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7333   return true;
7334 }
7335 
7336 bool
7337 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7338   using namespace llvm::AMDGPU::Swizzle;
7339 
7340   if (!skipToken(AsmToken::Comma, "expected a comma")) {
7341     return false;
7342   }
7343 
7344   StringRef Ctl;
7345   SMLoc StrLoc = getLoc();
7346   if (!parseString(Ctl)) {
7347     return false;
7348   }
7349   if (Ctl.size() != BITMASK_WIDTH) {
7350     Error(StrLoc, "expected a 5-character mask");
7351     return false;
7352   }
7353 
7354   unsigned AndMask = 0;
7355   unsigned OrMask = 0;
7356   unsigned XorMask = 0;
7357 
7358   for (size_t i = 0; i < Ctl.size(); ++i) {
7359     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7360     switch(Ctl[i]) {
7361     default:
7362       Error(StrLoc, "invalid mask");
7363       return false;
7364     case '0':
7365       break;
7366     case '1':
7367       OrMask |= Mask;
7368       break;
7369     case 'p':
7370       AndMask |= Mask;
7371       break;
7372     case 'i':
7373       AndMask |= Mask;
7374       XorMask |= Mask;
7375       break;
7376     }
7377   }
7378 
7379   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7380   return true;
7381 }
7382 
7383 bool
7384 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7385 
7386   SMLoc OffsetLoc = getLoc();
7387 
7388   if (!parseExpr(Imm, "a swizzle macro")) {
7389     return false;
7390   }
7391   if (!isUInt<16>(Imm)) {
7392     Error(OffsetLoc, "expected a 16-bit offset");
7393     return false;
7394   }
7395   return true;
7396 }
7397 
7398 bool
7399 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7400   using namespace llvm::AMDGPU::Swizzle;
7401 
7402   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7403 
7404     SMLoc ModeLoc = getLoc();
7405     bool Ok = false;
7406 
7407     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7408       Ok = parseSwizzleQuadPerm(Imm);
7409     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7410       Ok = parseSwizzleBitmaskPerm(Imm);
7411     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7412       Ok = parseSwizzleBroadcast(Imm);
7413     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7414       Ok = parseSwizzleSwap(Imm);
7415     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7416       Ok = parseSwizzleReverse(Imm);
7417     } else {
7418       Error(ModeLoc, "expected a swizzle mode");
7419     }
7420 
7421     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7422   }
7423 
7424   return false;
7425 }
7426 
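// Parse the swizzle "offset" operand: either a raw 16-bit offset or one of
// the swizzle() macros handled above, e.g.
// "offset:swizzle(QUAD_PERM, 0, 1, 2, 3)" (illustrative).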
7427 OperandMatchResultTy
7428 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7429   SMLoc S = getLoc();
7430   int64_t Imm = 0;
7431 
7432   if (trySkipId("offset")) {
7433 
7434     bool Ok = false;
7435     if (skipToken(AsmToken::Colon, "expected a colon")) {
7436       if (trySkipId("swizzle")) {
7437         Ok = parseSwizzleMacro(Imm);
7438       } else {
7439         Ok = parseSwizzleOffset(Imm);
7440       }
7441     }
7442 
7443     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7444 
7445     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
7446   } else {
7447     // Swizzle "offset" operand is optional.
7448     // If it is omitted, try parsing other optional operands.
7449     return parseOptionalOpr(Operands);
7450   }
7451 }
7452 
7453 bool
7454 AMDGPUOperand::isSwizzle() const {
7455   return isImmTy(ImmTySwizzle);
7456 }
7457 
7458 //===----------------------------------------------------------------------===//
7459 // VGPR Index Mode
7460 //===----------------------------------------------------------------------===//
7461 
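// Parse the body of a gpr_idx() operand: a comma-separated list of VGPR
// index modes, e.g. "gpr_idx(SRC0,DST)" (illustrative). An empty list yields
// OFF; each mode may appear at most once, and the result is a bitmask with
// one bit per selected mode.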
7462 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
7463 
7464   using namespace llvm::AMDGPU::VGPRIndexMode;
7465 
7466   if (trySkipToken(AsmToken::RParen)) {
7467     return OFF;
7468   }
7469 
7470   int64_t Imm = 0;
7471 
7472   while (true) {
7473     unsigned Mode = 0;
7474     SMLoc S = getLoc();
7475 
7476     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
7477       if (trySkipId(IdSymbolic[ModeId])) {
7478         Mode = 1 << ModeId;
7479         break;
7480       }
7481     }
7482 
7483     if (Mode == 0) {
7484       Error(S, (Imm == 0) ?
7485                "expected a VGPR index mode or a closing parenthesis" :
7486                "expected a VGPR index mode");
7487       return UNDEF;
7488     }
7489 
7490     if (Imm & Mode) {
7491       Error(S, "duplicate VGPR index mode");
7492       return UNDEF;
7493     }
7494     Imm |= Mode;
7495 
7496     if (trySkipToken(AsmToken::RParen))
7497       break;
7498     if (!skipToken(AsmToken::Comma,
7499                    "expected a comma or a closing parenthesis"))
7500       return UNDEF;
7501   }
7502 
7503   return Imm;
7504 }
7505 
7506 OperandMatchResultTy
7507 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7508 
7509   using namespace llvm::AMDGPU::VGPRIndexMode;
7510 
7511   int64_t Imm = 0;
7512   SMLoc S = getLoc();
7513 
7514   if (trySkipId("gpr_idx", AsmToken::LParen)) {
7515     Imm = parseGPRIdxMacro();
7516     if (Imm == UNDEF)
7517       return MatchOperand_ParseFail;
7518   } else {
7519     if (getParser().parseAbsoluteExpression(Imm))
7520       return MatchOperand_ParseFail;
7521     if (Imm < 0 || !isUInt<4>(Imm)) {
7522       Error(S, "invalid immediate: only 4-bit values are legal");
7523       return MatchOperand_ParseFail;
7524     }
7525   }
7526 
7527   Operands.push_back(
7528       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7529   return MatchOperand_Success;
7530 }
7531 
7532 bool AMDGPUOperand::isGPRIdxMode() const {
7533   return isImmTy(ImmTyGprIdxMode);
7534 }
7535 
7536 //===----------------------------------------------------------------------===//
7537 // sopp branch targets
7538 //===----------------------------------------------------------------------===//
7539 
7540 OperandMatchResultTy
7541 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
7542 
7543   // Make sure we are not parsing something
7544   // that looks like a label or an expression but is not.
7545   // This will improve error messages.
7546   if (isRegister() || isModifier())
7547     return MatchOperand_NoMatch;
7548 
7549   if (!parseExpr(Operands))
7550     return MatchOperand_ParseFail;
7551 
7552   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
7553   assert(Opr.isImm() || Opr.isExpr());
7554   SMLoc Loc = Opr.getStartLoc();
7555 
7556   // Currently we do not support arbitrary expressions as branch targets.
7557   // Only labels and absolute expressions are accepted.
7558   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
7559     Error(Loc, "expected an absolute expression or a label");
7560   } else if (Opr.isImm() && !Opr.isS16Imm()) {
7561     Error(Loc, "expected a 16-bit signed jump offset");
7562   }
7563 
7564   return MatchOperand_Success;
7565 }
7566 
7567 //===----------------------------------------------------------------------===//
7568 // Boolean holding registers
7569 //===----------------------------------------------------------------------===//
7570 
7571 OperandMatchResultTy
7572 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
7573   return parseReg(Operands);
7574 }
7575 
7576 //===----------------------------------------------------------------------===//
7577 // mubuf
7578 //===----------------------------------------------------------------------===//
7579 
7580 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
7581   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
7582 }
7583 
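// Convert parsed MUBUF operands. For atomics, the no-return opcode variant is
// selected unless the glc bit is set in the cache policy, and the first
// register operand is added a second time to act as the tied source for the
// atomic-return destination.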
7584 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
7585                                    const OperandVector &Operands,
7586                                    bool IsAtomic,
7587                                    bool IsLds) {
7588   OptionalImmIndexMap OptionalIdx;
7589   unsigned FirstOperandIdx = 1;
7590   bool IsAtomicReturn = false;
7591 
7592   if (IsAtomic) {
7593     for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7594       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7595       if (!Op.isCPol())
7596         continue;
7597       IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7598       break;
7599     }
7600 
7601     if (!IsAtomicReturn) {
7602       int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7603       if (NewOpc != -1)
7604         Inst.setOpcode(NewOpc);
7605     }
7606 
7607     IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7608                       SIInstrFlags::IsAtomicRet;
7609   }
7610 
7611   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
7612     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7613 
7614     // Add the register arguments
7615     if (Op.isReg()) {
7616       Op.addRegOperands(Inst, 1);
7617       // Insert a tied src for the atomic return dst.
7618       // This cannot be postponed as subsequent calls to
7619       // addImmOperands rely on the correct number of MC operands.
7620       if (IsAtomicReturn && i == FirstOperandIdx)
7621         Op.addRegOperands(Inst, 1);
7622       continue;
7623     }
7624 
7625     // Handle the case where soffset is an immediate
7626     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7627       Op.addImmOperands(Inst, 1);
7628       continue;
7629     }
7630 
7631     // Handle tokens like 'offen' which are sometimes hard-coded into the
7632     // asm string.  There are no MCInst operands for these.
7633     if (Op.isToken()) {
7634       continue;
7635     }
7636     assert(Op.isImm());
7637 
7638     // Handle optional arguments
7639     OptionalIdx[Op.getImmTy()] = i;
7640   }
7641 
7642   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7643   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7644 
7645   if (!IsLds) { // tfe is not legal with lds opcodes
7646     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7647   }
7648   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7649 }
7650 
7651 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
7652   OptionalImmIndexMap OptionalIdx;
7653 
7654   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7655     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7656 
7657     // Add the register arguments
7658     if (Op.isReg()) {
7659       Op.addRegOperands(Inst, 1);
7660       continue;
7661     }
7662 
7663     // Handle the case where soffset is an immediate
7664     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7665       Op.addImmOperands(Inst, 1);
7666       continue;
7667     }
7668 
7669     // Handle tokens like 'offen' which are sometimes hard-coded into the
7670     // asm string.  There are no MCInst operands for these.
7671     if (Op.isToken()) {
7672       continue;
7673     }
7674     assert(Op.isImm());
7675 
7676     // Handle optional arguments
7677     OptionalIdx[Op.getImmTy()] = i;
7678   }
7679 
7680   addOptionalImmOperand(Inst, Operands, OptionalIdx,
7681                         AMDGPUOperand::ImmTyOffset);
7682   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
7683   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7684   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7685   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
7686 }
7687 
7688 //===----------------------------------------------------------------------===//
7689 // mimg
7690 //===----------------------------------------------------------------------===//
7691 
7692 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
7693                               bool IsAtomic) {
7694   unsigned I = 1;
7695   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7696   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7697     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7698   }
7699 
7700   if (IsAtomic) {
7701     // Add src, same as dst
7702     assert(Desc.getNumDefs() == 1);
7703     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
7704   }
7705 
7706   OptionalImmIndexMap OptionalIdx;
7707 
7708   for (unsigned E = Operands.size(); I != E; ++I) {
7709     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7710 
7711     // Add the register arguments
7712     if (Op.isReg()) {
7713       Op.addRegOperands(Inst, 1);
7714     } else if (Op.isImmModifier()) {
7715       OptionalIdx[Op.getImmTy()] = I;
7716     } else if (!Op.isToken()) {
7717       llvm_unreachable("unexpected operand type");
7718     }
7719   }
7720 
7721   bool IsGFX10Plus = isGFX10Plus();
7722 
7723   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
7724   if (IsGFX10Plus)
7725     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
7726   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
7727   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
7728   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
7729   if (IsGFX10Plus)
7730     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
7731   if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
7732     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
7733   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
7734   if (!IsGFX10Plus)
7735     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
7736   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
7737 }
7738 
7739 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
7740   cvtMIMG(Inst, Operands, true);
7741 }
7742 
7743 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
7744   OptionalImmIndexMap OptionalIdx;
7745   bool IsAtomicReturn = false;
7746 
7747   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7748     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7749     if (!Op.isCPol())
7750       continue;
7751     IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
7752     break;
7753   }
7754 
7755   if (!IsAtomicReturn) {
7756     int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
7757     if (NewOpc != -1)
7758       Inst.setOpcode(NewOpc);
7759   }
7760 
7761   IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
7762                     SIInstrFlags::IsAtomicRet;
7763 
7764   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
7765     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7766 
7767     // Add the register arguments
7768     if (Op.isReg()) {
7769       Op.addRegOperands(Inst, 1);
7770       if (IsAtomicReturn && i == 1)
7771         Op.addRegOperands(Inst, 1);
7772       continue;
7773     }
7774 
7775     // Handle the case where soffset is an immediate
7776     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
7777       Op.addImmOperands(Inst, 1);
7778       continue;
7779     }
7780 
7781     // Handle tokens like 'offen' which are sometimes hard-coded into the
7782     // asm string.  There are no MCInst operands for these.
7783     if (Op.isToken()) {
7784       continue;
7785     }
7786     assert(Op.isImm());
7787 
7788     // Handle optional arguments
7789     OptionalIdx[Op.getImmTy()] = i;
7790   }
7791 
7792   if ((int)Inst.getNumOperands() <=
7793       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
7794     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
7795   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
7796 }
7797 
7798 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
7799                                       const OperandVector &Operands) {
7800   for (unsigned I = 1; I < Operands.size(); ++I) {
7801     auto &Operand = (AMDGPUOperand &)*Operands[I];
7802     if (Operand.isReg())
7803       Operand.addRegOperands(Inst, 1);
7804   }
7805 
7806   Inst.addOperand(MCOperand::createImm(1)); // a16
7807 }
7808 
7809 //===----------------------------------------------------------------------===//
7810 // smrd
7811 //===----------------------------------------------------------------------===//
7812 
7813 bool AMDGPUOperand::isSMRDOffset8() const {
7814   return isImm() && isUInt<8>(getImm());
7815 }
7816 
7817 bool AMDGPUOperand::isSMEMOffset() const {
7818   return isImmTy(ImmTyNone) ||
7819          isImmTy(ImmTyOffset); // Offset range is checked later by validator.
7820 }
7821 
7822 bool AMDGPUOperand::isSMRDLiteralOffset() const {
7823   // 32-bit literals are only supported on CI and we only want to use them
7824   // when the offset does not fit in 8 bits.
7825   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
7826 }
7827 
7828 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
7829   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7830 }
7831 
7832 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
7833   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7834 }
7835 
7836 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
7837   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7838 }
7839 
7840 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
7841   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
7842 }
7843 
7844 //===----------------------------------------------------------------------===//
7845 // vop3
7846 //===----------------------------------------------------------------------===//
7847 
7848 static bool ConvertOmodMul(int64_t &Mul) {
7849   if (Mul != 1 && Mul != 2 && Mul != 4)
7850     return false;
7851 
7852   Mul >>= 1;
7853   return true;
7854 }
7855 
7856 static bool ConvertOmodDiv(int64_t &Div) {
7857   if (Div == 1) {
7858     Div = 0;
7859     return true;
7860   }
7861 
7862   if (Div == 2) {
7863     Div = 3;
7864     return true;
7865   }
7866 
7867   return false;
7868 }
7869 
7870 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
7871 // This is intentional and ensures compatibility with sp3.
7872 // See bug 35397 for details.
7873 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
7874   if (BoundCtrl == 0 || BoundCtrl == 1) {
7875     BoundCtrl = 1;
7876     return true;
7877   }
7878   return false;
7879 }
7880 
7881 // Note: the order in this table matches the order of operands in AsmString.
7882 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
7883   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
7884   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
7885   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
7886   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
7887   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
7888   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
7889   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
7890   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
7891   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
7892   {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
7893   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
7894   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
7895   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
7896   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
7897   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
7898   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
7899   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
7900   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
7901   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
7902   {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
7903   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
7904   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
7905   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
7906   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
7907   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7908   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7909   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7910   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7911   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7912   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7913   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7914   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7915   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7916   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7917   {"dpp8",     AMDGPUOperand::ImmTyDPP8, false, nullptr},
7918   {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr},
7919   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
7920   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
7921   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
7922   {"fi",   AMDGPUOperand::ImmTyDppFi, false, nullptr},
7923   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7924   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7925   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr},
7926   {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr},
7927   {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr}
7928 };
7929 
7930 void AMDGPUAsmParser::onBeginOfFile() {
7931   if (!getParser().getStreamer().getTargetStreamer() ||
7932       getSTI().getTargetTriple().getArch() == Triple::r600)
7933     return;
7934 
7935   if (!getTargetStreamer().getTargetID())
7936     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7937 
7938   if (isHsaAbiVersion3AndAbove(&getSTI()))
7939     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7940 }
7941 
7942 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7943 
7944   OperandMatchResultTy res = parseOptionalOpr(Operands);
7945 
  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomic instructions
  // which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
7956 
7957   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7958     if (res != MatchOperand_Success ||
7959         isToken(AsmToken::EndOfStatement))
7960       break;
7961 
7962     trySkipToken(AsmToken::Comma);
7963     res = parseOptionalOpr(Operands);
7964   }
7965 
7966   return res;
7967 }
7968 
7969 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7970   OperandMatchResultTy res;
7971   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7972     // try to parse any optional operand here
7973     if (Op.IsBit) {
7974       res = parseNamedBit(Op.Name, Operands, Op.Type);
7975     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7976       res = parseOModOperand(Operands);
7977     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7978                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7979                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7980       res = parseSDWASel(Operands, Op.Name, Op.Type);
7981     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7982       res = parseSDWADstUnused(Operands);
7983     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7984                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7985                Op.Type == AMDGPUOperand::ImmTyNegLo ||
7986                Op.Type == AMDGPUOperand::ImmTyNegHi) {
7987       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7988                                         Op.ConvertResult);
7989     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7990       res = parseDim(Operands);
7991     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
7992       res = parseCPol(Operands);
7993     } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) {
7994       res = parseDPP8(Operands);
7995     } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) {
7996       res = parseDPPCtrl(Operands);
7997     } else {
7998       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7999       if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) {
8000         res = parseOperandArrayWithPrefix("neg", Operands,
8001                                           AMDGPUOperand::ImmTyBLGP,
8002                                           nullptr);
8003       }
8004     }
8005     if (res != MatchOperand_NoMatch) {
8006       return res;
8007     }
8008   }
8009   return MatchOperand_NoMatch;
8010 }
8011 
8012 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
8013   StringRef Name = getTokenStr();
8014   if (Name == "mul") {
8015     return parseIntWithPrefix("mul", Operands,
8016                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8017   }
8018 
8019   if (Name == "div") {
8020     return parseIntWithPrefix("div", Operands,
8021                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8022   }
8023 
8024   return MatchOperand_NoMatch;
8025 }
8026 
8027 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
8028   cvtVOP3P(Inst, Operands);
8029 
8030   int Opc = Inst.getOpcode();
8031 
8032   int SrcNum;
8033   const int Ops[] = { AMDGPU::OpName::src0,
8034                       AMDGPU::OpName::src1,
8035                       AMDGPU::OpName::src2 };
8036   for (SrcNum = 0;
8037        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
8038        ++SrcNum);
8039   assert(SrcNum > 0);
8040 
8041   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8042   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
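  // Bit SrcNum of op_sel is the destination op_sel bit; it is forwarded to
  // src0_modifiers as DST_OP_SEL below.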
8043 
8044   if ((OpSel & (1 << SrcNum)) != 0) {
8045     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
8046     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8047     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
8048   }
8049 }
8050 
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  return
      // 1. This operand is an input modifier.
      Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not the last operand.
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class.
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand.
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}
8061 
8062 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
8063 {
8064   OptionalImmIndexMap OptionalIdx;
8065   unsigned Opc = Inst.getOpcode();
8066 
8067   unsigned I = 1;
8068   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8069   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8070     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8071   }
8072 
8073   for (unsigned E = Operands.size(); I != E; ++I) {
8074     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8075     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8076       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8077     } else if (Op.isInterpSlot() ||
8078                Op.isInterpAttr() ||
8079                Op.isAttrChan()) {
8080       Inst.addOperand(MCOperand::createImm(Op.getImm()));
8081     } else if (Op.isImmModifier()) {
8082       OptionalIdx[Op.getImmTy()] = I;
8083     } else {
8084       llvm_unreachable("unhandled operand type");
8085     }
8086   }
8087 
8088   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
8089     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
8090   }
8091 
8092   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8093     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8094   }
8095 
8096   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8097     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8098   }
8099 }
8100 
8101 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
8102 {
8103   OptionalImmIndexMap OptionalIdx;
8104   unsigned Opc = Inst.getOpcode();
8105 
8106   unsigned I = 1;
8107   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8108   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8109     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8110   }
8111 
8112   for (unsigned E = Operands.size(); I != E; ++I) {
8113     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8114     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8115       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8116     } else if (Op.isImmModifier()) {
8117       OptionalIdx[Op.getImmTy()] = I;
8118     } else {
8119       llvm_unreachable("unhandled operand type");
8120     }
8121   }
8122 
8123   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8124 
8125   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8126   if (OpSelIdx != -1)
8127     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8128 
8129   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
8130 
8131   if (OpSelIdx == -1)
8132     return;
8133 
8134   const int Ops[] = { AMDGPU::OpName::src0,
8135                       AMDGPU::OpName::src1,
8136                       AMDGPU::OpName::src2 };
8137   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8138                          AMDGPU::OpName::src1_modifiers,
8139                          AMDGPU::OpName::src2_modifiers };
8140 
8141   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
8142 
8143   for (int J = 0; J < 3; ++J) {
8144     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8145     if (OpIdx == -1)
8146       break;
8147 
8148     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8149     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
8150 
8151     if ((OpSel & (1 << J)) != 0)
8152       ModVal |= SISrcMods::OP_SEL_0;
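    // Bit 3 of op_sel is the destination op_sel bit; it is carried in
    // src0_modifiers as DST_OP_SEL.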
8153     if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
8154         (OpSel & (1 << 3)) != 0)
8155       ModVal |= SISrcMods::DST_OP_SEL;
8156 
8157     Inst.getOperand(ModIdx).setImm(ModVal);
8158   }
8159 }
8160 
8161 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
8162                               OptionalImmIndexMap &OptionalIdx) {
8163   unsigned Opc = Inst.getOpcode();
8164 
8165   unsigned I = 1;
8166   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8167   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8168     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8169   }
8170 
8171   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
8172     // This instruction has src modifiers
8173     for (unsigned E = Operands.size(); I != E; ++I) {
8174       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8175       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8176         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8177       } else if (Op.isImmModifier()) {
8178         OptionalIdx[Op.getImmTy()] = I;
8179       } else if (Op.isRegOrImm()) {
8180         Op.addRegOrImmOperands(Inst, 1);
8181       } else {
8182         llvm_unreachable("unhandled operand type");
8183       }
8184     }
8185   } else {
8186     // No src modifiers
8187     for (unsigned E = Operands.size(); I != E; ++I) {
8188       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8189       if (Op.isMod()) {
8190         OptionalIdx[Op.getImmTy()] = I;
8191       } else {
8192         Op.addRegOrImmOperands(Inst, 1);
8193       }
8194     }
8195   }
8196 
8197   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8198     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8199   }
8200 
8201   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8202     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8203   }
8204 
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
8209   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8210       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8211       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8212       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8213       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8214       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8215       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8216       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8217       Opc == AMDGPU::V_FMAC_F32_e64_gfx11 ||
8218       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8219       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8220       Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 ||
8221       Opc == AMDGPU::V_FMAC_F16_e64_gfx10 ||
8222       Opc == AMDGPU::V_FMAC_F16_e64_gfx11) {
8223     auto it = Inst.begin();
8224     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8225     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8226     ++it;
8227     // Copy the operand to ensure it's not invalidated when Inst grows.
8228     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8229   }
8230 }
8231 
8232 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8233   OptionalImmIndexMap OptionalIdx;
8234   cvtVOP3(Inst, Operands, OptionalIdx);
8235 }
8236 
8237 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8238                                OptionalImmIndexMap &OptIdx) {
8239   const int Opc = Inst.getOpcode();
8240   const MCInstrDesc &Desc = MII.get(Opc);
8241 
8242   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8243 
8244   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8245     assert(!IsPacked);
8246     Inst.addOperand(Inst.getOperand(0));
8247   }
8248 
  // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.
8251 
8252   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8253   if (OpSelIdx != -1) {
8254     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8255   }
8256 
8257   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8258   if (OpSelHiIdx != -1) {
8259     int DefaultVal = IsPacked ? -1 : 0;
8260     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
8261                           DefaultVal);
8262   }
8263 
8264   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
8265   if (NegLoIdx != -1) {
8266     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
8267     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
8268   }
8269 
8270   const int Ops[] = { AMDGPU::OpName::src0,
8271                       AMDGPU::OpName::src1,
8272                       AMDGPU::OpName::src2 };
8273   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
8274                          AMDGPU::OpName::src1_modifiers,
8275                          AMDGPU::OpName::src2_modifiers };
8276 
8277   unsigned OpSel = 0;
8278   unsigned OpSelHi = 0;
8279   unsigned NegLo = 0;
8280   unsigned NegHi = 0;
8281 
8282   if (OpSelIdx != -1)
8283     OpSel = Inst.getOperand(OpSelIdx).getImm();
8284 
8285   if (OpSelHiIdx != -1)
8286     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
8287 
8288   if (NegLoIdx != -1) {
8289     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
8290     NegLo = Inst.getOperand(NegLoIdx).getImm();
8291     NegHi = Inst.getOperand(NegHiIdx).getImm();
8292   }
8293 
8294   for (int J = 0; J < 3; ++J) {
8295     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
8296     if (OpIdx == -1)
8297       break;
8298 
8299     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8300 
8301     if (ModIdx == -1)
8302       continue;
8303 
8304     uint32_t ModVal = 0;
8305 
8306     if ((OpSel & (1 << J)) != 0)
8307       ModVal |= SISrcMods::OP_SEL_0;
8308 
8309     if ((OpSelHi & (1 << J)) != 0)
8310       ModVal |= SISrcMods::OP_SEL_1;
8311 
8312     if ((NegLo & (1 << J)) != 0)
8313       ModVal |= SISrcMods::NEG;
8314 
8315     if ((NegHi & (1 << J)) != 0)
8316       ModVal |= SISrcMods::NEG_HI;
8317 
8318     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
8319   }
8320 }
8321 
8322 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8323   OptionalImmIndexMap OptIdx;
8324   cvtVOP3(Inst, Operands, OptIdx);
8325   cvtVOP3P(Inst, Operands, OptIdx);
8326 }
8327 
8328 //===----------------------------------------------------------------------===//
8329 // VOPD
8330 //===----------------------------------------------------------------------===//
8331 
8332 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8333   if (!hasVOPD(getSTI()))
8334     return MatchOperand_NoMatch;
8335 
8336   if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8337     SMLoc S = getLoc();
8338     lex();
8339     lex();
8340     Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8341     const MCExpr *Expr;
8342     if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) {
8343       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8344       return MatchOperand_Success;
8345     }
8346     Error(S, "invalid VOPD :: usage");
8347     return MatchOperand_ParseFail;
8348   }
8349   return MatchOperand_NoMatch;
8350 }
8351 
8352 // Create VOPD MCInst operands using parsed assembler operands.
8353 // Parsed VOPD operands are ordered as follows:
8354 //   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
8355 //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8356 // If both OpX and OpY have an imm, the first imm has a different name:
8357 //   OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::'
8358 //   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
8359 // MCInst operands have the following order:
8360 //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
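// For example, assuming the GFX11 dual mnemonics 'v_dual_add_f32' and
// 'v_dual_mul_f32':
//   v_dual_add_f32 v0, v1, v2 :: v_dual_mul_f32 v3, v4, v5
// produces the MCInst operand order v0, v3, v1, v2, v4, v5.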
8361 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
8362   auto addOp = [&](uint16_t i) { // NOLINT:function pointer
8363     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8364     if (Op.isReg()) {
8365       Op.addRegOperands(Inst, 1);
8366       return;
8367     }
8368     if (Op.isImm()) {
8369       Op.addImmOperands(Inst, 1);
8370       return;
8371     }
8372     // Handle tokens like 'offen' which are sometimes hard-coded into the
8373     // asm string.  There are no MCInst operands for these.
8374     if (Op.isToken()) {
8375       return;
8376     }
8377     llvm_unreachable("Unhandled operand type in cvtVOPD");
8378   };
8379 
8380   // Indices into MCInst.Operands
8381   const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ...
8382   const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ...
8383   const auto MinOpYImmMCIndex = 4;   // dstX, dstY, src0X, src0Y, imm, ...
8384 
8385   unsigned Opc = Inst.getOpcode();
8386   bool HasVsrc1X =
8387       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1;
8388   bool HasImmX =
8389       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8390       (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8391                          FmamkOpXImmMCIndex ||
8392                      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) ==
8393                          FmaakOpXImmMCIndex));
8394 
8395   bool HasVsrc1Y =
8396       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1;
8397   bool HasImmY =
8398       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 ||
8399       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >=
8400           MinOpYImmMCIndex + HasVsrc1X;
8401 
8402   // Indices of parsed operands relative to dst
8403   const auto DstIdx = 0;
8404   const auto Src0Idx = 1;
8405   const auto Vsrc1OrImmIdx = 2;
8406 
8407   const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X;
8408   const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo)
8409 
8410   // Offsets into parsed operands
8411   const auto OpXFirstOperandOffset = 1;
8412   const auto OpYFirstOperandOffset =
8413       OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize;
8414 
8415   // Order of addOp calls determines MC operand order
8416   addOp(OpXFirstOperandOffset + DstIdx); // vdstX
8417   addOp(OpYFirstOperandOffset + DstIdx); // vdstY
8418 
8419   addOp(OpXFirstOperandOffset + Src0Idx); // src0X
8420   if (HasImmX) {
8421     // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak
8422     addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx);
8423     addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1);
8424   } else {
8425     if (HasVsrc1X) // all except v_mov
8426       addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X
8427   }
8428 
8429   addOp(OpYFirstOperandOffset + Src0Idx); // src0Y
8430   if (HasImmY) {
8431     // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak
8432     addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx);
8433     addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1);
8434   } else {
8435     if (HasVsrc1Y) // all except v_mov
8436       addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y
8437   }
8438 }
8439 
8440 //===----------------------------------------------------------------------===//
8441 // dpp
8442 //===----------------------------------------------------------------------===//
8443 
8444 bool AMDGPUOperand::isDPP8() const {
8445   return isImmTy(ImmTyDPP8);
8446 }
8447 
8448 bool AMDGPUOperand::isDPPCtrl() const {
8449   using namespace AMDGPU::DPP;
8450 
8451   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8452   if (result) {
8453     int64_t Imm = getImm();
8454     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8455            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8456            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8457            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8458            (Imm == DppCtrl::WAVE_SHL1) ||
8459            (Imm == DppCtrl::WAVE_ROL1) ||
8460            (Imm == DppCtrl::WAVE_SHR1) ||
8461            (Imm == DppCtrl::WAVE_ROR1) ||
8462            (Imm == DppCtrl::ROW_MIRROR) ||
8463            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8464            (Imm == DppCtrl::BCAST15) ||
8465            (Imm == DppCtrl::BCAST31) ||
8466            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8467            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8468   }
8469   return false;
8470 }
8471 
8472 //===----------------------------------------------------------------------===//
8473 // mAI
8474 //===----------------------------------------------------------------------===//
8475 
8476 bool AMDGPUOperand::isBLGP() const {
8477   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8478 }
8479 
8480 bool AMDGPUOperand::isCBSZ() const {
8481   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8482 }
8483 
8484 bool AMDGPUOperand::isABID() const {
8485   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8486 }
8487 
8488 bool AMDGPUOperand::isS16Imm() const {
8489   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8490 }
8491 
8492 bool AMDGPUOperand::isU16Imm() const {
8493   return isImm() && isUInt<16>(getImm());
8494 }
8495 
8496 //===----------------------------------------------------------------------===//
8497 // dim
8498 //===----------------------------------------------------------------------===//
8499 
8500 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
8501   // We want to allow "dim:1D" etc.,
8502   // but the initial 1 is tokenized as an integer.
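  // For example, both "dim:2D" and "dim:SQ_RSRC_IMG_2D" are accepted; the
  // SQ_RSRC_IMG_ prefix is stripped below before the suffix lookup.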
8503   std::string Token;
8504   if (isToken(AsmToken::Integer)) {
8505     SMLoc Loc = getToken().getEndLoc();
8506     Token = std::string(getTokenStr());
8507     lex();
8508     if (getLoc() != Loc)
8509       return false;
8510   }
8511 
8512   StringRef Suffix;
8513   if (!parseId(Suffix))
8514     return false;
8515   Token += Suffix;
8516 
8517   StringRef DimId = Token;
8518   if (DimId.startswith("SQ_RSRC_IMG_"))
8519     DimId = DimId.drop_front(12);
8520 
8521   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
8522   if (!DimInfo)
8523     return false;
8524 
8525   Encoding = DimInfo->Encoding;
8526   return true;
8527 }
8528 
8529 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
8530   if (!isGFX10Plus())
8531     return MatchOperand_NoMatch;
8532 
8533   SMLoc S = getLoc();
8534 
8535   if (!trySkipId("dim", AsmToken::Colon))
8536     return MatchOperand_NoMatch;
8537 
8538   unsigned Encoding;
8539   SMLoc Loc = getLoc();
8540   if (!parseDimId(Encoding)) {
8541     Error(Loc, "invalid dim value");
8542     return MatchOperand_ParseFail;
8543   }
8544 
8545   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
8546                                               AMDGPUOperand::ImmTyDim));
8547   return MatchOperand_Success;
8548 }
8549 
8550 //===----------------------------------------------------------------------===//
8551 // dpp
8552 //===----------------------------------------------------------------------===//
8553 
8554 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8555   SMLoc S = getLoc();
8556 
8557   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8558     return MatchOperand_NoMatch;
8559 
8560   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
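  // Each lane selector is 3 bits, packed LSB first; e.g. the identity
  // selection dpp8:[0,1,2,3,4,5,6,7] encodes as 0xFAC688.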
8561 
8562   int64_t Sels[8];
8563 
8564   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8565     return MatchOperand_ParseFail;
8566 
8567   for (size_t i = 0; i < 8; ++i) {
8568     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8569       return MatchOperand_ParseFail;
8570 
8571     SMLoc Loc = getLoc();
8572     if (getParser().parseAbsoluteExpression(Sels[i]))
8573       return MatchOperand_ParseFail;
8574     if (0 > Sels[i] || 7 < Sels[i]) {
8575       Error(Loc, "expected a 3-bit value");
8576       return MatchOperand_ParseFail;
8577     }
8578   }
8579 
8580   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8581     return MatchOperand_ParseFail;
8582 
8583   unsigned DPP8 = 0;
8584   for (size_t i = 0; i < 8; ++i)
8585     DPP8 |= (Sels[i] << (i * 3));
8586 
8587   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8588   return MatchOperand_Success;
8589 }
8590 
8591 bool
8592 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8593                                     const OperandVector &Operands) {
8594   if (Ctrl == "row_newbcast")
8595     return isGFX90A();
8596 
8597   if (Ctrl == "row_share" ||
8598       Ctrl == "row_xmask")
8599     return isGFX10Plus();
8600 
8601   if (Ctrl == "wave_shl" ||
8602       Ctrl == "wave_shr" ||
8603       Ctrl == "wave_rol" ||
8604       Ctrl == "wave_ror" ||
8605       Ctrl == "row_bcast")
8606     return isVI() || isGFX9();
8607 
8608   return Ctrl == "row_mirror" ||
8609          Ctrl == "row_half_mirror" ||
8610          Ctrl == "quad_perm" ||
8611          Ctrl == "row_shl" ||
8612          Ctrl == "row_shr" ||
8613          Ctrl == "row_ror";
8614 }
8615 
8616 int64_t
8617 AMDGPUAsmParser::parseDPPCtrlPerm() {
8618   // quad_perm:[%d,%d,%d,%d]
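  // Each lane selector is 2 bits, packed LSB first; e.g. the identity
  // permutation quad_perm:[0,1,2,3] encodes as 0xE4 (the dpp_ctrl default).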
8619 
8620   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8621     return -1;
8622 
8623   int64_t Val = 0;
8624   for (int i = 0; i < 4; ++i) {
8625     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8626       return -1;
8627 
8628     int64_t Temp;
8629     SMLoc Loc = getLoc();
8630     if (getParser().parseAbsoluteExpression(Temp))
8631       return -1;
8632     if (Temp < 0 || Temp > 3) {
8633       Error(Loc, "expected a 2-bit value");
8634       return -1;
8635     }
8636 
8637     Val += (Temp << i * 2);
8638   }
8639 
8640   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8641     return -1;
8642 
8643   return Val;
8644 }
8645 
8646 int64_t
8647 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
8648   using namespace AMDGPU::DPP;
8649 
8650   // sel:%d
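  // The selector value is combined with the base control, e.g. 'row_shl:1'
  // encodes as DppCtrl::ROW_SHL0 | 1.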
8651 
8652   int64_t Val;
8653   SMLoc Loc = getLoc();
8654 
8655   if (getParser().parseAbsoluteExpression(Val))
8656     return -1;
8657 
8658   struct DppCtrlCheck {
8659     int64_t Ctrl;
8660     int Lo;
8661     int Hi;
8662   };
8663 
8664   DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
8665     .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
8666     .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
8667     .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
8668     .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
8669     .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
8670     .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
8671     .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
8672     .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
8673     .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
8674     .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
8675     .Default({-1, 0, 0});
8676 
8677   bool Valid;
8678   if (Check.Ctrl == -1) {
8679     Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
8680     Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
8681   } else {
8682     Valid = Check.Lo <= Val && Val <= Check.Hi;
8683     Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
8684   }
8685 
8686   if (!Valid) {
8687     Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
8688     return -1;
8689   }
8690 
8691   return Val;
8692 }
8693 
8694 OperandMatchResultTy
8695 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
8696   using namespace AMDGPU::DPP;
8697 
8698   if (!isToken(AsmToken::Identifier) ||
8699       !isSupportedDPPCtrl(getTokenStr(), Operands))
8700     return MatchOperand_NoMatch;
8701 
8702   SMLoc S = getLoc();
8703   int64_t Val = -1;
8704   StringRef Ctrl;
8705 
8706   parseId(Ctrl);
8707 
8708   if (Ctrl == "row_mirror") {
8709     Val = DppCtrl::ROW_MIRROR;
8710   } else if (Ctrl == "row_half_mirror") {
8711     Val = DppCtrl::ROW_HALF_MIRROR;
8712   } else {
8713     if (skipToken(AsmToken::Colon, "expected a colon")) {
8714       if (Ctrl == "quad_perm") {
8715         Val = parseDPPCtrlPerm();
8716       } else {
8717         Val = parseDPPCtrlSel(Ctrl);
8718       }
8719     }
8720   }
8721 
8722   if (Val == -1)
8723     return MatchOperand_ParseFail;
8724 
8725   Operands.push_back(
8726     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
8727   return MatchOperand_Success;
8728 }
8729 
8730 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
8731   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
8732 }
8733 
8734 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
8735   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
8736 }
8737 
8738 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8739   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8740 }
8741 
8742 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8743   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8744 }
8745 
8746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8747   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8748 }
8749 
8750 // Add dummy $old operand
8751 void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst,
8752                                         const OperandVector &Operands,
8753                                         bool IsDPP8) {
8754   Inst.addOperand(MCOperand::createReg(0));
8755   cvtVOP3DPP(Inst, Operands, IsDPP8);
8756 }
8757 
8758 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8759   OptionalImmIndexMap OptionalIdx;
8760   unsigned Opc = Inst.getOpcode();
8761   bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8762   unsigned I = 1;
8763   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8764   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8765     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8766   }
8767 
8768   int Fi = 0;
8769   for (unsigned E = Operands.size(); I != E; ++I) {
8770     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8771                                             MCOI::TIED_TO);
8772     if (TiedTo != -1) {
8773       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
8775       Inst.addOperand(Inst.getOperand(TiedTo));
8776     }
8777     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8778     // Add the register arguments
8779     if (IsDPP8 && Op.isFI()) {
8780       Fi = Op.getImm();
8781     } else if (HasModifiers &&
8782                isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8783       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
8784     } else if (Op.isReg()) {
8785       Op.addRegOperands(Inst, 1);
8786     } else if (Op.isImm() &&
8787                Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) {
8788       assert(!HasModifiers && "Case should be unreachable with modifiers");
8789       assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
8790       Op.addImmOperands(Inst, 1);
8791     } else if (Op.isImm()) {
8792       OptionalIdx[Op.getImmTy()] = I;
8793     } else {
8794       llvm_unreachable("unhandled operand type");
8795     }
8796   }
8797   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
8798     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
8799   }
8800   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
8801     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
8802   }
8803   if (Desc.TSFlags & SIInstrFlags::VOP3P)
8804     cvtVOP3P(Inst, Operands, OptionalIdx);
8805   else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) {
8806     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
8807   }
8808 
8809   if (IsDPP8) {
8810     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
8811     using namespace llvm::AMDGPU::DPP;
8812     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8813   } else {
8814     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
8815     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8816     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8817     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8818     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8819       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8820     }
8821   }
8822 }
8823 
8824 // Add dummy $old operand
8825 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst,
8826                                       const OperandVector &Operands,
8827                                       bool IsDPP8) {
8828   Inst.addOperand(MCOperand::createReg(0));
8829   cvtDPP(Inst, Operands, IsDPP8);
8830 }
8831 
8832 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8833   OptionalImmIndexMap OptionalIdx;
8834 
8835   unsigned Opc = Inst.getOpcode();
8836   bool HasModifiers =
8837       AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8838   unsigned I = 1;
8839   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8840   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8841     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8842   }
8843 
8844   int Fi = 0;
8845   for (unsigned E = Operands.size(); I != E; ++I) {
8846     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8847                                             MCOI::TIED_TO);
8848     if (TiedTo != -1) {
8849       assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
8851       Inst.addOperand(Inst.getOperand(TiedTo));
8852     }
8853     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8854     // Add the register arguments
8855     if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) DPP uses the "vcc" token.
      // Skip it.
8858       continue;
8859     }
8860 
8861     if (IsDPP8) {
8862       if (Op.isDPP8()) {
8863         Op.addImmOperands(Inst, 1);
8864       } else if (HasModifiers &&
8865                  isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8866         Op.addRegWithFPInputModsOperands(Inst, 2);
8867       } else if (Op.isFI()) {
8868         Fi = Op.getImm();
8869       } else if (Op.isReg()) {
8870         Op.addRegOperands(Inst, 1);
8871       } else {
8872         llvm_unreachable("Invalid operand type");
8873       }
8874     } else {
8875       if (HasModifiers &&
8876           isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8877         Op.addRegWithFPInputModsOperands(Inst, 2);
8878       } else if (Op.isReg()) {
8879         Op.addRegOperands(Inst, 1);
8880       } else if (Op.isDPPCtrl()) {
8881         Op.addImmOperands(Inst, 1);
8882       } else if (Op.isImm()) {
8883         // Handle optional arguments
8884         OptionalIdx[Op.getImmTy()] = I;
8885       } else {
8886         llvm_unreachable("Invalid operand type");
8887       }
8888     }
8889   }
8890 
8891   if (IsDPP8) {
8892     using namespace llvm::AMDGPU::DPP;
8893     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8894   } else {
8895     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8896     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8897     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8898     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8899       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8900     }
8901   }
8902 }
8903 
8904 //===----------------------------------------------------------------------===//
8905 // sdwa
8906 //===----------------------------------------------------------------------===//
8907 
8908 OperandMatchResultTy
8909 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8910                               AMDGPUOperand::ImmTy Type) {
8911   using namespace llvm::AMDGPU::SDWA;
8912 
8913   SMLoc S = getLoc();
8914   StringRef Value;
8915   OperandMatchResultTy res;
8916 
8917   SMLoc StringLoc;
8918   res = parseStringWithPrefix(Prefix, Value, StringLoc);
8919   if (res != MatchOperand_Success) {
8920     return res;
8921   }
8922 
8923   int64_t Int;
8924   Int = StringSwitch<int64_t>(Value)
8925         .Case("BYTE_0", SdwaSel::BYTE_0)
8926         .Case("BYTE_1", SdwaSel::BYTE_1)
8927         .Case("BYTE_2", SdwaSel::BYTE_2)
8928         .Case("BYTE_3", SdwaSel::BYTE_3)
8929         .Case("WORD_0", SdwaSel::WORD_0)
8930         .Case("WORD_1", SdwaSel::WORD_1)
8931         .Case("DWORD", SdwaSel::DWORD)
8932         .Default(0xffffffff);
8933 
8934   if (Int == 0xffffffff) {
8935     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8936     return MatchOperand_ParseFail;
8937   }
8938 
8939   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8940   return MatchOperand_Success;
8941 }
8942 
8943 OperandMatchResultTy
8944 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
8945   using namespace llvm::AMDGPU::SDWA;
8946 
8947   SMLoc S = getLoc();
8948   StringRef Value;
8949   OperandMatchResultTy res;
8950 
8951   SMLoc StringLoc;
8952   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
8953   if (res != MatchOperand_Success) {
8954     return res;
8955   }
8956 
8957   int64_t Int;
8958   Int = StringSwitch<int64_t>(Value)
8959         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
8960         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
8961         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
8962         .Default(0xffffffff);
8963 
8964   if (Int == 0xffffffff) {
8965     Error(StringLoc, "invalid dst_unused value");
8966     return MatchOperand_ParseFail;
8967   }
8968 
8969   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
8970   return MatchOperand_Success;
8971 }
8972 
8973 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8974   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8975 }
8976 
8977 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8978   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8979 }
8980 
8981 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8982   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8983 }
8984 
8985 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8986   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8987 }
8988 
8989 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8990   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8991 }
8992 
8993 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8994                               uint64_t BasicInstType,
8995                               bool SkipDstVcc,
8996                               bool SkipSrcVcc) {
8997   using namespace llvm::AMDGPU::SDWA;
8998 
8999   OptionalImmIndexMap OptionalIdx;
9000   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
9001   bool SkippedVcc = false;
9002 
9003   unsigned I = 1;
9004   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9005   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9006     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9007   }
9008 
9009   for (unsigned E = Operands.size(); I != E; ++I) {
9010     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9011     if (SkipVcc && !SkippedVcc && Op.isReg() &&
9012         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA uses the "vcc" token as dst.
9014       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
9015       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
9016       // Skip VCC only if we didn't skip it on previous iteration.
9017       // Note that src0 and src1 occupy 2 slots each because of modifiers.
9018       if (BasicInstType == SIInstrFlags::VOP2 &&
9019           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
9020            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
9021         SkippedVcc = true;
9022         continue;
9023       } else if (BasicInstType == SIInstrFlags::VOPC &&
9024                  Inst.getNumOperands() == 0) {
9025         SkippedVcc = true;
9026         continue;
9027       }
9028     }
9029     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
9030       Op.addRegOrImmWithInputModsOperands(Inst, 2);
9031     } else if (Op.isImm()) {
9032       // Handle optional arguments
9033       OptionalIdx[Op.getImmTy()] = I;
9034     } else {
9035       llvm_unreachable("Invalid operand type");
9036     }
9037     SkippedVcc = false;
9038   }
9039 
9040   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
9041       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
9042       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments.
9044     switch (BasicInstType) {
9045     case SIInstrFlags::VOP1:
9046       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9047       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
9048         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9049       }
9050       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9051       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
9052       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9053       break;
9054 
9055     case SIInstrFlags::VOP2:
9056       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9057       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
9058         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
9059       }
9060       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
9061       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
9062       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9063       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9064       break;
9065 
9066     case SIInstrFlags::VOPC:
9067       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
9068         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
9069       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
9070       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
9071       break;
9072 
9073     default:
9074       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
9075     }
9076   }
9077 
  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
9080   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
9081       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
9082     auto it = Inst.begin();
9083     std::advance(
9084       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
9085     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
9086   }
9087 }
9088 
9089 //===----------------------------------------------------------------------===//
9090 // mAI
9091 //===----------------------------------------------------------------------===//
9092 
9093 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
9094   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
9095 }
9096 
9097 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
9098   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
9099 }
9100 
9101 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
9102   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
9103 }
9104 
9105 /// Force static initialization.
9106 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
9107   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
9108   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
9109 }
9110 
9111 #define GET_REGISTER_MATCHER
9112 #define GET_MATCHER_IMPLEMENTATION
9113 #define GET_MNEMONIC_SPELL_CHECKER
9114 #define GET_MNEMONIC_CHECKER
9115 #include "AMDGPUGenAsmMatcher.inc"
9116 
// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
9119 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9120                                                      unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand when
  // the corresponding token was expected.
9125   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9126   switch (Kind) {
9127   case MCK_addr64:
9128     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9129   case MCK_gds:
9130     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9131   case MCK_lds:
9132     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9133   case MCK_idxen:
9134     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9135   case MCK_offen:
9136     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9137   case MCK_SSrcB32:
9138     // When operands have expression values, they will return true for isToken,
9139     // because it is not possible to distinguish between a token and an
9140     // expression at parse time. MatchInstructionImpl() will always try to
9141     // match an operand as a token, when isToken returns true, and when the
9142     // name of the expression is not a valid token, the match will fail,
9143     // so we need to handle it here.
9144     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9145   case MCK_SSrcF32:
9146     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9147   case MCK_SoppBrTarget:
9148     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
9149   case MCK_VReg32OrOff:
9150     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9151   case MCK_InterpSlot:
9152     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9153   case MCK_Attr:
9154     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9155   case MCK_AttrChan:
9156     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
9157   case MCK_ImmSMEMOffset:
9158     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
9159   case MCK_SReg_64:
9160   case MCK_SReg_64_XEXEC:
9161     // Null is defined as a 32-bit register but
9162     // it should also be enabled with 64-bit operands.
9163     // The following code enables it for SReg_64 operands
9164     // used as source and destination. Remaining source
9165     // operands are handled in isInlinableImm.
9166     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9167   default:
9168     return Match_InvalidOperand;
9169   }
9170 }
9171 
9172 //===----------------------------------------------------------------------===//
9173 // endpgm
9174 //===----------------------------------------------------------------------===//
9175 
9176 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
9177   SMLoc S = getLoc();
9178   int64_t Imm = 0;
9179 
9180   if (!parseExpr(Imm)) {
9181     // The operand is optional, if not present default to 0
9182     Imm = 0;
9183   }
9184 
9185   if (!isUInt<16>(Imm)) {
9186     Error(S, "expected a 16-bit value");
9187     return MatchOperand_ParseFail;
9188   }
9189 
9190   Operands.push_back(
9191       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9192   return MatchOperand_Success;
9193 }
9194 
9195 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9196 
9197 //===----------------------------------------------------------------------===//
9198 // LDSDIR
9199 //===----------------------------------------------------------------------===//
9200 
9201 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
9202   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
9203 }
9204 
9205 bool AMDGPUOperand::isWaitVDST() const {
9206   return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9207 }
9208 
9209 //===----------------------------------------------------------------------===//
9210 // VINTERP
9211 //===----------------------------------------------------------------------===//
9212 
9213 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
9214   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
9215 }
9216 
9217 bool AMDGPUOperand::isWaitEXP() const {
9218   return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9219 }
9220