1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
10 #include "AMDKernelCodeT.h"
11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
12 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
13 #include "SIDefines.h"
14 #include "SIInstrInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallBitVector.h"
24 #include "llvm/ADT/SmallString.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include "llvm/ADT/Twine.h"
28 #include "llvm/BinaryFormat/ELF.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCContext.h"
31 #include "llvm/MC/MCExpr.h"
32 #include "llvm/MC/MCInst.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/MC/MCInstrInfo.h"
35 #include "llvm/MC/MCParser/MCAsmLexer.h"
36 #include "llvm/MC/MCParser/MCAsmParser.h"
37 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
39 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
40 #include "llvm/MC/MCRegisterInfo.h"
41 #include "llvm/MC/MCStreamer.h"
42 #include "llvm/MC/MCSubtargetInfo.h"
43 #include "llvm/MC/MCSymbol.h"
44 #include "llvm/Support/AMDGPUMetadata.h"
45 #include "llvm/Support/AMDHSAKernelDescriptor.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/Compiler.h"
48 #include "llvm/Support/Error.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include "llvm/Support/SMLoc.h"
52 #include "llvm/Support/TargetParser.h"
53 #include "llvm/Support/TargetRegistry.h"
54 #include "llvm/Support/raw_ostream.h"
55 #include <algorithm>
56 #include <cassert>
57 #include <cstdint>
58 #include <cstring>
59 #include <iterator>
60 #include <map>
61 #include <memory>
62 #include <string>
63 
64 using namespace llvm;
65 using namespace llvm::AMDGPU;
66 using namespace llvm::amdhsa;
67 
68 namespace {
69 
70 class AMDGPUAsmParser;
71 
72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
73 
74 //===----------------------------------------------------------------------===//
75 // Operand
76 //===----------------------------------------------------------------------===//
77 
78 class AMDGPUOperand : public MCParsedAsmOperand {
79   enum KindTy {
80     Token,
81     Immediate,
82     Register,
83     Expression
84   } Kind;
85 
86   SMLoc StartLoc, EndLoc;
87   const AMDGPUAsmParser *AsmParser;
88 
89 public:
90   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
91     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
92 
93   using Ptr = std::unique_ptr<AMDGPUOperand>;
94 
95   struct Modifiers {
96     bool Abs = false;
97     bool Neg = false;
98     bool Sext = false;
99 
100     bool hasFPModifiers() const { return Abs || Neg; }
101     bool hasIntModifiers() const { return Sext; }
102     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
103 
104     int64_t getFPModifiersOperand() const {
105       int64_t Operand = 0;
106       Operand |= Abs ? SISrcMods::ABS : 0u;
107       Operand |= Neg ? SISrcMods::NEG : 0u;
108       return Operand;
109     }
110 
111     int64_t getIntModifiersOperand() const {
112       int64_t Operand = 0;
113       Operand |= Sext ? SISrcMods::SEXT : 0u;
114       return Operand;
115     }
116 
117     int64_t getModifiersOperand() const {
118       assert(!(hasFPModifiers() && hasIntModifiers())
119            && "fp and int modifiers should not be used simultaneously");
120       if (hasFPModifiers()) {
121         return getFPModifiersOperand();
122       } else if (hasIntModifiers()) {
123         return getIntModifiersOperand();
124       } else {
125         return 0;
126       }
127     }
128 
129     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
130   };
131 
132   enum ImmTy {
133     ImmTyNone,
134     ImmTyGDS,
135     ImmTyLDS,
136     ImmTyOffen,
137     ImmTyIdxen,
138     ImmTyAddr64,
139     ImmTyOffset,
140     ImmTyInstOffset,
141     ImmTyOffset0,
142     ImmTyOffset1,
143     ImmTyDLC,
144     ImmTyGLC,
145     ImmTySLC,
146     ImmTySWZ,
147     ImmTyTFE,
148     ImmTyD16,
149     ImmTyClampSI,
150     ImmTyOModSI,
151     ImmTyDPP8,
152     ImmTyDppCtrl,
153     ImmTyDppRowMask,
154     ImmTyDppBankMask,
155     ImmTyDppBoundCtrl,
156     ImmTyDppFi,
157     ImmTySdwaDstSel,
158     ImmTySdwaSrc0Sel,
159     ImmTySdwaSrc1Sel,
160     ImmTySdwaDstUnused,
161     ImmTyDMask,
162     ImmTyDim,
163     ImmTyUNorm,
164     ImmTyDA,
165     ImmTyR128A16,
166     ImmTyLWE,
167     ImmTyExpTgt,
168     ImmTyExpCompr,
169     ImmTyExpVM,
170     ImmTyFORMAT,
171     ImmTyHwreg,
172     ImmTyOff,
173     ImmTySendMsg,
174     ImmTyInterpSlot,
175     ImmTyInterpAttr,
176     ImmTyAttrChan,
177     ImmTyOpSel,
178     ImmTyOpSelHi,
179     ImmTyNegLo,
180     ImmTyNegHi,
181     ImmTySwizzle,
182     ImmTyGprIdxMode,
183     ImmTyHigh,
184     ImmTyBLGP,
185     ImmTyCBSZ,
186     ImmTyABID,
187     ImmTyEndpgm,
188   };
189 
190 private:
191   struct TokOp {
192     const char *Data;
193     unsigned Length;
194   };
195 
196   struct ImmOp {
197     int64_t Val;
198     ImmTy Type;
199     bool IsFPImm;
200     Modifiers Mods;
201   };
202 
203   struct RegOp {
204     unsigned RegNo;
205     Modifiers Mods;
206   };
207 
208   union {
209     TokOp Tok;
210     ImmOp Imm;
211     RegOp Reg;
212     const MCExpr *Expr;
213   };
214 
215 public:
216   bool isToken() const override {
217     if (Kind == Token)
218       return true;
219 
220     // When parsing operands, we can't always tell if something was meant to be
221     // a token, like 'gds', or an expression that references a global variable.
222     // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
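    // For example (illustrative): in "ds_write_b32 v1, v2 gds" the trailing
    // "gds" may have been parsed as a symbol reference expression; isToken()
    // still accepts it, and getToken() returns the symbol name "gds".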
224     return isSymbolRefExpr();
225   }
226 
227   bool isSymbolRefExpr() const {
228     return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
229   }
230 
231   bool isImm() const override {
232     return Kind == Immediate;
233   }
234 
235   bool isInlinableImm(MVT type) const;
236   bool isLiteralImm(MVT type) const;
237 
238   bool isRegKind() const {
239     return Kind == Register;
240   }
241 
242   bool isReg() const override {
243     return isRegKind() && !hasModifiers();
244   }
245 
246   bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
247     return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
248   }
249 
250   bool isRegOrImmWithInt16InputMods() const {
251     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
252   }
253 
254   bool isRegOrImmWithInt32InputMods() const {
255     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
256   }
257 
258   bool isRegOrImmWithInt64InputMods() const {
259     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
260   }
261 
262   bool isRegOrImmWithFP16InputMods() const {
263     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
264   }
265 
266   bool isRegOrImmWithFP32InputMods() const {
267     return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
268   }
269 
270   bool isRegOrImmWithFP64InputMods() const {
271     return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
272   }
273 
274   bool isVReg() const {
275     return isRegClass(AMDGPU::VGPR_32RegClassID) ||
276            isRegClass(AMDGPU::VReg_64RegClassID) ||
277            isRegClass(AMDGPU::VReg_96RegClassID) ||
278            isRegClass(AMDGPU::VReg_128RegClassID) ||
279            isRegClass(AMDGPU::VReg_160RegClassID) ||
280            isRegClass(AMDGPU::VReg_256RegClassID) ||
281            isRegClass(AMDGPU::VReg_512RegClassID) ||
282            isRegClass(AMDGPU::VReg_1024RegClassID);
283   }
284 
285   bool isVReg32() const {
286     return isRegClass(AMDGPU::VGPR_32RegClassID);
287   }
288 
289   bool isVReg32OrOff() const {
290     return isOff() || isVReg32();
291   }
292 
293   bool isNull() const {
294     return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
295   }
296 
297   bool isSDWAOperand(MVT type) const;
298   bool isSDWAFP16Operand() const;
299   bool isSDWAFP32Operand() const;
300   bool isSDWAInt16Operand() const;
301   bool isSDWAInt32Operand() const;
302 
303   bool isImmTy(ImmTy ImmT) const {
304     return isImm() && Imm.Type == ImmT;
305   }
306 
307   bool isImmModifier() const {
308     return isImm() && Imm.Type != ImmTyNone;
309   }
310 
311   bool isClampSI() const { return isImmTy(ImmTyClampSI); }
312   bool isOModSI() const { return isImmTy(ImmTyOModSI); }
313   bool isDMask() const { return isImmTy(ImmTyDMask); }
314   bool isDim() const { return isImmTy(ImmTyDim); }
315   bool isUNorm() const { return isImmTy(ImmTyUNorm); }
316   bool isDA() const { return isImmTy(ImmTyDA); }
317   bool isR128A16() const { return isImmTy(ImmTyR128A16); }
318   bool isLWE() const { return isImmTy(ImmTyLWE); }
319   bool isOff() const { return isImmTy(ImmTyOff); }
320   bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
321   bool isExpVM() const { return isImmTy(ImmTyExpVM); }
322   bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
323   bool isOffen() const { return isImmTy(ImmTyOffen); }
324   bool isIdxen() const { return isImmTy(ImmTyIdxen); }
325   bool isAddr64() const { return isImmTy(ImmTyAddr64); }
326   bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
327   bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
328   bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
329 
330   bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
331   bool isGDS() const { return isImmTy(ImmTyGDS); }
332   bool isLDS() const { return isImmTy(ImmTyLDS); }
333   bool isDLC() const { return isImmTy(ImmTyDLC); }
334   bool isGLC() const { return isImmTy(ImmTyGLC); }
335   bool isSLC() const { return isImmTy(ImmTySLC); }
336   bool isSWZ() const { return isImmTy(ImmTySWZ); }
337   bool isTFE() const { return isImmTy(ImmTyTFE); }
338   bool isD16() const { return isImmTy(ImmTyD16); }
339   bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
340   bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
341   bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
342   bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
343   bool isFI() const { return isImmTy(ImmTyDppFi); }
344   bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
345   bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
346   bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
347   bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
348   bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
349   bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
350   bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
351   bool isOpSel() const { return isImmTy(ImmTyOpSel); }
352   bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
353   bool isNegLo() const { return isImmTy(ImmTyNegLo); }
354   bool isNegHi() const { return isImmTy(ImmTyNegHi); }
355   bool isHigh() const { return isImmTy(ImmTyHigh); }
356 
357   bool isMod() const {
358     return isClampSI() || isOModSI();
359   }
360 
361   bool isRegOrImm() const {
362     return isReg() || isImm();
363   }
364 
365   bool isRegClass(unsigned RCID) const;
366 
367   bool isInlineValue() const;
368 
369   bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
370     return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
371   }
372 
373   bool isSCSrcB16() const {
374     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
375   }
376 
377   bool isSCSrcV2B16() const {
378     return isSCSrcB16();
379   }
380 
381   bool isSCSrcB32() const {
382     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
383   }
384 
385   bool isSCSrcB64() const {
386     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
387   }
388 
389   bool isBoolReg() const;
390 
391   bool isSCSrcF16() const {
392     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
393   }
394 
395   bool isSCSrcV2F16() const {
396     return isSCSrcF16();
397   }
398 
399   bool isSCSrcF32() const {
400     return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
401   }
402 
403   bool isSCSrcF64() const {
404     return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
405   }
406 
407   bool isSSrcB32() const {
408     return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
409   }
410 
411   bool isSSrcB16() const {
412     return isSCSrcB16() || isLiteralImm(MVT::i16);
413   }
414 
415   bool isSSrcV2B16() const {
416     llvm_unreachable("cannot happen");
417     return isSSrcB16();
418   }
419 
420   bool isSSrcB64() const {
421     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrcB64().
423     return isSCSrcB64() || isLiteralImm(MVT::i64);
424   }
425 
426   bool isSSrcF32() const {
427     return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
428   }
429 
430   bool isSSrcF64() const {
431     return isSCSrcB64() || isLiteralImm(MVT::f64);
432   }
433 
434   bool isSSrcF16() const {
435     return isSCSrcB16() || isLiteralImm(MVT::f16);
436   }
437 
438   bool isSSrcV2F16() const {
439     llvm_unreachable("cannot happen");
440     return isSSrcF16();
441   }
442 
443   bool isSSrcOrLdsB32() const {
444     return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
445            isLiteralImm(MVT::i32) || isExpr();
446   }
447 
448   bool isVCSrcB32() const {
449     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
450   }
451 
452   bool isVCSrcB64() const {
453     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
454   }
455 
456   bool isVCSrcB16() const {
457     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
458   }
459 
460   bool isVCSrcV2B16() const {
461     return isVCSrcB16();
462   }
463 
464   bool isVCSrcF32() const {
465     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
466   }
467 
468   bool isVCSrcF64() const {
469     return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
470   }
471 
472   bool isVCSrcF16() const {
473     return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
474   }
475 
476   bool isVCSrcV2F16() const {
477     return isVCSrcF16();
478   }
479 
480   bool isVSrcB32() const {
481     return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
482   }
483 
484   bool isVSrcB64() const {
485     return isVCSrcF64() || isLiteralImm(MVT::i64);
486   }
487 
488   bool isVSrcB16() const {
489     return isVCSrcF16() || isLiteralImm(MVT::i16);
490   }
491 
492   bool isVSrcV2B16() const {
493     return isVSrcB16() || isLiteralImm(MVT::v2i16);
494   }
495 
496   bool isVSrcF32() const {
497     return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
498   }
499 
500   bool isVSrcF64() const {
501     return isVCSrcF64() || isLiteralImm(MVT::f64);
502   }
503 
504   bool isVSrcF16() const {
505     return isVCSrcF16() || isLiteralImm(MVT::f16);
506   }
507 
508   bool isVSrcV2F16() const {
509     return isVSrcF16() || isLiteralImm(MVT::v2f16);
510   }
511 
512   bool isVISrcB32() const {
513     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
514   }
515 
516   bool isVISrcB16() const {
517     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
518   }
519 
520   bool isVISrcV2B16() const {
521     return isVISrcB16();
522   }
523 
524   bool isVISrcF32() const {
525     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
526   }
527 
528   bool isVISrcF16() const {
529     return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
530   }
531 
532   bool isVISrcV2F16() const {
533     return isVISrcF16() || isVISrcB32();
534   }
535 
536   bool isAISrcB32() const {
537     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
538   }
539 
540   bool isAISrcB16() const {
541     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
542   }
543 
544   bool isAISrcV2B16() const {
545     return isAISrcB16();
546   }
547 
548   bool isAISrcF32() const {
549     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
550   }
551 
552   bool isAISrcF16() const {
553     return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
554   }
555 
556   bool isAISrcV2F16() const {
557     return isAISrcF16() || isAISrcB32();
558   }
559 
560   bool isAISrc_128B32() const {
561     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
562   }
563 
564   bool isAISrc_128B16() const {
565     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
566   }
567 
568   bool isAISrc_128V2B16() const {
569     return isAISrc_128B16();
570   }
571 
572   bool isAISrc_128F32() const {
573     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
574   }
575 
576   bool isAISrc_128F16() const {
577     return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
578   }
579 
580   bool isAISrc_128V2F16() const {
581     return isAISrc_128F16() || isAISrc_128B32();
582   }
583 
584   bool isAISrc_512B32() const {
585     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
586   }
587 
588   bool isAISrc_512B16() const {
589     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
590   }
591 
592   bool isAISrc_512V2B16() const {
593     return isAISrc_512B16();
594   }
595 
596   bool isAISrc_512F32() const {
597     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
598   }
599 
600   bool isAISrc_512F16() const {
601     return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
602   }
603 
604   bool isAISrc_512V2F16() const {
605     return isAISrc_512F16() || isAISrc_512B32();
606   }
607 
608   bool isAISrc_1024B32() const {
609     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
610   }
611 
612   bool isAISrc_1024B16() const {
613     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
614   }
615 
616   bool isAISrc_1024V2B16() const {
617     return isAISrc_1024B16();
618   }
619 
620   bool isAISrc_1024F32() const {
621     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
622   }
623 
624   bool isAISrc_1024F16() const {
625     return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
626   }
627 
628   bool isAISrc_1024V2F16() const {
629     return isAISrc_1024F16() || isAISrc_1024B32();
630   }
631 
632   bool isKImmFP32() const {
633     return isLiteralImm(MVT::f32);
634   }
635 
636   bool isKImmFP16() const {
637     return isLiteralImm(MVT::f16);
638   }
639 
640   bool isMem() const override {
641     return false;
642   }
643 
644   bool isExpr() const {
645     return Kind == Expression;
646   }
647 
648   bool isSoppBrTarget() const {
649     return isExpr() || isImm();
650   }
651 
652   bool isSWaitCnt() const;
653   bool isHwreg() const;
654   bool isSendMsg() const;
655   bool isSwizzle() const;
656   bool isSMRDOffset8() const;
657   bool isSMRDOffset20() const;
658   bool isSMRDLiteralOffset() const;
659   bool isDPP8() const;
660   bool isDPPCtrl() const;
661   bool isBLGP() const;
662   bool isCBSZ() const;
663   bool isABID() const;
664   bool isGPRIdxMode() const;
665   bool isS16Imm() const;
666   bool isU16Imm() const;
667   bool isEndpgm() const;
668 
669   StringRef getExpressionAsToken() const {
670     assert(isExpr());
671     const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
672     return S->getSymbol().getName();
673   }
674 
675   StringRef getToken() const {
676     assert(isToken());
677 
678     if (Kind == Expression)
679       return getExpressionAsToken();
680 
681     return StringRef(Tok.Data, Tok.Length);
682   }
683 
684   int64_t getImm() const {
685     assert(isImm());
686     return Imm.Val;
687   }
688 
689   ImmTy getImmTy() const {
690     assert(isImm());
691     return Imm.Type;
692   }
693 
694   unsigned getReg() const override {
695     assert(isRegKind());
696     return Reg.RegNo;
697   }
698 
699   SMLoc getStartLoc() const override {
700     return StartLoc;
701   }
702 
703   SMLoc getEndLoc() const override {
704     return EndLoc;
705   }
706 
707   SMRange getLocRange() const {
708     return SMRange(StartLoc, EndLoc);
709   }
710 
711   Modifiers getModifiers() const {
712     assert(isRegKind() || isImmTy(ImmTyNone));
713     return isRegKind() ? Reg.Mods : Imm.Mods;
714   }
715 
716   void setModifiers(Modifiers Mods) {
717     assert(isRegKind() || isImmTy(ImmTyNone));
718     if (isRegKind())
719       Reg.Mods = Mods;
720     else
721       Imm.Mods = Mods;
722   }
723 
724   bool hasModifiers() const {
725     return getModifiers().hasModifiers();
726   }
727 
728   bool hasFPModifiers() const {
729     return getModifiers().hasFPModifiers();
730   }
731 
732   bool hasIntModifiers() const {
733     return getModifiers().hasIntModifiers();
734   }
735 
736   uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;
737 
738   void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
739 
740   void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
741 
742   template <unsigned Bitwidth>
743   void addKImmFPOperands(MCInst &Inst, unsigned N) const;
744 
745   void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
746     addKImmFPOperands<16>(Inst, N);
747   }
748 
749   void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
750     addKImmFPOperands<32>(Inst, N);
751   }
752 
753   void addRegOperands(MCInst &Inst, unsigned N) const;
754 
755   void addBoolRegOperands(MCInst &Inst, unsigned N) const {
756     addRegOperands(Inst, N);
757   }
758 
759   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
760     if (isRegKind())
761       addRegOperands(Inst, N);
762     else if (isExpr())
763       Inst.addOperand(MCOperand::createExpr(Expr));
764     else
765       addImmOperands(Inst, N);
766   }
767 
768   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
769     Modifiers Mods = getModifiers();
770     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
771     if (isRegKind()) {
772       addRegOperands(Inst, N);
773     } else {
774       addImmOperands(Inst, N, false);
775     }
776   }
777 
778   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
779     assert(!hasIntModifiers());
780     addRegOrImmWithInputModsOperands(Inst, N);
781   }
782 
783   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
784     assert(!hasFPModifiers());
785     addRegOrImmWithInputModsOperands(Inst, N);
786   }
787 
788   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
789     Modifiers Mods = getModifiers();
790     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
791     assert(isRegKind());
792     addRegOperands(Inst, N);
793   }
794 
795   void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
796     assert(!hasIntModifiers());
797     addRegWithInputModsOperands(Inst, N);
798   }
799 
800   void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
801     assert(!hasFPModifiers());
802     addRegWithInputModsOperands(Inst, N);
803   }
804 
805   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
806     if (isImm())
807       addImmOperands(Inst, N);
808     else {
809       assert(isExpr());
810       Inst.addOperand(MCOperand::createExpr(Expr));
811     }
812   }
813 
814   static void printImmTy(raw_ostream& OS, ImmTy Type) {
815     switch (Type) {
816     case ImmTyNone: OS << "None"; break;
817     case ImmTyGDS: OS << "GDS"; break;
818     case ImmTyLDS: OS << "LDS"; break;
819     case ImmTyOffen: OS << "Offen"; break;
820     case ImmTyIdxen: OS << "Idxen"; break;
821     case ImmTyAddr64: OS << "Addr64"; break;
822     case ImmTyOffset: OS << "Offset"; break;
823     case ImmTyInstOffset: OS << "InstOffset"; break;
824     case ImmTyOffset0: OS << "Offset0"; break;
825     case ImmTyOffset1: OS << "Offset1"; break;
826     case ImmTyDLC: OS << "DLC"; break;
827     case ImmTyGLC: OS << "GLC"; break;
828     case ImmTySLC: OS << "SLC"; break;
829     case ImmTySWZ: OS << "SWZ"; break;
830     case ImmTyTFE: OS << "TFE"; break;
831     case ImmTyD16: OS << "D16"; break;
832     case ImmTyFORMAT: OS << "FORMAT"; break;
833     case ImmTyClampSI: OS << "ClampSI"; break;
834     case ImmTyOModSI: OS << "OModSI"; break;
835     case ImmTyDPP8: OS << "DPP8"; break;
836     case ImmTyDppCtrl: OS << "DppCtrl"; break;
837     case ImmTyDppRowMask: OS << "DppRowMask"; break;
838     case ImmTyDppBankMask: OS << "DppBankMask"; break;
839     case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
840     case ImmTyDppFi: OS << "FI"; break;
841     case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
842     case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
843     case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
844     case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
845     case ImmTyDMask: OS << "DMask"; break;
846     case ImmTyDim: OS << "Dim"; break;
847     case ImmTyUNorm: OS << "UNorm"; break;
848     case ImmTyDA: OS << "DA"; break;
849     case ImmTyR128A16: OS << "R128A16"; break;
850     case ImmTyLWE: OS << "LWE"; break;
851     case ImmTyOff: OS << "Off"; break;
852     case ImmTyExpTgt: OS << "ExpTgt"; break;
853     case ImmTyExpCompr: OS << "ExpCompr"; break;
854     case ImmTyExpVM: OS << "ExpVM"; break;
855     case ImmTyHwreg: OS << "Hwreg"; break;
856     case ImmTySendMsg: OS << "SendMsg"; break;
857     case ImmTyInterpSlot: OS << "InterpSlot"; break;
858     case ImmTyInterpAttr: OS << "InterpAttr"; break;
859     case ImmTyAttrChan: OS << "AttrChan"; break;
860     case ImmTyOpSel: OS << "OpSel"; break;
861     case ImmTyOpSelHi: OS << "OpSelHi"; break;
862     case ImmTyNegLo: OS << "NegLo"; break;
863     case ImmTyNegHi: OS << "NegHi"; break;
864     case ImmTySwizzle: OS << "Swizzle"; break;
865     case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
866     case ImmTyHigh: OS << "High"; break;
867     case ImmTyBLGP: OS << "BLGP"; break;
868     case ImmTyCBSZ: OS << "CBSZ"; break;
869     case ImmTyABID: OS << "ABID"; break;
870     case ImmTyEndpgm: OS << "Endpgm"; break;
871     }
872   }
873 
874   void print(raw_ostream &OS) const override {
875     switch (Kind) {
876     case Register:
877       OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
878       break;
879     case Immediate:
880       OS << '<' << getImm();
881       if (getImmTy() != ImmTyNone) {
882         OS << " type: "; printImmTy(OS, getImmTy());
883       }
884       OS << " mods: " << Imm.Mods << '>';
885       break;
886     case Token:
887       OS << '\'' << getToken() << '\'';
888       break;
889     case Expression:
890       OS << "<expr " << *Expr << '>';
891       break;
892     }
893   }
894 
895   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
896                                       int64_t Val, SMLoc Loc,
897                                       ImmTy Type = ImmTyNone,
898                                       bool IsFPImm = false) {
899     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
900     Op->Imm.Val = Val;
901     Op->Imm.IsFPImm = IsFPImm;
902     Op->Imm.Type = Type;
903     Op->Imm.Mods = Modifiers();
904     Op->StartLoc = Loc;
905     Op->EndLoc = Loc;
906     return Op;
907   }
908 
909   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
910                                         StringRef Str, SMLoc Loc,
911                                         bool HasExplicitEncodingSize = true) {
912     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
913     Res->Tok.Data = Str.data();
914     Res->Tok.Length = Str.size();
915     Res->StartLoc = Loc;
916     Res->EndLoc = Loc;
917     return Res;
918   }
919 
920   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
921                                       unsigned RegNo, SMLoc S,
922                                       SMLoc E) {
923     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
924     Op->Reg.RegNo = RegNo;
925     Op->Reg.Mods = Modifiers();
926     Op->StartLoc = S;
927     Op->EndLoc = E;
928     return Op;
929   }
930 
931   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
932                                        const class MCExpr *Expr, SMLoc S) {
933     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
934     Op->Expr = Expr;
935     Op->StartLoc = S;
936     Op->EndLoc = S;
937     return Op;
938   }
939 };
940 
941 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
942   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
943   return OS;
944 }
945 
946 //===----------------------------------------------------------------------===//
947 // AsmParser
948 //===----------------------------------------------------------------------===//
949 
950 // Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at the .amdgpu_hsa_kernel directive and ends at the
// next .amdgpu_hsa_kernel directive or at EOF.
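// A minimal illustration (hypothetical assembly, not taken from a test):
//
//   .amdgpu_hsa_kernel kernel_a
//   kernel_a:
//     v_mov_b32 v3, 0
//   .amdgpu_hsa_kernel kernel_b
//
// While parsing kernel_a, the reference to v3 raises the tracked VGPR count;
// the running maxima are published through the .kernel.sgpr_count and
// .kernel.vgpr_count symbols updated in usesSgprAt()/usesVgprAt() below.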
953 class KernelScopeInfo {
954   int SgprIndexUnusedMin = -1;
955   int VgprIndexUnusedMin = -1;
956   MCContext *Ctx = nullptr;
957 
958   void usesSgprAt(int i) {
959     if (i >= SgprIndexUnusedMin) {
960       SgprIndexUnusedMin = ++i;
961       if (Ctx) {
962         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
963         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
964       }
965     }
966   }
967 
968   void usesVgprAt(int i) {
969     if (i >= VgprIndexUnusedMin) {
970       VgprIndexUnusedMin = ++i;
971       if (Ctx) {
972         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
973         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
974       }
975     }
976   }
977 
978 public:
979   KernelScopeInfo() = default;
980 
981   void initialize(MCContext &Context) {
982     Ctx = &Context;
983     usesSgprAt(SgprIndexUnusedMin = -1);
984     usesVgprAt(VgprIndexUnusedMin = -1);
985   }
986 
987   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
988     switch (RegKind) {
989       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
990       case IS_AGPR: // fall through
991       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
992       default: break;
993     }
994   }
995 };
996 
997 class AMDGPUAsmParser : public MCTargetAsmParser {
998   MCAsmParser &Parser;
999 
1000   // Number of extra operands parsed after the first optional operand.
1001   // This may be necessary to skip hardcoded mandatory operands.
1002   static const unsigned MAX_OPR_LOOKAHEAD = 8;
1003 
1004   unsigned ForcedEncodingSize = 0;
1005   bool ForcedDPP = false;
1006   bool ForcedSDWA = false;
1007   KernelScopeInfo KernelScope;
1008 
1009   /// @name Auto-generated Match Functions
1010   /// {
1011 
1012 #define GET_ASSEMBLER_HEADER
1013 #include "AMDGPUGenAsmMatcher.inc"
1014 
1015   /// }
1016 
1017 private:
1018   bool ParseAsAbsoluteExpression(uint32_t &Ret);
1019   bool OutOfRangeError(SMRange Range);
1020   /// Calculate VGPR/SGPR blocks required for given target, reserved
1021   /// registers, and user-specified NextFreeXGPR values.
1022   ///
1023   /// \param Features [in] Target features, used for bug corrections.
1024   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1025   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1026   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1027   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1028   /// descriptor field, if valid.
1029   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1030   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1031   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1032   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1033   /// \param VGPRBlocks [out] Result VGPR block count.
1034   /// \param SGPRBlocks [out] Result SGPR block count.
1035   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1036                           bool FlatScrUsed, bool XNACKUsed,
1037                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
1038                           SMRange VGPRRange, unsigned NextFreeSGPR,
1039                           SMRange SGPRRange, unsigned &VGPRBlocks,
1040                           unsigned &SGPRBlocks);
1041   bool ParseDirectiveAMDGCNTarget();
1042   bool ParseDirectiveAMDHSAKernel();
1043   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
1044   bool ParseDirectiveHSACodeObjectVersion();
1045   bool ParseDirectiveHSACodeObjectISA();
1046   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1047   bool ParseDirectiveAMDKernelCodeT();
1048   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
1049   bool ParseDirectiveAMDGPUHsaKernel();
1050 
1051   bool ParseDirectiveISAVersion();
1052   bool ParseDirectiveHSAMetadata();
1053   bool ParseDirectivePALMetadataBegin();
1054   bool ParseDirectivePALMetadata();
1055   bool ParseDirectiveAMDGPULDS();
1056 
1057   /// Common code to parse out a block of text (typically YAML) between start and
1058   /// end directives.
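  ///
  /// An assumed example (directive names as used by ParseDirectiveHSAMetadata):
  ///
  ///   .amd_amdgpu_hsa_metadata
  ///   ---
  ///   Version: [ 1, 0 ]
  ///   ...
  ///   .end_amd_amdgpu_hsa_metadata
  ///
  /// Everything between the begin and end directives is accumulated verbatim
  /// into \p CollectString for the caller to interpret (typically as YAML).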
1059   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1060                            const char *AssemblerDirectiveEnd,
1061                            std::string &CollectString);
1062 
1063   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1064                              RegisterKind RegKind, unsigned Reg1);
1065   bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
1066                            unsigned& RegNum, unsigned& RegWidth);
1067   unsigned ParseRegularReg(RegisterKind &RegKind,
1068                            unsigned &RegNum,
1069                            unsigned &RegWidth);
1070   unsigned ParseSpecialReg(RegisterKind &RegKind,
1071                            unsigned &RegNum,
1072                            unsigned &RegWidth);
1073   unsigned ParseRegList(RegisterKind &RegKind,
1074                         unsigned &RegNum,
1075                         unsigned &RegWidth);
1076   bool ParseRegRange(unsigned& Num, unsigned& Width);
1077   unsigned getRegularReg(RegisterKind RegKind,
1078                          unsigned RegNum,
1079                          unsigned RegWidth);
1080 
1081   bool isRegister();
1082   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1083   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1084   void initializeGprCountSymbol(RegisterKind RegKind);
1085   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1086                              unsigned RegWidth);
1087   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1088                     bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
1089   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
1090                  bool IsGdsHardcoded);
1091 
1092 public:
1093   enum AMDGPUMatchResultTy {
1094     Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1095   };
1096   enum OperandMode {
1097     OperandMode_Default,
1098     OperandMode_NSA,
1099   };
1100 
1101   using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1102 
1103   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1104                const MCInstrInfo &MII,
1105                const MCTargetOptions &Options)
1106       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1107     MCAsmParserExtension::Initialize(Parser);
1108 
1109     if (getFeatureBits().none()) {
1110       // Set default features.
1111       copySTI().ToggleFeature("southern-islands");
1112     }
1113 
1114     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1115 
1116     {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
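      //
      // Illustrative (assumed) use from assembly: once defined, these symbols
      // can be referenced in ordinary expressions, e.g.
      //   .if .option.machine_version_major >= 8
      //     s_nop 0
      //   .endif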
1121       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1122       MCContext &Ctx = getContext();
1123       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1124         MCSymbol *Sym =
1125             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1126         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1127         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1128         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1129         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1130         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1131       } else {
1132         MCSymbol *Sym =
1133             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1134         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1135         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1136         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1137         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1138         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1139       }
1140       if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
1141         initializeGprCountSymbol(IS_VGPR);
1142         initializeGprCountSymbol(IS_SGPR);
1143       } else
1144         KernelScope.initialize(getContext());
1145     }
1146   }
1147 
1148   bool hasXNACK() const {
1149     return AMDGPU::hasXNACK(getSTI());
1150   }
1151 
1152   bool hasMIMG_R128() const {
1153     return AMDGPU::hasMIMG_R128(getSTI());
1154   }
1155 
1156   bool hasPackedD16() const {
1157     return AMDGPU::hasPackedD16(getSTI());
1158   }
1159 
1160   bool isSI() const {
1161     return AMDGPU::isSI(getSTI());
1162   }
1163 
1164   bool isCI() const {
1165     return AMDGPU::isCI(getSTI());
1166   }
1167 
1168   bool isVI() const {
1169     return AMDGPU::isVI(getSTI());
1170   }
1171 
1172   bool isGFX9() const {
1173     return AMDGPU::isGFX9(getSTI());
1174   }
1175 
1176   bool isGFX10() const {
1177     return AMDGPU::isGFX10(getSTI());
1178   }
1179 
1180   bool hasInv2PiInlineImm() const {
1181     return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1182   }
1183 
1184   bool hasFlatOffsets() const {
1185     return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1186   }
1187 
1188   bool hasSGPR102_SGPR103() const {
1189     return !isVI() && !isGFX9();
1190   }
1191 
1192   bool hasSGPR104_SGPR105() const {
1193     return isGFX10();
1194   }
1195 
1196   bool hasIntClamp() const {
1197     return getFeatureBits()[AMDGPU::FeatureIntClamp];
1198   }
1199 
1200   AMDGPUTargetStreamer &getTargetStreamer() {
1201     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1202     return static_cast<AMDGPUTargetStreamer &>(TS);
1203   }
1204 
1205   const MCRegisterInfo *getMRI() const {
1206     // We need this const_cast because for some reason getContext() is not const
1207     // in MCAsmParser.
1208     return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
1209   }
1210 
1211   const MCInstrInfo *getMII() const {
1212     return &MII;
1213   }
1214 
1215   const FeatureBitset &getFeatureBits() const {
1216     return getSTI().getFeatureBits();
1217   }
1218 
1219   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1220   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1221   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1222 
1223   unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1224   bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1225   bool isForcedDPP() const { return ForcedDPP; }
1226   bool isForcedSDWA() const { return ForcedSDWA; }
1227   ArrayRef<unsigned> getMatchedVariants() const;
1228 
1229   std::unique_ptr<AMDGPUOperand> parseRegister();
1230   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
1231   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1232   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1233                                       unsigned Kind) override;
1234   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1235                                OperandVector &Operands, MCStreamer &Out,
1236                                uint64_t &ErrorInfo,
1237                                bool MatchingInlineAsm) override;
1238   bool ParseDirective(AsmToken DirectiveID) override;
1239   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
1240                                     OperandMode Mode = OperandMode_Default);
1241   StringRef parseMnemonicSuffix(StringRef Name);
1242   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1243                         SMLoc NameLoc, OperandVector &Operands) override;
1244   //bool ProcessInstruction(MCInst &Inst);
1245 
1246   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
1247 
1248   OperandMatchResultTy
1249   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1250                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1251                      bool (*ConvertResult)(int64_t &) = nullptr);
1252 
1253   OperandMatchResultTy
1254   parseOperandArrayWithPrefix(const char *Prefix,
1255                               OperandVector &Operands,
1256                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1257                               bool (*ConvertResult)(int64_t&) = nullptr);
1258 
1259   OperandMatchResultTy
1260   parseNamedBit(const char *Name, OperandVector &Operands,
1261                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1262   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
1263                                              StringRef &Value);
1264 
1265   bool isModifier();
1266   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1267   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1268   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1269   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1270   bool parseSP3NegModifier();
1271   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
1272   OperandMatchResultTy parseReg(OperandVector &Operands);
1273   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
1274   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
1275   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
1276   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
1277   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
1278   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
1279   OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);
1280 
1281   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
1282   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
1283   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
1284   void cvtExp(MCInst &Inst, const OperandVector &Operands);
1285 
1286   bool parseCnt(int64_t &IntVal);
1287   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
1288   OperandMatchResultTy parseHwreg(OperandVector &Operands);
1289 
1290 private:
1291   struct OperandInfoTy {
1292     int64_t Id;
1293     bool IsSymbolic = false;
1294     bool IsDefined = false;
1295 
1296     OperandInfoTy(int64_t Id_) : Id(Id_) {}
1297   };
1298 
1299   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1300   bool validateSendMsg(const OperandInfoTy &Msg,
1301                        const OperandInfoTy &Op,
1302                        const OperandInfoTy &Stream,
1303                        const SMLoc Loc);
1304 
1305   bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
1306   bool validateHwreg(const OperandInfoTy &HwReg,
1307                      const int64_t Offset,
1308                      const int64_t Width,
1309                      const SMLoc Loc);
1310 
1311   void errorExpTgt();
1312   OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
1313   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1314 
1315   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1316   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1317   bool validateSOPLiteral(const MCInst &Inst) const;
1318   bool validateConstantBusLimitations(const MCInst &Inst);
1319   bool validateEarlyClobberLimitations(const MCInst &Inst);
1320   bool validateIntClampSupported(const MCInst &Inst);
1321   bool validateMIMGAtomicDMask(const MCInst &Inst);
1322   bool validateMIMGGatherDMask(const MCInst &Inst);
1323   bool validateMovrels(const MCInst &Inst);
1324   bool validateMIMGDataSize(const MCInst &Inst);
1325   bool validateMIMGAddrSize(const MCInst &Inst);
1326   bool validateMIMGD16(const MCInst &Inst);
1327   bool validateMIMGDim(const MCInst &Inst);
1328   bool validateLdsDirect(const MCInst &Inst);
1329   bool validateOpSel(const MCInst &Inst);
1330   bool validateVccOperand(unsigned Reg) const;
1331   bool validateVOP3Literal(const MCInst &Inst) const;
1332   unsigned getConstantBusLimit(unsigned Opcode) const;
1333   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1334   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1335   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1336 
1337   bool isId(const StringRef Id) const;
1338   bool isId(const AsmToken &Token, const StringRef Id) const;
1339   bool isToken(const AsmToken::TokenKind Kind) const;
1340   bool trySkipId(const StringRef Id);
1341   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1342   bool trySkipToken(const AsmToken::TokenKind Kind);
1343   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1344   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1345   void peekTokens(MutableArrayRef<AsmToken> Tokens);
1346   AsmToken::TokenKind getTokenKind() const;
1347   bool parseExpr(int64_t &Imm);
1348   bool parseExpr(OperandVector &Operands);
1349   StringRef getTokenStr() const;
1350   AsmToken peekToken();
1351   AsmToken getToken() const;
1352   SMLoc getLoc() const;
1353   void lex();
1354 
1355 public:
1356   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
1357   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
1358 
1359   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
1360   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
1361   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
1362   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
1363   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
1364   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
1365 
1366   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1367                             const unsigned MinVal,
1368                             const unsigned MaxVal,
1369                             const StringRef ErrMsg);
1370   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
1371   bool parseSwizzleOffset(int64_t &Imm);
1372   bool parseSwizzleMacro(int64_t &Imm);
1373   bool parseSwizzleQuadPerm(int64_t &Imm);
1374   bool parseSwizzleBitmaskPerm(int64_t &Imm);
1375   bool parseSwizzleBroadcast(int64_t &Imm);
1376   bool parseSwizzleSwap(int64_t &Imm);
1377   bool parseSwizzleReverse(int64_t &Imm);
1378 
1379   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
1380   int64_t parseGPRIdxMacro();
1381 
1382   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
1383   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
1384   void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
1385   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
1386   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
1387 
1388   AMDGPUOperand::Ptr defaultDLC() const;
1389   AMDGPUOperand::Ptr defaultGLC() const;
1390   AMDGPUOperand::Ptr defaultSLC() const;
1391 
1392   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
1393   AMDGPUOperand::Ptr defaultSMRDOffset20() const;
1394   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
1395   AMDGPUOperand::Ptr defaultFlatOffset() const;
1396 
1397   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
1398 
1399   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1400                OptionalImmIndexMap &OptionalIdx);
1401   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1402   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1403   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1404 
1405   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1406 
1407   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
1408                bool IsAtomic = false);
1409   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
1410 
1411   OperandMatchResultTy parseDim(OperandVector &Operands);
1412   OperandMatchResultTy parseDPP8(OperandVector &Operands);
1413   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1414   AMDGPUOperand::Ptr defaultRowMask() const;
1415   AMDGPUOperand::Ptr defaultBankMask() const;
1416   AMDGPUOperand::Ptr defaultBoundCtrl() const;
1417   AMDGPUOperand::Ptr defaultFI() const;
1418   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
1419   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1420 
1421   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1422                                     AMDGPUOperand::ImmTy Type);
1423   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1424   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1425   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1426   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1427   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1428   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1429   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1430                uint64_t BasicInstType,
1431                bool SkipDstVcc = false,
1432                bool SkipSrcVcc = false);
1433 
1434   AMDGPUOperand::Ptr defaultBLGP() const;
1435   AMDGPUOperand::Ptr defaultCBSZ() const;
1436   AMDGPUOperand::Ptr defaultABID() const;
1437 
1438   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1439   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1440 };
1441 
1442 struct OptionalOperand {
1443   const char *Name;
1444   AMDGPUOperand::ImmTy Type;
1445   bool IsBit;
1446   bool (*ConvertResult)(int64_t&);
1447 };
1448 
1449 } // end anonymous namespace
1450 
// May be called with an integer type of equivalent bitwidth.
1452 static const fltSemantics *getFltSemantics(unsigned Size) {
1453   switch (Size) {
1454   case 4:
1455     return &APFloat::IEEEsingle();
1456   case 8:
1457     return &APFloat::IEEEdouble();
1458   case 2:
1459     return &APFloat::IEEEhalf();
1460   default:
1461     llvm_unreachable("unsupported fp type");
1462   }
1463 }
1464 
1465 static const fltSemantics *getFltSemantics(MVT VT) {
1466   return getFltSemantics(VT.getSizeInBits() / 8);
1467 }
1468 
1469 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1470   switch (OperandType) {
1471   case AMDGPU::OPERAND_REG_IMM_INT32:
1472   case AMDGPU::OPERAND_REG_IMM_FP32:
1473   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1474   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1475   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1476   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1477     return &APFloat::IEEEsingle();
1478   case AMDGPU::OPERAND_REG_IMM_INT64:
1479   case AMDGPU::OPERAND_REG_IMM_FP64:
1480   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1481   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1482     return &APFloat::IEEEdouble();
1483   case AMDGPU::OPERAND_REG_IMM_INT16:
1484   case AMDGPU::OPERAND_REG_IMM_FP16:
1485   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1486   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1487   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1488   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1489   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1490   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1491   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1492   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1493   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1494   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1495     return &APFloat::IEEEhalf();
1496   default:
1497     llvm_unreachable("unsupported fp type");
1498   }
1499 }
1500 
1501 //===----------------------------------------------------------------------===//
1502 // Operand
1503 //===----------------------------------------------------------------------===//
1504 
1505 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1506   bool Lost;
1507 
  // Convert the literal to the operand's floating-point type.
1509   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1510                                                APFloat::rmNearestTiesToEven,
1511                                                &Lost);
  // We allow precision loss but not overflow or underflow.
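  // Worked examples (assumed values): converting 0.1 to f16 is merely inexact
  // and is accepted; 1.0e10 overflows f16 and 1.0e-10 underflows f16, so both
  // are rejected.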
1513   if (Status != APFloat::opOK &&
1514       Lost &&
1515       ((Status & APFloat::opOverflow)  != 0 ||
1516        (Status & APFloat::opUnderflow) != 0)) {
1517     return false;
1518   }
1519 
1520   return true;
1521 }
1522 
1523 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1524   return isUIntN(Size, Val) || isIntN(Size, Val);
1525 }
1526 
1527 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1528 
  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as 32-bit
  // operands only.
1533   if (isInlineValue()) {
1534     return true;
1535   }
1536 
1537   if (!isImmTy(ImmTyNone)) {
1538     // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1539     return false;
1540   }
  // TODO: We should avoid using host floats here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.
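  //
  // For reference (hardware behavior assumed; see AMDGPU::isInlinableLiteral*):
  // typical inline constants are the integers -16..64 and the FP values 0.0,
  // +/-0.5, +/-1.0, +/-2.0, +/-4.0, plus 1/(2*pi) on targets that support it,
  // so "v_add_f32 v0, 0.5, v1" needs no extra literal dword while 0.3 does.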
1544 
1545   APInt Literal(64, Imm.Val);
1546 
1547   if (Imm.IsFPImm) { // We got fp literal token
1548     if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1549       return AMDGPU::isInlinableLiteral64(Imm.Val,
1550                                           AsmParser->hasInv2PiInlineImm());
1551     }
1552 
1553     APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1554     if (!canLosslesslyConvertToFPType(FPLiteral, type))
1555       return false;
1556 
1557     if (type.getScalarSizeInBits() == 16) {
1558       return AMDGPU::isInlinableLiteral16(
1559         static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1560         AsmParser->hasInv2PiInlineImm());
1561     }
1562 
1563     // Check if single precision literal is inlinable
1564     return AMDGPU::isInlinableLiteral32(
1565       static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1566       AsmParser->hasInv2PiInlineImm());
1567   }
1568 
1569   // We got int literal token.
1570   if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1571     return AMDGPU::isInlinableLiteral64(Imm.Val,
1572                                         AsmParser->hasInv2PiInlineImm());
1573   }
1574 
1575   if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1576     return false;
1577   }
1578 
1579   if (type.getScalarSizeInBits() == 16) {
1580     return AMDGPU::isInlinableLiteral16(
1581       static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1582       AsmParser->hasInv2PiInlineImm());
1583   }
1584 
1585   return AMDGPU::isInlinableLiteral32(
1586     static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1587     AsmParser->hasInv2PiInlineImm());
1588 }
1589 
1590 bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
1592   if (!isImmTy(ImmTyNone)) {
1593     return false;
1594   }
1595 
1596   if (!Imm.IsFPImm) {
1597     // We got int literal token.
1598 
1599     if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the same
      // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
      // ambiguity, these cases are disabled.
1603       return false;
1604     }
1605 
1606     unsigned Size = type.getSizeInBits();
1607     if (Size == 64)
1608       Size = 32;
1609 
1610     // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1611     // types.
1612     return isSafeTruncation(Imm.Val, Size);
1613   }
1614 
1615   // We got fp literal token
1616   if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zeroes, but we accept such literals.
1618     return true;
1619   }
1620 
1621   if (type == MVT::i64) { // Expected 64-bit int operand
1622     // We don't allow fp literals in 64-bit integer instructions. It is
1623     // unclear how we should encode them.
1624     return false;
1625   }
1626 
1627   // We allow fp literals with f16x2 operands assuming that the specified
1628   // literal goes into the lower half and the upper half is zero. We also
  // require that the literal can be losslessly converted to f16.
1630   MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1631                      (type == MVT::v2i16)? MVT::i16 : type;
1632 
1633   APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1634   return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1635 }
1636 
1637 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1638   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1639 }
1640 
1641 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1642   if (AsmParser->isVI())
1643     return isVReg32();
1644   else if (AsmParser->isGFX9() || AsmParser->isGFX10())
1645     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1646   else
1647     return false;
1648 }
1649 
1650 bool AMDGPUOperand::isSDWAFP16Operand() const {
1651   return isSDWAOperand(MVT::f16);
1652 }
1653 
1654 bool AMDGPUOperand::isSDWAFP32Operand() const {
1655   return isSDWAOperand(MVT::f32);
1656 }
1657 
1658 bool AMDGPUOperand::isSDWAInt16Operand() const {
1659   return isSDWAOperand(MVT::i16);
1660 }
1661 
1662 bool AMDGPUOperand::isSDWAInt32Operand() const {
1663   return isSDWAOperand(MVT::i32);
1664 }
1665 
1666 bool AMDGPUOperand::isBoolReg() const {
1667   return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
1668          (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
1669 }
1670 
1671 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
1672 {
1673   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1674   assert(Size == 2 || Size == 4 || Size == 8);
1675 
1676   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
1677 
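  // The 'abs' modifier clears the sign bit; 'neg' flips it, so applying both
  // yields -|x|.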
1678   if (Imm.Mods.Abs) {
1679     Val &= ~FpSignMask;
1680   }
1681   if (Imm.Mods.Neg) {
1682     Val ^= FpSignMask;
1683   }
1684 
1685   return Val;
1686 }
1687 
1688 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
1689   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
1690                              Inst.getNumOperands())) {
1691     addLiteralImmOperand(Inst, Imm.Val,
1692                          ApplyModifiers &
1693                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
1694   } else {
1695     assert(!isImmTy(ImmTyNone) || !hasModifiers());
1696     Inst.addOperand(MCOperand::createImm(Imm.Val));
1697   }
1698 }
1699 
1700 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
1701   const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
1702   auto OpNum = Inst.getNumOperands();
1703   // Check that this operand accepts literals
1704   assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
1705 
1706   if (ApplyModifiers) {
1707     assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
1708     const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
1709     Val = applyInputFPModifiers(Val, Size);
1710   }
1711 
1712   APInt Literal(64, Val);
1713   uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
1714 
1715   if (Imm.IsFPImm) { // We got fp literal token
1716     switch (OpTy) {
1717     case AMDGPU::OPERAND_REG_IMM_INT64:
1718     case AMDGPU::OPERAND_REG_IMM_FP64:
1719     case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1720     case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1721       if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
1722                                        AsmParser->hasInv2PiInlineImm())) {
1723         Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
1724         return;
1725       }
1726 
1727       // Non-inlineable
1728       if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
1729         // For fp operands we check if low 32 bits are zeros
1730         if (Literal.getLoBits(32) != 0) {
1731           const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
1732           "Can't encode literal as exact 64-bit floating-point operand. "
1733           "Low 32-bits will be set to zero");
1734         }
1735 
1736         Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
1737         return;
1738       }
1739 
1740       // We don't allow fp literals in 64-bit integer instructions. It is
1741       // unclear how we should encode them. This case should be checked earlier
1742       // in predicate methods (isLiteralImm())
1743       llvm_unreachable("fp literal in 64-bit integer instruction.");
1744 
1745     case AMDGPU::OPERAND_REG_IMM_INT32:
1746     case AMDGPU::OPERAND_REG_IMM_FP32:
1747     case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1748     case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1749     case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1750     case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1751     case AMDGPU::OPERAND_REG_IMM_INT16:
1752     case AMDGPU::OPERAND_REG_IMM_FP16:
1753     case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1754     case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1755     case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1756     case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1757     case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1758     case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1759     case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1760     case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1761     case AMDGPU::OPERAND_REG_IMM_V2INT16:
1762     case AMDGPU::OPERAND_REG_IMM_V2FP16: {
1763       bool lost;
1764       APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point semantics.
1766       FPLiteral.convert(*getOpFltSemantics(OpTy),
1767                         APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm().
1770 
1771       uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
1772       Inst.addOperand(MCOperand::createImm(ImmVal));
1773       return;
1774     }
1775     default:
1776       llvm_unreachable("invalid operand size");
1777     }
1778 
1779     return;
1780   }
1781 
1782   // We got int literal token.
1783   // Only sign extend inline immediates.
1784   switch (OpTy) {
1785   case AMDGPU::OPERAND_REG_IMM_INT32:
1786   case AMDGPU::OPERAND_REG_IMM_FP32:
1787   case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1788   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1789   case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1790   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1791   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1792   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1793     if (isSafeTruncation(Val, 32) &&
1794         AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
1795                                      AsmParser->hasInv2PiInlineImm())) {
1796       Inst.addOperand(MCOperand::createImm(Val));
1797       return;
1798     }
1799 
1800     Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
1801     return;
1802 
1803   case AMDGPU::OPERAND_REG_IMM_INT64:
1804   case AMDGPU::OPERAND_REG_IMM_FP64:
1805   case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1806   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1807     if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
1808       Inst.addOperand(MCOperand::createImm(Val));
1809       return;
1810     }
1811 
1812     Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
1813     return;
1814 
1815   case AMDGPU::OPERAND_REG_IMM_INT16:
1816   case AMDGPU::OPERAND_REG_IMM_FP16:
1817   case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1818   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1819   case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1820   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1821     if (isSafeTruncation(Val, 16) &&
1822         AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1823                                      AsmParser->hasInv2PiInlineImm())) {
1824       Inst.addOperand(MCOperand::createImm(Val));
1825       return;
1826     }
1827 
1828     Inst.addOperand(MCOperand::createImm(Val & 0xffff));
1829     return;
1830 
1831   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1832   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1833   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1834   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
1835     assert(isSafeTruncation(Val, 16));
1836     assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
1837                                         AsmParser->hasInv2PiInlineImm()));
1838 
1839     Inst.addOperand(MCOperand::createImm(Val));
1840     return;
1841   }
1842   default:
1843     llvm_unreachable("invalid operand size");
1844   }
1845 }
1846 
1847 template <unsigned Bitwidth>
1848 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
1849   APInt Literal(64, Imm.Val);
1850 
1851   if (!Imm.IsFPImm) {
1852     // We got int literal token.
1853     Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
1854     return;
1855   }
1856 
1857   bool Lost;
1858   APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
1859   FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
1860                     APFloat::rmNearestTiesToEven, &Lost);
1861   Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
1862 }
1863 
1864 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
1865   Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
1866 }
1867 
1868 static bool isInlineValue(unsigned Reg) {
1869   switch (Reg) {
1870   case AMDGPU::SRC_SHARED_BASE:
1871   case AMDGPU::SRC_SHARED_LIMIT:
1872   case AMDGPU::SRC_PRIVATE_BASE:
1873   case AMDGPU::SRC_PRIVATE_LIMIT:
1874   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
1875     return true;
1876   case AMDGPU::SRC_VCCZ:
1877   case AMDGPU::SRC_EXECZ:
1878   case AMDGPU::SRC_SCC:
1879     return true;
1880   case AMDGPU::SGPR_NULL:
1881     return true;
1882   default:
1883     return false;
1884   }
1885 }
1886 
1887 bool AMDGPUOperand::isInlineValue() const {
1888   return isRegKind() && ::isInlineValue(getReg());
1889 }
1890 
1891 //===----------------------------------------------------------------------===//
1892 // AsmParser
1893 //===----------------------------------------------------------------------===//
1894 
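// Map a register kind and width (in 32-bit registers) to a register class;
// e.g. an SGPR pair such as s[0:1] has width 2 and maps to SGPR_64.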
1895 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
1896   if (Is == IS_VGPR) {
1897     switch (RegWidth) {
1898       default: return -1;
1899       case 1: return AMDGPU::VGPR_32RegClassID;
1900       case 2: return AMDGPU::VReg_64RegClassID;
1901       case 3: return AMDGPU::VReg_96RegClassID;
1902       case 4: return AMDGPU::VReg_128RegClassID;
1903       case 5: return AMDGPU::VReg_160RegClassID;
1904       case 8: return AMDGPU::VReg_256RegClassID;
1905       case 16: return AMDGPU::VReg_512RegClassID;
1906       case 32: return AMDGPU::VReg_1024RegClassID;
1907     }
1908   } else if (Is == IS_TTMP) {
1909     switch (RegWidth) {
1910       default: return -1;
1911       case 1: return AMDGPU::TTMP_32RegClassID;
1912       case 2: return AMDGPU::TTMP_64RegClassID;
1913       case 4: return AMDGPU::TTMP_128RegClassID;
1914       case 8: return AMDGPU::TTMP_256RegClassID;
1915       case 16: return AMDGPU::TTMP_512RegClassID;
1916     }
1917   } else if (Is == IS_SGPR) {
1918     switch (RegWidth) {
1919       default: return -1;
1920       case 1: return AMDGPU::SGPR_32RegClassID;
1921       case 2: return AMDGPU::SGPR_64RegClassID;
1922       case 4: return AMDGPU::SGPR_128RegClassID;
1923       case 8: return AMDGPU::SGPR_256RegClassID;
1924       case 16: return AMDGPU::SGPR_512RegClassID;
1925     }
1926   } else if (Is == IS_AGPR) {
1927     switch (RegWidth) {
1928       default: return -1;
1929       case 1: return AMDGPU::AGPR_32RegClassID;
1930       case 2: return AMDGPU::AReg_64RegClassID;
1931       case 4: return AMDGPU::AReg_128RegClassID;
1932       case 16: return AMDGPU::AReg_512RegClassID;
1933       case 32: return AMDGPU::AReg_1024RegClassID;
1934     }
1935   }
1936   return -1;
1937 }
1938 
1939 static unsigned getSpecialRegForName(StringRef RegName) {
1940   return StringSwitch<unsigned>(RegName)
1941     .Case("exec", AMDGPU::EXEC)
1942     .Case("vcc", AMDGPU::VCC)
1943     .Case("flat_scratch", AMDGPU::FLAT_SCR)
1944     .Case("xnack_mask", AMDGPU::XNACK_MASK)
1945     .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
1946     .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
1947     .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1948     .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
1949     .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
1950     .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
1951     .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1952     .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
1953     .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1954     .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
1955     .Case("lds_direct", AMDGPU::LDS_DIRECT)
1956     .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
1957     .Case("m0", AMDGPU::M0)
1958     .Case("vccz", AMDGPU::SRC_VCCZ)
1959     .Case("src_vccz", AMDGPU::SRC_VCCZ)
1960     .Case("execz", AMDGPU::SRC_EXECZ)
1961     .Case("src_execz", AMDGPU::SRC_EXECZ)
1962     .Case("scc", AMDGPU::SRC_SCC)
1963     .Case("src_scc", AMDGPU::SRC_SCC)
1964     .Case("tba", AMDGPU::TBA)
1965     .Case("tma", AMDGPU::TMA)
1966     .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
1967     .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
1968     .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
1969     .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
1970     .Case("vcc_lo", AMDGPU::VCC_LO)
1971     .Case("vcc_hi", AMDGPU::VCC_HI)
1972     .Case("exec_lo", AMDGPU::EXEC_LO)
1973     .Case("exec_hi", AMDGPU::EXEC_HI)
1974     .Case("tma_lo", AMDGPU::TMA_LO)
1975     .Case("tma_hi", AMDGPU::TMA_HI)
1976     .Case("tba_lo", AMDGPU::TBA_LO)
1977     .Case("tba_hi", AMDGPU::TBA_HI)
1978     .Case("null", AMDGPU::SGPR_NULL)
1979     .Default(AMDGPU::NoRegister);
1980 }
1981 
1982 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
1983                                     SMLoc &EndLoc) {
1984   auto R = parseRegister();
1985   if (!R) return true;
1986   assert(R->isReg());
1987   RegNo = R->getReg();
1988   StartLoc = R->getStartLoc();
1989   EndLoc = R->getEndLoc();
1990   return false;
1991 }
1992 
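// Fold the next register of a list like [s0,s1,s2,s3] into the range
// accumulated so far. Regular registers must be consecutive; lo/hi pairs of
// special registers (e.g. [exec_lo,exec_hi]) combine into their 64-bit
// counterpart. Returns false if the registers cannot be combined.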
1993 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
1994                                             RegisterKind RegKind, unsigned Reg1) {
1995   switch (RegKind) {
1996   case IS_SPECIAL:
1997     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
1998       Reg = AMDGPU::EXEC;
1999       RegWidth = 2;
2000       return true;
2001     }
2002     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2003       Reg = AMDGPU::FLAT_SCR;
2004       RegWidth = 2;
2005       return true;
2006     }
2007     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2008       Reg = AMDGPU::XNACK_MASK;
2009       RegWidth = 2;
2010       return true;
2011     }
2012     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2013       Reg = AMDGPU::VCC;
2014       RegWidth = 2;
2015       return true;
2016     }
2017     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2018       Reg = AMDGPU::TBA;
2019       RegWidth = 2;
2020       return true;
2021     }
2022     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2023       Reg = AMDGPU::TMA;
2024       RegWidth = 2;
2025       return true;
2026     }
2027     return false;
2028   case IS_VGPR:
2029   case IS_SGPR:
2030   case IS_AGPR:
2031   case IS_TTMP:
2032     if (Reg1 != Reg + RegWidth) {
2033       return false;
2034     }
2035     RegWidth++;
2036     return true;
2037   default:
2038     llvm_unreachable("unexpected register kind");
2039   }
2040 }
2041 
2042 struct RegInfo {
2043   StringLiteral Name;
2044   RegisterKind Kind;
2045 };
2046 
2047 static constexpr RegInfo RegularRegisters[] = {
2048   {{"v"},    IS_VGPR},
2049   {{"s"},    IS_SGPR},
2050   {{"ttmp"}, IS_TTMP},
2051   {{"acc"},  IS_AGPR},
2052   {{"a"},    IS_AGPR},
2053 };
2054 
2055 static bool isRegularReg(RegisterKind Kind) {
2056   return Kind == IS_VGPR ||
2057          Kind == IS_SGPR ||
2058          Kind == IS_TTMP ||
2059          Kind == IS_AGPR;
2060 }
2061 
2062 static const RegInfo* getRegularRegInfo(StringRef Str) {
2063   for (const RegInfo &Reg : RegularRegisters)
2064     if (Str.startswith(Reg.Name))
2065       return &Reg;
2066   return nullptr;
2067 }
2068 
2069 static bool getRegNum(StringRef Str, unsigned& Num) {
2070   return !Str.getAsInteger(10, Num);
2071 }
2072 
2073 bool
2074 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2075                             const AsmToken &NextToken) const {
2076 
2077   // A list of consecutive registers: [s0,s1,s2,s3]
2078   if (Token.is(AsmToken::LBrac))
2079     return true;
2080 
2081   if (!Token.is(AsmToken::Identifier))
2082     return false;
2083 
2084   // A single register like s0 or a range of registers like s[0:1]
2085 
2086   StringRef Str = Token.getString();
2087   const RegInfo *Reg = getRegularRegInfo(Str);
2088   if (Reg) {
2089     StringRef RegName = Reg->Name;
2090     StringRef RegSuffix = Str.substr(RegName.size());
2091     if (!RegSuffix.empty()) {
2092       unsigned Num;
2093       // A single register with an index: rXX
2094       if (getRegNum(RegSuffix, Num))
2095         return true;
2096     } else {
2097       // A range of registers: r[XX:YY].
2098       if (NextToken.is(AsmToken::LBrac))
2099         return true;
2100     }
2101   }
2102 
2103   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2104 }
2105 
2106 bool
2107 AMDGPUAsmParser::isRegister()
2108 {
2109   return isRegister(getToken(), peekToken());
2110 }
2111 
2112 unsigned
2113 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
2114                                unsigned RegNum,
2115                                unsigned RegWidth) {
2116 
2117   assert(isRegularReg(RegKind));
2118 
2119   unsigned AlignSize = 1;
2120   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2121     // SGPR and TTMP registers must be aligned.
2122     // Max required alignment is 4 dwords.
2123     AlignSize = std::min(RegWidth, 4u);
2124   }
2125 
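  // e.g. s[2:3] is a properly aligned pair, while s[1:2] is rejected below.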
2126   if (RegNum % AlignSize != 0)
2127     return AMDGPU::NoRegister;
2128 
2129   unsigned RegIdx = RegNum / AlignSize;
2130   int RCID = getRegClass(RegKind, RegWidth);
2131   if (RCID == -1)
2132     return AMDGPU::NoRegister;
2133 
2134   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2135   const MCRegisterClass RC = TRI->getRegClass(RCID);
2136   if (RegIdx >= RC.getNumRegs())
2137     return AMDGPU::NoRegister;
2138 
2139   return RC.getRegister(RegIdx);
2140 }
2141 
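// Parse a bracketed register index range such as "[0]" or "[0:3]".
// On success, Num is the first index and Width is the number of registers
// (hi - lo + 1).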
2142 bool
2143 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
2144   int64_t RegLo, RegHi;
2145   if (!trySkipToken(AsmToken::LBrac))
2146     return false;
2147 
2148   if (!parseExpr(RegLo))
2149     return false;
2150 
2151   if (trySkipToken(AsmToken::Colon)) {
2152     if (!parseExpr(RegHi))
2153       return false;
2154   } else {
2155     RegHi = RegLo;
2156   }
2157 
2158   if (!trySkipToken(AsmToken::RBrac))
2159     return false;
2160 
2161   if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi)
2162     return false;
2163 
2164   Num = static_cast<unsigned>(RegLo);
2165   Width = (RegHi - RegLo) + 1;
2166   return true;
2167 }
2168 
2169 unsigned
2170 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2171                                  unsigned &RegNum,
2172                                  unsigned &RegWidth) {
2173   assert(isToken(AsmToken::Identifier));
2174   unsigned Reg = getSpecialRegForName(getTokenStr());
2175   if (Reg) {
2176     RegNum = 0;
2177     RegWidth = 1;
2178     RegKind = IS_SPECIAL;
2179     lex(); // skip register name
2180   }
2181   return Reg;
2182 }
2183 
2184 unsigned
2185 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
2186                                  unsigned &RegNum,
2187                                  unsigned &RegWidth) {
2188   assert(isToken(AsmToken::Identifier));
2189   StringRef RegName = getTokenStr();
2190 
2191   const RegInfo *RI = getRegularRegInfo(RegName);
2192   if (!RI)
2193     return AMDGPU::NoRegister;
2194   lex(); // skip register name
2195 
2196   RegKind = RI->Kind;
2197   StringRef RegSuffix = RegName.substr(RI->Name.size());
2198   if (!RegSuffix.empty()) {
2199     // Single 32-bit register: vXX.
2200     if (!getRegNum(RegSuffix, RegNum))
2201       return AMDGPU::NoRegister;
2202     RegWidth = 1;
2203   } else {
2204     // Range of registers: v[XX:YY]. ":YY" is optional.
2205     if (!ParseRegRange(RegNum, RegWidth))
2206       return AMDGPU::NoRegister;
2207   }
2208 
2209   return getRegularReg(RegKind, RegNum, RegWidth);
2210 }
2211 
2212 unsigned
2213 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
2214                               unsigned &RegNum,
2215                               unsigned &RegWidth) {
2216   unsigned Reg = AMDGPU::NoRegister;
2217 
2218   if (!trySkipToken(AsmToken::LBrac))
2219     return AMDGPU::NoRegister;
2220 
2221   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
2222 
2223   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
2224     return AMDGPU::NoRegister;
2225   if (RegWidth != 1)
2226     return AMDGPU::NoRegister;
2227 
2228   for (; trySkipToken(AsmToken::Comma); ) {
2229     RegisterKind NextRegKind;
2230     unsigned NextReg, NextRegNum, NextRegWidth;
2231 
2232     if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth))
2233       return AMDGPU::NoRegister;
2234     if (NextRegWidth != 1)
2235       return AMDGPU::NoRegister;
2236     if (NextRegKind != RegKind)
2237       return AMDGPU::NoRegister;
2238     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg))
2239       return AMDGPU::NoRegister;
2240   }
2241 
2242   if (!trySkipToken(AsmToken::RBrac))
2243     return AMDGPU::NoRegister;
2244 
2245   if (isRegularReg(RegKind))
2246     Reg = getRegularReg(RegKind, RegNum, RegWidth);
2247 
2248   return Reg;
2249 }
2250 
2251 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
2252                                           unsigned &Reg,
2253                                           unsigned &RegNum,
2254                                           unsigned &RegWidth) {
2255   Reg = AMDGPU::NoRegister;
2256 
2257   if (isToken(AsmToken::Identifier)) {
2258     Reg = ParseSpecialReg(RegKind, RegNum, RegWidth);
2259     if (Reg == AMDGPU::NoRegister)
2260       Reg = ParseRegularReg(RegKind, RegNum, RegWidth);
2261   } else {
2262     Reg = ParseRegList(RegKind, RegNum, RegWidth);
2263   }
2264 
2265   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2266   return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg);
2267 }
2268 
2269 Optional<StringRef>
2270 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2271   switch (RegKind) {
2272   case IS_VGPR:
2273     return StringRef(".amdgcn.next_free_vgpr");
2274   case IS_SGPR:
2275     return StringRef(".amdgcn.next_free_sgpr");
2276   default:
2277     return None;
2278   }
2279 }
2280 
2281 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2282   auto SymbolName = getGprCountSymbolName(RegKind);
2283   assert(SymbolName && "initializing invalid register kind");
2284   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2285   Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2286 }
2287 
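// Track the highest GPR referenced so far via the .amdgcn.next_free_{v,s}gpr
// symbols; e.g. a use of v[8:9] bumps .amdgcn.next_free_vgpr to at least 10.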
2288 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
2289                                             unsigned DwordRegIndex,
2290                                             unsigned RegWidth) {
2291   // Symbols are only defined for GCN targets
2292   if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
2293     return true;
2294 
2295   auto SymbolName = getGprCountSymbolName(RegKind);
2296   if (!SymbolName)
2297     return true;
2298   MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2299 
2300   int64_t NewMax = DwordRegIndex + RegWidth - 1;
2301   int64_t OldCount;
2302 
2303   if (!Sym->isVariable())
2304     return !Error(getParser().getTok().getLoc(),
2305                   ".amdgcn.next_free_{v,s}gpr symbols must be variable");
2306   if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
2307     return !Error(
2308         getParser().getTok().getLoc(),
2309         ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");
2310 
2311   if (OldCount <= NewMax)
2312     Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));
2313 
2314   return true;
2315 }
2316 
2317 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
2318   const auto &Tok = Parser.getTok();
2319   SMLoc StartLoc = Tok.getLoc();
2320   SMLoc EndLoc = Tok.getEndLoc();
2321   RegisterKind RegKind;
2322   unsigned Reg, RegNum, RegWidth;
2323 
2324   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    // FIXME: improve error messages (bug 41303).
2326     Error(StartLoc, "not a valid operand.");
2327     return nullptr;
2328   }
2329   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
2330     if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
2331       return nullptr;
2332   } else
2333     KernelScope.usesRegister(RegKind, RegNum, RegWidth);
2334   return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
2335 }
2336 
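// Parse a plain immediate operand: either a floating-point literal, stored as
// the bit pattern of an IEEE double, or an integer/absolute expression.
// Expressions that cannot be evaluated yet become expression operands.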
2337 OperandMatchResultTy
2338 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
2339   // TODO: add syntactic sugar for 1/(2*PI)
2340 
2341   assert(!isRegister());
2342   assert(!isModifier());
2343 
2344   const auto& Tok = getToken();
2345   const auto& NextTok = peekToken();
2346   bool IsReal = Tok.is(AsmToken::Real);
2347   SMLoc S = getLoc();
2348   bool Negate = false;
2349 
2350   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
2351     lex();
2352     IsReal = true;
2353     Negate = true;
2354   }
2355 
2356   if (IsReal) {
    // Floating-point expressions are not supported.
    // Only floating-point literals with an
    // optional sign are allowed.
2360 
2361     StringRef Num = getTokenStr();
2362     lex();
2363 
2364     APFloat RealVal(APFloat::IEEEdouble());
2365     auto roundMode = APFloat::rmNearestTiesToEven;
2366     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
2367       return MatchOperand_ParseFail;
2368     }
2369     if (Negate)
2370       RealVal.changeSign();
2371 
2372     Operands.push_back(
2373       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
2374                                AMDGPUOperand::ImmTyNone, true));
2375 
2376     return MatchOperand_Success;
2377 
2378   } else {
2379     int64_t IntVal;
2380     const MCExpr *Expr;
2381     SMLoc S = getLoc();
2382 
2383     if (HasSP3AbsModifier) {
2384       // This is a workaround for handling expressions
2385       // as arguments of SP3 'abs' modifier, for example:
2386       //     |1.0|
2387       //     |-1|
2388       //     |1+x|
2389       // This syntax is not compatible with syntax of standard
2390       // MC expressions (due to the trailing '|').
2391       SMLoc EndLoc;
2392       if (getParser().parsePrimaryExpr(Expr, EndLoc))
2393         return MatchOperand_ParseFail;
2394     } else {
2395       if (Parser.parseExpression(Expr))
2396         return MatchOperand_ParseFail;
2397     }
2398 
2399     if (Expr->evaluateAsAbsolute(IntVal)) {
2400       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
2401     } else {
2402       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
2403     }
2404 
2405     return MatchOperand_Success;
2406   }
2407 
2408   return MatchOperand_NoMatch;
2409 }
2410 
2411 OperandMatchResultTy
2412 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
2413   if (!isRegister())
2414     return MatchOperand_NoMatch;
2415 
2416   if (auto R = parseRegister()) {
2417     assert(R->isReg());
2418     Operands.push_back(std::move(R));
2419     return MatchOperand_Success;
2420   }
2421   return MatchOperand_ParseFail;
2422 }
2423 
2424 OperandMatchResultTy
2425 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
2426   auto res = parseReg(Operands);
2427   if (res != MatchOperand_NoMatch) {
2428     return res;
2429   } else if (isModifier()) {
2430     return MatchOperand_NoMatch;
2431   } else {
2432     return parseImm(Operands, HasSP3AbsMod);
2433   }
2434 }
2435 
2436 bool
2437 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2438   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
2439     const auto &str = Token.getString();
2440     return str == "abs" || str == "neg" || str == "sext";
2441   }
2442   return false;
2443 }
2444 
2445 bool
2446 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
2447   return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
2448 }
2449 
2450 bool
2451 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2452   return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
2453 }
2454 
2455 bool
2456 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
2457   return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
2458 }
2459 
// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
2464 //   |...|
2465 //   abs(...)
2466 //   neg(...)
2467 //   sext(...)
2468 //   -reg
2469 //   -|...|
2470 //   -abs(...)
2471 //   name:...
2472 // Note that simple opcode modifiers like 'gds' may be parsed as
2473 // expressions; this is a special case. See getExpressionAsToken.
2474 //
2475 bool
2476 AMDGPUAsmParser::isModifier() {
2477 
2478   AsmToken Tok = getToken();
2479   AsmToken NextToken[2];
2480   peekTokens(NextToken);
2481 
2482   return isOperandModifier(Tok, NextToken[0]) ||
2483          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
2484          isOpcodeModifierWithVal(Tok, NextToken[0]);
2485 }
2486 
2487 // Check if the current token is an SP3 'neg' modifier.
2488 // Currently this modifier is allowed in the following context:
2489 //
2490 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
2491 // 2. Before an 'abs' modifier: -abs(...)
2492 // 3. Before an SP3 'abs' modifier: -|...|
2493 //
2494 // In all other cases "-" is handled as a part
2495 // of an expression that follows the sign.
2496 //
2497 // Note: When "-" is followed by an integer literal,
2498 // this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
2503 // for example:
2504 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2505 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
2508 //
2509 bool
2510 AMDGPUAsmParser::parseSP3NegModifier() {
2511 
2512   AsmToken NextToken[2];
2513   peekTokens(NextToken);
2514 
2515   if (isToken(AsmToken::Minus) &&
2516       (isRegister(NextToken[0], NextToken[1]) ||
2517        NextToken[0].is(AsmToken::Pipe) ||
2518        isId(NextToken[0], "abs"))) {
2519     lex();
2520     return true;
2521   }
2522 
2523   return false;
2524 }
2525 
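// Parse a register or immediate with optional FP input modifiers, in either
// the named form, e.g. neg(abs(v0)), or the SP3 form, e.g. -|v0|.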
2526 OperandMatchResultTy
2527 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2528                                               bool AllowImm) {
2529   bool Neg, SP3Neg;
2530   bool Abs, SP3Abs;
2531   SMLoc Loc;
2532 
2533   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2534   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2535     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2536     return MatchOperand_ParseFail;
2537   }
2538 
2539   SP3Neg = parseSP3NegModifier();
2540 
2541   Loc = getLoc();
2542   Neg = trySkipId("neg");
2543   if (Neg && SP3Neg) {
2544     Error(Loc, "expected register or immediate");
2545     return MatchOperand_ParseFail;
2546   }
2547   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2548     return MatchOperand_ParseFail;
2549 
2550   Abs = trySkipId("abs");
2551   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2552     return MatchOperand_ParseFail;
2553 
2554   Loc = getLoc();
2555   SP3Abs = trySkipToken(AsmToken::Pipe);
2556   if (Abs && SP3Abs) {
2557     Error(Loc, "expected register or immediate");
2558     return MatchOperand_ParseFail;
2559   }
2560 
2561   OperandMatchResultTy Res;
2562   if (AllowImm) {
2563     Res = parseRegOrImm(Operands, SP3Abs);
2564   } else {
2565     Res = parseReg(Operands);
2566   }
2567   if (Res != MatchOperand_Success) {
2568     return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
2569   }
2570 
2571   if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
2572     return MatchOperand_ParseFail;
2573   if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2574     return MatchOperand_ParseFail;
2575   if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2576     return MatchOperand_ParseFail;
2577 
2578   AMDGPUOperand::Modifiers Mods;
2579   Mods.Abs = Abs || SP3Abs;
2580   Mods.Neg = Neg || SP3Neg;
2581 
2582   if (Mods.hasFPModifiers()) {
2583     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2584     if (Op.isExpr()) {
2585       Error(Op.getStartLoc(), "expected an absolute expression");
2586       return MatchOperand_ParseFail;
2587     }
2588     Op.setModifiers(Mods);
2589   }
2590   return MatchOperand_Success;
2591 }
2592 
2593 OperandMatchResultTy
2594 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
2595                                                bool AllowImm) {
2596   bool Sext = trySkipId("sext");
2597   if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
2598     return MatchOperand_ParseFail;
2599 
2600   OperandMatchResultTy Res;
2601   if (AllowImm) {
2602     Res = parseRegOrImm(Operands);
2603   } else {
2604     Res = parseReg(Operands);
2605   }
2606   if (Res != MatchOperand_Success) {
2607     return Sext? MatchOperand_ParseFail : Res;
2608   }
2609 
2610   if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
2611     return MatchOperand_ParseFail;
2612 
2613   AMDGPUOperand::Modifiers Mods;
2614   Mods.Sext = Sext;
2615 
2616   if (Mods.hasIntModifiers()) {
2617     AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
2618     if (Op.isExpr()) {
2619       Error(Op.getStartLoc(), "expected an absolute expression");
2620       return MatchOperand_ParseFail;
2621     }
2622     Op.setModifiers(Mods);
2623   }
2624 
2625   return MatchOperand_Success;
2626 }
2627 
2628 OperandMatchResultTy
2629 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
2630   return parseRegOrImmWithFPInputMods(Operands, false);
2631 }
2632 
2633 OperandMatchResultTy
2634 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
2635   return parseRegOrImmWithIntInputMods(Operands, false);
2636 }
2637 
2638 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
2639   auto Loc = getLoc();
2640   if (trySkipId("off")) {
2641     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
2642                                                 AMDGPUOperand::ImmTyOff, false));
2643     return MatchOperand_Success;
2644   }
2645 
2646   if (!isRegister())
2647     return MatchOperand_NoMatch;
2648 
2649   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
2650   if (Reg) {
2651     Operands.push_back(std::move(Reg));
2652     return MatchOperand_Success;
2653   }
2654 
2655   return MatchOperand_ParseFail;
2656 
2657 }
2658 
2659 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
2660   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
2661 
2662   if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
2663       (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
2664       (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
2665       (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
2666     return Match_InvalidOperand;
2667 
2668   if ((TSFlags & SIInstrFlags::VOP3) &&
2669       (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
2670       getForcedEncodingSize() != 64)
2671     return Match_PreferE32;
2672 
2673   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
2674       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD.
2676     auto OpNum =
2677         AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2678     const auto &Op = Inst.getOperand(OpNum);
2679     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2680       return Match_InvalidOperand;
2681     }
2682   }
2683 
2684   return Match_Success;
2685 }
2686 
// Which asm variants we should check
2688 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2689   if (getForcedEncodingSize() == 32) {
2690     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2691     return makeArrayRef(Variants);
2692   }
2693 
2694   if (isForcedVOP3()) {
2695     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2696     return makeArrayRef(Variants);
2697   }
2698 
2699   if (isForcedSDWA()) {
2700     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2701                                         AMDGPUAsmVariants::SDWA9};
2702     return makeArrayRef(Variants);
2703   }
2704 
2705   if (isForcedDPP()) {
2706     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2707     return makeArrayRef(Variants);
2708   }
2709 
2710   static const unsigned Variants[] = {
2711     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2712     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2713   };
2714 
2715   return makeArrayRef(Variants);
2716 }
2717 
2718 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2719   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2720   const unsigned Num = Desc.getNumImplicitUses();
2721   for (unsigned i = 0; i < Num; ++i) {
2722     unsigned Reg = Desc.ImplicitUses[i];
2723     switch (Reg) {
2724     case AMDGPU::FLAT_SCR:
2725     case AMDGPU::VCC:
2726     case AMDGPU::VCC_LO:
2727     case AMDGPU::VCC_HI:
2728     case AMDGPU::M0:
2729       return Reg;
2730     default:
2731       break;
2732     }
2733   }
2734   return AMDGPU::NoRegister;
2735 }
2736 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 has no f16 inline constants.
// Note that there are no cases in which a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
2741 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
2742                                        unsigned OpIdx) const {
2743   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2744 
2745   if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
2746     return false;
2747   }
2748 
2749   const MCOperand &MO = Inst.getOperand(OpIdx);
2750 
2751   int64_t Val = MO.getImm();
2752   auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
2753 
2754   switch (OpSize) { // expected operand size
2755   case 8:
2756     return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
2757   case 4:
2758     return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
2759   case 2: {
2760     const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
2761     if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
2762         OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
2763         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
2764         OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
2765         OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 ||
2766         OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) {
2767       return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
2768     } else {
2769       return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
2770     }
2771   }
2772   default:
2773     llvm_unreachable("invalid operand size");
2774   }
2775 }
2776 
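// Maximum number of scalar values (SGPRs and literals) an instruction may
// read via the constant bus: one before GFX10, two on GFX10 except for
// 64-bit shifts, which are still limited to one.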
2777 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
2778   if (!isGFX10())
2779     return 1;
2780 
2781   switch (Opcode) {
2782   // 64-bit shift instructions can use only one scalar value input
2783   case AMDGPU::V_LSHLREV_B64:
2784   case AMDGPU::V_LSHLREV_B64_gfx10:
2785   case AMDGPU::V_LSHL_B64:
2786   case AMDGPU::V_LSHRREV_B64:
2787   case AMDGPU::V_LSHRREV_B64_gfx10:
2788   case AMDGPU::V_LSHR_B64:
2789   case AMDGPU::V_ASHRREV_I64:
2790   case AMDGPU::V_ASHRREV_I64_gfx10:
2791   case AMDGPU::V_ASHR_I64:
2792     return 1;
2793   default:
2794     return 2;
2795   }
2796 }
2797 
2798 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
2799   const MCOperand &MO = Inst.getOperand(OpIdx);
2800   if (MO.isImm()) {
2801     return !isInlineConstant(Inst, OpIdx);
2802   } else if (MO.isReg()) {
2803     auto Reg = MO.getReg();
2804     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2805     return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL;
2806   } else {
2807     return true;
2808   }
2809 }
2810 
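// Check that the instruction does not read more scalar values than the
// constant bus allows; e.g. on pre-GFX10 targets
//   v_add_f32_e64 v0, s0, s1
// reads two different SGPRs and is rejected, while using the same SGPR twice
// counts as a single constant bus use.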
2811 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) {
2812   const unsigned Opcode = Inst.getOpcode();
2813   const MCInstrDesc &Desc = MII.get(Opcode);
2814   unsigned ConstantBusUseCount = 0;
2815   unsigned NumLiterals = 0;
2816   unsigned LiteralSize;
2817 
2818   if (Desc.TSFlags &
2819       (SIInstrFlags::VOPC |
2820        SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
2821        SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
2822        SIInstrFlags::SDWA)) {
2823     // Check special imm operands (used by madmk, etc)
2824     if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
2825       ++ConstantBusUseCount;
2826     }
2827 
2828     SmallDenseSet<unsigned> SGPRsUsed;
2829     unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
2830     if (SGPRUsed != AMDGPU::NoRegister) {
2831       SGPRsUsed.insert(SGPRUsed);
2832       ++ConstantBusUseCount;
2833     }
2834 
2835     const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2836     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2837     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2838 
2839     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2840 
2841     for (int OpIdx : OpIndices) {
2842       if (OpIdx == -1) break;
2843 
2844       const MCOperand &MO = Inst.getOperand(OpIdx);
2845       if (usesConstantBus(Inst, OpIdx)) {
2846         if (MO.isReg()) {
2847           const unsigned Reg = mc2PseudoReg(MO.getReg());
          // Pairs of registers that partially intersect, like these
2849           //   s0, s[0:1]
2850           //   flat_scratch_lo, flat_scratch
2851           //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but are disabled anyway.
2853           // Note that this code mimics SIInstrInfo::verifyInstruction
2854           if (!SGPRsUsed.count(Reg)) {
2855             SGPRsUsed.insert(Reg);
2856             ++ConstantBusUseCount;
2857           }
2858         } else { // Expression or a literal
2859 
2860           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
2861             continue; // special operand like VINTERP attr_chan
2862 
2863           // An instruction may use only one literal.
          // This has been validated in an earlier step.
2865           // See validateVOP3Literal.
2866           // This literal may be used as more than one operand.
2867           // If all these operands are of the same size,
2868           // this literal counts as one scalar value.
2869           // Otherwise it counts as 2 scalar values.
2870           // See "GFX10 Shader Programming", section 3.6.2.3.
2871 
2872           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
2873           if (Size < 4) Size = 4;
2874 
2875           if (NumLiterals == 0) {
2876             NumLiterals = 1;
2877             LiteralSize = Size;
2878           } else if (LiteralSize != Size) {
2879             NumLiterals = 2;
2880           }
2881         }
2882       }
2883     }
2884   }
2885   ConstantBusUseCount += NumLiterals;
2886 
2887   return ConstantBusUseCount <= getConstantBusLimit(Opcode);
2888 }
2889 
2890 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) {
2891   const unsigned Opcode = Inst.getOpcode();
2892   const MCInstrDesc &Desc = MII.get(Opcode);
2893 
2894   const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
2895   if (DstIdx == -1 ||
2896       Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
2897     return true;
2898   }
2899 
2900   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2901 
2902   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
2903   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
2904   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
2905 
2906   assert(DstIdx != -1);
2907   const MCOperand &Dst = Inst.getOperand(DstIdx);
2908   assert(Dst.isReg());
2909   const unsigned DstReg = mc2PseudoReg(Dst.getReg());
2910 
2911   const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
2912 
2913   for (int SrcIdx : SrcIndices) {
2914     if (SrcIdx == -1) break;
2915     const MCOperand &Src = Inst.getOperand(SrcIdx);
2916     if (Src.isReg()) {
2917       const unsigned SrcReg = mc2PseudoReg(Src.getReg());
2918       if (isRegIntersect(DstReg, SrcReg, TRI)) {
2919         return false;
2920       }
2921     }
2922   }
2923 
2924   return true;
2925 }
2926 
2927 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
2928 
2929   const unsigned Opc = Inst.getOpcode();
2930   const MCInstrDesc &Desc = MII.get(Opc);
2931 
2932   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
2933     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
2934     assert(ClampIdx != -1);
2935     return Inst.getOperand(ClampIdx).getImm() == 0;
2936   }
2937 
2938   return true;
2939 }
2940 
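// Check that the size of the vdata operand matches the number of components
// enabled by dmask (gather4 opcodes always return four), packed two per dword
// when d16 is in effect, plus one extra dword when tfe is set.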
2941 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
2942 
2943   const unsigned Opc = Inst.getOpcode();
2944   const MCInstrDesc &Desc = MII.get(Opc);
2945 
2946   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
2947     return true;
2948 
2949   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
2950   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
2951   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
2952 
2953   assert(VDataIdx != -1);
2954   assert(DMaskIdx != -1);
2955   assert(TFEIdx != -1);
2956 
2957   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
2958   unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0;
2959   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
2960   if (DMask == 0)
2961     DMask = 1;
2962 
2963   unsigned DataSize =
2964     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
2965   if (hasPackedD16()) {
2966     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
2967     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
2968       DataSize = (DataSize + 1) / 2;
2969   }
2970 
2971   return (VDataSize / 4) == DataSize + TFESize;
2972 }
2973 
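// On GFX10, check that the number of address registers (either a contiguous
// VGPR tuple or separate NSA operands) matches what the opcode and dim
// require: extra arguments, gradients, coordinates and lod/clamp/mip.
// Non-NSA forms round address sizes above 4 up to 8 and above 8 up to 16.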
2974 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
2975   const unsigned Opc = Inst.getOpcode();
2976   const MCInstrDesc &Desc = MII.get(Opc);
2977 
2978   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10())
2979     return true;
2980 
2981   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
2982   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2983       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
2984   int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
2985   int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
2986   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
2987 
2988   assert(VAddr0Idx != -1);
2989   assert(SrsrcIdx != -1);
2990   assert(DimIdx != -1);
2991   assert(SrsrcIdx > VAddr0Idx);
2992 
2993   unsigned Dim = Inst.getOperand(DimIdx).getImm();
2994   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
2995   bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
2996   unsigned VAddrSize =
2997       IsNSA ? SrsrcIdx - VAddr0Idx
2998             : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
2999 
3000   unsigned AddrSize = BaseOpcode->NumExtraArgs +
3001                       (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3002                       (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3003                       (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3004   if (!IsNSA) {
3005     if (AddrSize > 8)
3006       AddrSize = 16;
3007     else if (AddrSize > 4)
3008       AddrSize = 8;
3009   }
3010 
3011   return VAddrSize == AddrSize;
3012 }
3013 
3014 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3015 
3016   const unsigned Opc = Inst.getOpcode();
3017   const MCInstrDesc &Desc = MII.get(Opc);
3018 
3019   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3020     return true;
3021   if (!Desc.mayLoad() || !Desc.mayStore())
3022     return true; // Not atomic
3023 
3024   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3025   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3026 
3027   // This is an incomplete check because image_atomic_cmpswap
3028   // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However, these limitations are
  // verified later, when we check that dmask matches the dst size.
3031   return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3032 }
3033 
3034 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3035 
3036   const unsigned Opc = Inst.getOpcode();
3037   const MCInstrDesc &Desc = MII.get(Opc);
3038 
3039   if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3040     return true;
3041 
3042   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3043   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3044 
  // GATHER4 instructions use dmask differently from other MIMG
  // instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue and 8=alpha; e.g. dmask=1 returns
  // (red,red,red,red). The ISA document doesn't mention this.
3050   return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3051 }
3052 
3053 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3054 {
3055   switch (Opcode) {
3056   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3057   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3058   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3059     return true;
3060   default:
3061     return false;
3062   }
3063 }
3064 
// movrels* opcodes should only allow VGPRs as src0.
// This is specified in the .td descriptions for VOP1/VOP3,
// but SDWA is handled differently. See isSDWAOperand.
3068 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) {
3069 
3070   const unsigned Opc = Inst.getOpcode();
3071   const MCInstrDesc &Desc = MII.get(Opc);
3072 
3073   if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3074     return true;
3075 
3076   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3077   assert(Src0Idx != -1);
3078 
3079   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3080   if (!Src0.isReg())
3081     return false;
3082 
3083   auto Reg = Src0.getReg();
3084   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3085   return !isSGPR(mc2PseudoReg(Reg), TRI);
3086 }
3087 
3088 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3089 
3090   const unsigned Opc = Inst.getOpcode();
3091   const MCInstrDesc &Desc = MII.get(Opc);
3092 
3093   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3094     return true;
3095 
3096   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3097   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3098     if (isCI() || isSI())
3099       return false;
3100   }
3101 
3102   return true;
3103 }
3104 
3105 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3106   const unsigned Opc = Inst.getOpcode();
3107   const MCInstrDesc &Desc = MII.get(Opc);
3108 
3109   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3110     return true;
3111 
3112   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3113   if (DimIdx < 0)
3114     return true;
3115 
3116   long Imm = Inst.getOperand(DimIdx).getImm();
3117   if (Imm < 0 || Imm >= 8)
3118     return false;
3119 
3120   return true;
3121 }
3122 
3123 static bool IsRevOpcode(const unsigned Opcode)
3124 {
3125   switch (Opcode) {
3126   case AMDGPU::V_SUBREV_F32_e32:
3127   case AMDGPU::V_SUBREV_F32_e64:
3128   case AMDGPU::V_SUBREV_F32_e32_gfx10:
3129   case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3130   case AMDGPU::V_SUBREV_F32_e32_vi:
3131   case AMDGPU::V_SUBREV_F32_e64_gfx10:
3132   case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3133   case AMDGPU::V_SUBREV_F32_e64_vi:
3134 
3135   case AMDGPU::V_SUBREV_I32_e32:
3136   case AMDGPU::V_SUBREV_I32_e64:
3137   case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3138   case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3139 
3140   case AMDGPU::V_SUBBREV_U32_e32:
3141   case AMDGPU::V_SUBBREV_U32_e64:
3142   case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3143   case AMDGPU::V_SUBBREV_U32_e32_vi:
3144   case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3145   case AMDGPU::V_SUBBREV_U32_e64_vi:
3146 
3147   case AMDGPU::V_SUBREV_U32_e32:
3148   case AMDGPU::V_SUBREV_U32_e64:
3149   case AMDGPU::V_SUBREV_U32_e32_gfx9:
3150   case AMDGPU::V_SUBREV_U32_e32_vi:
3151   case AMDGPU::V_SUBREV_U32_e64_gfx9:
3152   case AMDGPU::V_SUBREV_U32_e64_vi:
3153 
3154   case AMDGPU::V_SUBREV_F16_e32:
3155   case AMDGPU::V_SUBREV_F16_e64:
3156   case AMDGPU::V_SUBREV_F16_e32_gfx10:
3157   case AMDGPU::V_SUBREV_F16_e32_vi:
3158   case AMDGPU::V_SUBREV_F16_e64_gfx10:
3159   case AMDGPU::V_SUBREV_F16_e64_vi:
3160 
3161   case AMDGPU::V_SUBREV_U16_e32:
3162   case AMDGPU::V_SUBREV_U16_e64:
3163   case AMDGPU::V_SUBREV_U16_e32_vi:
3164   case AMDGPU::V_SUBREV_U16_e64_vi:
3165 
3166   case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3167   case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3168   case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3169 
3170   case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3171   case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3172 
3173   case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3174   case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3175 
3176   case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3177   case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3178 
3179   case AMDGPU::V_LSHRREV_B32_e32:
3180   case AMDGPU::V_LSHRREV_B32_e64:
3181   case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3182   case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3183   case AMDGPU::V_LSHRREV_B32_e32_vi:
3184   case AMDGPU::V_LSHRREV_B32_e64_vi:
3185   case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3186   case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3187 
3188   case AMDGPU::V_ASHRREV_I32_e32:
3189   case AMDGPU::V_ASHRREV_I32_e64:
3190   case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3191   case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3192   case AMDGPU::V_ASHRREV_I32_e32_vi:
3193   case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3194   case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3195   case AMDGPU::V_ASHRREV_I32_e64_vi:
3196 
3197   case AMDGPU::V_LSHLREV_B32_e32:
3198   case AMDGPU::V_LSHLREV_B32_e64:
3199   case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3200   case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3201   case AMDGPU::V_LSHLREV_B32_e32_vi:
3202   case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3203   case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3204   case AMDGPU::V_LSHLREV_B32_e64_vi:
3205 
3206   case AMDGPU::V_LSHLREV_B16_e32:
3207   case AMDGPU::V_LSHLREV_B16_e64:
3208   case AMDGPU::V_LSHLREV_B16_e32_vi:
3209   case AMDGPU::V_LSHLREV_B16_e64_vi:
3210   case AMDGPU::V_LSHLREV_B16_gfx10:
3211 
3212   case AMDGPU::V_LSHRREV_B16_e32:
3213   case AMDGPU::V_LSHRREV_B16_e64:
3214   case AMDGPU::V_LSHRREV_B16_e32_vi:
3215   case AMDGPU::V_LSHRREV_B16_e64_vi:
3216   case AMDGPU::V_LSHRREV_B16_gfx10:
3217 
3218   case AMDGPU::V_ASHRREV_I16_e32:
3219   case AMDGPU::V_ASHRREV_I16_e64:
3220   case AMDGPU::V_ASHRREV_I16_e32_vi:
3221   case AMDGPU::V_ASHRREV_I16_e64_vi:
3222   case AMDGPU::V_ASHRREV_I16_gfx10:
3223 
3224   case AMDGPU::V_LSHLREV_B64:
3225   case AMDGPU::V_LSHLREV_B64_gfx10:
3226   case AMDGPU::V_LSHLREV_B64_vi:
3227 
3228   case AMDGPU::V_LSHRREV_B64:
3229   case AMDGPU::V_LSHRREV_B64_gfx10:
3230   case AMDGPU::V_LSHRREV_B64_vi:
3231 
3232   case AMDGPU::V_ASHRREV_I64:
3233   case AMDGPU::V_ASHRREV_I64_gfx10:
3234   case AMDGPU::V_ASHRREV_I64_vi:
3235 
3236   case AMDGPU::V_PK_LSHLREV_B16:
3237   case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3238   case AMDGPU::V_PK_LSHLREV_B16_vi:
3239 
3240   case AMDGPU::V_PK_LSHRREV_B16:
3241   case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3242   case AMDGPU::V_PK_LSHRREV_B16_vi:
3243   case AMDGPU::V_PK_ASHRREV_I16:
3244   case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3245   case AMDGPU::V_PK_ASHRREV_I16_vi:
3246     return true;
3247   default:
3248     return false;
3249   }
3250 }
3251 
3252 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3253 
3254   using namespace SIInstrFlags;
3255   const unsigned Opcode = Inst.getOpcode();
3256   const MCInstrDesc &Desc = MII.get(Opcode);
3257 
3258   // The lds_direct register is defined so that it can be used
3259   // with 9-bit operands only. Ignore encodings which do not accept these.
3260   if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3261     return true;
3262 
3263   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3264   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3265   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3266 
3267   const int SrcIndices[] = { Src1Idx, Src2Idx };
3268 
3269   // lds_direct cannot be specified as either src1 or src2.
3270   for (int SrcIdx : SrcIndices) {
3271     if (SrcIdx == -1) break;
3272     const MCOperand &Src = Inst.getOperand(SrcIdx);
3273     if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3274       return false;
3275     }
3276   }
3277 
3278   if (Src0Idx == -1)
3279     return true;
3280 
3281   const MCOperand &Src = Inst.getOperand(Src0Idx);
3282   if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3283     return true;
3284 
3285   // lds_direct is specified as src0. Check additional limitations.
3286   return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3287 }
3288 
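// Return the location of the flat offset modifier operand if one was parsed,
// otherwise the current parsing location.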
3289 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3290   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3291     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3292     if (Op.isFlatOffset())
3293       return Op.getStartLoc();
3294   }
3295   return getLoc();
3296 }
3297 
3298 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3299                                          const OperandVector &Operands) {
3300   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3301   if ((TSFlags & SIInstrFlags::FLAT) == 0)
3302     return true;
3303 
3304   auto Opcode = Inst.getOpcode();
3305   auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3306   assert(OpNum != -1);
3307 
3308   const auto &Op = Inst.getOperand(OpNum);
3309   if (!hasFlatOffsets() && Op.getImm() != 0) {
3310     Error(getFlatOffsetLoc(Operands),
3311           "flat offset modifier is not supported on this GPU");
3312     return false;
3313   }
3314 
3315   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
3316   // For FLAT segment the offset must be positive;
3317   // MSB is ignored and forced to zero.
3318   unsigned OffsetSize = isGFX9() ? 13 : 12;
3319   if (TSFlags & SIInstrFlags::IsNonFlatSeg) {
3320     if (!isIntN(OffsetSize, Op.getImm())) {
3321       Error(getFlatOffsetLoc(Operands),
3322             isGFX9() ? "expected a 13-bit signed offset" :
3323                        "expected a 12-bit signed offset");
3324       return false;
3325     }
3326   } else {
3327     if (!isUIntN(OffsetSize - 1, Op.getImm())) {
3328       Error(getFlatOffsetLoc(Operands),
3329             isGFX9() ? "expected a 12-bit unsigned offset" :
3330                        "expected an 11-bit unsigned offset");
3331       return false;
3332     }
3333   }
3334 
3335   return true;
3336 }
3337 
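// SOP2/SOPC instructions may use at most one 32-bit literal; count distinct
// literal values and expression operands among the sources.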
3338 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3339   unsigned Opcode = Inst.getOpcode();
3340   const MCInstrDesc &Desc = MII.get(Opcode);
3341   if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3342     return true;
3343 
3344   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3345   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3346 
3347   const int OpIndices[] = { Src0Idx, Src1Idx };
3348 
3349   unsigned NumExprs = 0;
3350   unsigned NumLiterals = 0;
3351   uint32_t LiteralValue;
3352 
3353   for (int OpIdx : OpIndices) {
3354     if (OpIdx == -1) break;
3355 
3356     const MCOperand &MO = Inst.getOperand(OpIdx);
3357     // Exclude special imm operands (like that used by s_set_gpr_idx_on)
3358     if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3359       if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3360         uint32_t Value = static_cast<uint32_t>(MO.getImm());
3361         if (NumLiterals == 0 || LiteralValue != Value) {
3362           LiteralValue = Value;
3363           ++NumLiterals;
3364         }
3365       } else if (MO.isExpr()) {
3366         ++NumExprs;
3367       }
3368     }
3369   }
3370 
3371   return NumLiterals + NumExprs <= 1;
3372 }
3373 
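// v_permlane16/v_permlanex16 use only the two low op_sel bits; reject any
// other bit being set.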
3374 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3375   const unsigned Opc = Inst.getOpcode();
3376   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3377       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3378     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3379     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3380 
3381     if (OpSel & ~3)
3382       return false;
3383   }
3384   return true;
3385 }
3386 
3387 // Check if VCC register matches wavefront size
3388 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3389   auto FB = getFeatureBits();
3390   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3391     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3392 }
3393 
3394 // A VOP3 literal is only allowed on GFX10+, and only one can be used.
3395 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const {
3396   unsigned Opcode = Inst.getOpcode();
3397   const MCInstrDesc &Desc = MII.get(Opcode);
3398   if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3399     return true;
3400 
3401   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3402   const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3403   const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3404 
3405   const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3406 
3407   unsigned NumExprs = 0;
3408   unsigned NumLiterals = 0;
3409   uint32_t LiteralValue;
3410 
3411   for (int OpIdx : OpIndices) {
3412     if (OpIdx == -1) break;
3413 
3414     const MCOperand &MO = Inst.getOperand(OpIdx);
3415     if (!MO.isImm() && !MO.isExpr())
3416       continue;
3417     if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3418       continue;
3419 
3420     if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3421         getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug])
3422       return false;
3423 
3424     if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3425       uint32_t Value = static_cast<uint32_t>(MO.getImm());
3426       if (NumLiterals == 0 || LiteralValue != Value) {
3427         LiteralValue = Value;
3428         ++NumLiterals;
3429       }
3430     } else if (MO.isExpr()) {
3431       ++NumExprs;
3432     }
3433   }
3434   NumLiterals += NumExprs;
3435 
3436   return !NumLiterals ||
3437          (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]);
3438 }
3439 
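// Run target-specific semantic checks on a successfully matched instruction
// and report the first violation found.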
3440 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3441                                           const SMLoc &IDLoc,
3442                                           const OperandVector &Operands) {
3443   if (!validateLdsDirect(Inst)) {
3444     Error(IDLoc,
3445       "invalid use of lds_direct");
3446     return false;
3447   }
3448   if (!validateSOPLiteral(Inst)) {
3449     Error(IDLoc,
3450       "only one literal operand is allowed");
3451     return false;
3452   }
3453   if (!validateVOP3Literal(Inst)) {
3454     Error(IDLoc,
3455       "invalid literal operand");
3456     return false;
3457   }
3458   if (!validateConstantBusLimitations(Inst)) {
3459     Error(IDLoc,
3460       "invalid operand (violates constant bus restrictions)");
3461     return false;
3462   }
3463   if (!validateEarlyClobberLimitations(Inst)) {
3464     Error(IDLoc,
3465       "destination must be different than all sources");
3466     return false;
3467   }
3468   if (!validateIntClampSupported(Inst)) {
3469     Error(IDLoc,
3470       "integer clamping is not supported on this GPU");
3471     return false;
3472   }
3473   if (!validateOpSel(Inst)) {
3474     Error(IDLoc,
3475       "invalid op_sel operand");
3476     return false;
3477   }
3478   // For MUBUF/MTBUF d16 is a part of the opcode, so there is nothing to validate.
3479   if (!validateMIMGD16(Inst)) {
3480     Error(IDLoc,
3481       "d16 modifier is not supported on this GPU");
3482     return false;
3483   }
3484   if (!validateMIMGDim(Inst)) {
3485     Error(IDLoc, "dim modifier is required on this GPU");
3486     return false;
3487   }
3488   if (!validateMIMGDataSize(Inst)) {
3489     Error(IDLoc,
3490       "image data size does not match dmask and tfe");
3491     return false;
3492   }
3493   if (!validateMIMGAddrSize(Inst)) {
3494     Error(IDLoc,
3495       "image address size does not match dim and a16");
3496     return false;
3497   }
3498   if (!validateMIMGAtomicDMask(Inst)) {
3499     Error(IDLoc,
3500       "invalid atomic image dmask");
3501     return false;
3502   }
3503   if (!validateMIMGGatherDMask(Inst)) {
3504     Error(IDLoc,
3505       "invalid image_gather dmask: only one bit must be set");
3506     return false;
3507   }
3508   if (!validateMovrels(Inst)) {
3509     Error(IDLoc, "source operand must be a VGPR");
3510     return false;
3511   }
3512   if (!validateFlatOffset(Inst, Operands)) {
3513     return false;
3514   }
3515 
3516   return true;
3517 }
3518 
3519 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3520                                             const FeatureBitset &FBS,
3521                                             unsigned VariantID = 0);
3522 
3523 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3524                                               OperandVector &Operands,
3525                                               MCStreamer &Out,
3526                                               uint64_t &ErrorInfo,
3527                                               bool MatchingInlineAsm) {
3528   MCInst Inst;
3529   unsigned Result = Match_Success;
3530   for (auto Variant : getMatchedVariants()) {
3531     uint64_t EI;
3532     auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3533                                   Variant);
3534     // Match statuses are ordered from least to most specific, and the most
3535     // specific status seen so far is kept as the result:
3536     // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3537     if ((R == Match_Success) ||
3538         (R == Match_PreferE32) ||
3539         (R == Match_MissingFeature && Result != Match_PreferE32) ||
3540         (R == Match_InvalidOperand && Result != Match_MissingFeature
3541                                    && Result != Match_PreferE32) ||
3542         (R == Match_MnemonicFail   && Result != Match_InvalidOperand
3543                                    && Result != Match_MissingFeature
3544                                    && Result != Match_PreferE32)) {
3545       Result = R;
3546       ErrorInfo = EI;
3547     }
3548     if (R == Match_Success)
3549       break;
3550   }
3551 
3552   switch (Result) {
3553   default: break;
3554   case Match_Success:
3555     if (!validateInstruction(Inst, IDLoc, Operands)) {
3556       return true;
3557     }
3558     Inst.setLoc(IDLoc);
3559     Out.EmitInstruction(Inst, getSTI());
3560     return false;
3561 
3562   case Match_MissingFeature:
3563     return Error(IDLoc, "instruction not supported on this GPU");
3564 
3565   case Match_MnemonicFail: {
3566     FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3567     std::string Suggestion = AMDGPUMnemonicSpellCheck(
3568         ((AMDGPUOperand &)*Operands[0]).getToken(), FBS);
3569     return Error(IDLoc, "invalid instruction" + Suggestion,
3570                  ((AMDGPUOperand &)*Operands[0]).getLocRange());
3571   }
3572 
3573   case Match_InvalidOperand: {
3574     SMLoc ErrorLoc = IDLoc;
3575     if (ErrorInfo != ~0ULL) {
3576       if (ErrorInfo >= Operands.size()) {
3577         return Error(IDLoc, "too few operands for instruction");
3578       }
3579       ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
3580       if (ErrorLoc == SMLoc())
3581         ErrorLoc = IDLoc;
3582     }
3583     return Error(ErrorLoc, "invalid operand for instruction");
3584   }
3585 
3586   case Match_PreferE32:
3587     return Error(IDLoc, "internal error: instruction without _e64 suffix "
3588                         "should be encoded as e32");
3589   }
3590   llvm_unreachable("Implement any new match types added!");
3591 }
3592 
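// Parse an absolute expression and truncate it to 32 bits.
// Returns true on failure, following the usual parser convention.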
3593 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
3594   int64_t Tmp = -1;
3595   if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
3596     return true;
3597   }
3598   if (getParser().parseAbsoluteExpression(Tmp)) {
3599     return true;
3600   }
3601   Ret = static_cast<uint32_t>(Tmp);
3602   return false;
3603 }
3604 
3605 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
3606                                                uint32_t &Minor) {
3607   if (ParseAsAbsoluteExpression(Major))
3608     return TokError("invalid major version");
3609 
3610   if (getLexer().isNot(AsmToken::Comma))
3611     return TokError("minor version number required, comma expected");
3612   Lex();
3613 
3614   if (ParseAsAbsoluteExpression(Minor))
3615     return TokError("invalid minor version");
3616 
3617   return false;
3618 }
3619 
3620 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
3621   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3622     return TokError("directive only supported for amdgcn architecture");
3623 
3624   std::string Target;
3625 
3626   SMLoc TargetStart = getTok().getLoc();
3627   if (getParser().parseEscapedString(Target))
3628     return true;
3629   SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
3630 
3631   std::string ExpectedTarget;
3632   raw_string_ostream ExpectedTargetOS(ExpectedTarget);
3633   IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
3634 
3635   if (Target != ExpectedTargetOS.str())
3636     return getParser().Error(TargetRange.Start, "target must match options",
3637                              TargetRange);
3638 
3639   getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
3640   return false;
3641 }
3642 
3643 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
3644   return getParser().Error(Range.Start, "value out of range", Range);
3645 }
3646 
3647 bool AMDGPUAsmParser::calculateGPRBlocks(
3648     const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
3649     bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
3650     SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
3651     unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
3652   // TODO(scott.linder): These calculations are duplicated from
3653   // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
3654   IsaVersion Version = getIsaVersion(getSTI().getCPU());
3655 
3656   unsigned NumVGPRs = NextFreeVGPR;
3657   unsigned NumSGPRs = NextFreeSGPR;
3658 
3659   if (Version.Major >= 10)
3660     NumSGPRs = 0;
3661   else {
3662     unsigned MaxAddressableNumSGPRs =
3663         IsaInfo::getAddressableNumSGPRs(&getSTI());
3664 
3665     if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
3666         NumSGPRs > MaxAddressableNumSGPRs)
3667       return OutOfRangeError(SGPRRange);
3668 
3669     NumSGPRs +=
3670         IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
3671 
3672     if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
3673         NumSGPRs > MaxAddressableNumSGPRs)
3674       return OutOfRangeError(SGPRRange);
3675 
3676     if (Features.test(FeatureSGPRInitBug))
3677       NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
3678   }
3679 
3680   VGPRBlocks =
3681       IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
3682   SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
3683 
3684   return false;
3685 }
3686 
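// Parse the .amdhsa_kernel directive: accumulate .amdhsa_* fields into a
// kernel descriptor and emit it through the target streamer.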
3687 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
3688   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
3689     return TokError("directive only supported for amdgcn architecture");
3690 
3691   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
3692     return TokError("directive only supported for amdhsa OS");
3693 
3694   StringRef KernelName;
3695   if (getParser().parseIdentifier(KernelName))
3696     return true;
3697 
3698   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
3699 
3700   StringSet<> Seen;
3701 
3702   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
3703 
3704   SMRange VGPRRange;
3705   uint64_t NextFreeVGPR = 0;
3706   SMRange SGPRRange;
3707   uint64_t NextFreeSGPR = 0;
3708   unsigned UserSGPRCount = 0;
3709   bool ReserveVCC = true;
3710   bool ReserveFlatScr = true;
3711   bool ReserveXNACK = hasXNACK();
3712   Optional<bool> EnableWavefrontSize32;
3713 
3714   while (true) {
3715     while (getLexer().is(AsmToken::EndOfStatement))
3716       Lex();
3717 
3718     if (getLexer().isNot(AsmToken::Identifier))
3719       return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");
3720 
3721     StringRef ID = getTok().getIdentifier();
3722     SMRange IDRange = getTok().getLocRange();
3723     Lex();
3724 
3725     if (ID == ".end_amdhsa_kernel")
3726       break;
3727 
3728     if (Seen.find(ID) != Seen.end())
3729       return TokError(".amdhsa_ directives cannot be repeated");
3730     Seen.insert(ID);
3731 
3732     SMLoc ValStart = getTok().getLoc();
3733     int64_t IVal;
3734     if (getParser().parseAbsoluteExpression(IVal))
3735       return true;
3736     SMLoc ValEnd = getTok().getLoc();
3737     SMRange ValRange = SMRange(ValStart, ValEnd);
3738 
3739     if (IVal < 0)
3740       return OutOfRangeError(ValRange);
3741 
3742     uint64_t Val = IVal;
3743 
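// Range-check VALUE against the width of the ENTRY bitfield and, if it fits,
// set that field of FIELD; otherwise report an out-of-range error.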
3744 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
3745   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
3746     return OutOfRangeError(RANGE);                                             \
3747   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
3748 
3749     if (ID == ".amdhsa_group_segment_fixed_size") {
3750       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
3751         return OutOfRangeError(ValRange);
3752       KD.group_segment_fixed_size = Val;
3753     } else if (ID == ".amdhsa_private_segment_fixed_size") {
3754       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
3755         return OutOfRangeError(ValRange);
3756       KD.private_segment_fixed_size = Val;
3757     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
3758       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3759                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
3760                        Val, ValRange);
3761       if (Val)
3762         UserSGPRCount += 4;
3763     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
3764       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3765                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
3766                        ValRange);
3767       if (Val)
3768         UserSGPRCount += 2;
3769     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
3770       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3771                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
3772                        ValRange);
3773       if (Val)
3774         UserSGPRCount += 2;
3775     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
3776       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3777                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
3778                        Val, ValRange);
3779       if (Val)
3780         UserSGPRCount += 2;
3781     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
3782       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3783                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
3784                        ValRange);
3785       if (Val)
3786         UserSGPRCount += 2;
3787     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
3788       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3789                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
3790                        ValRange);
3791       if (Val)
3792         UserSGPRCount += 2;
3793     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
3794       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3795                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
3796                        Val, ValRange);
3797       if (Val)
3798         UserSGPRCount += 1;
3799     } else if (ID == ".amdhsa_wavefront_size32") {
3800       if (IVersion.Major < 10)
3801         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3802                                  IDRange);
3803       EnableWavefrontSize32 = Val;
3804       PARSE_BITS_ENTRY(KD.kernel_code_properties,
3805                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
3806                        Val, ValRange);
3807     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
3808       PARSE_BITS_ENTRY(
3809           KD.compute_pgm_rsrc2,
3810           COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
3811           ValRange);
3812     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
3813       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3814                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
3815                        ValRange);
3816     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
3817       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3818                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
3819                        ValRange);
3820     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
3821       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3822                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
3823                        ValRange);
3824     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
3825       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3826                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
3827                        ValRange);
3828     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
3829       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3830                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
3831                        ValRange);
3832     } else if (ID == ".amdhsa_next_free_vgpr") {
3833       VGPRRange = ValRange;
3834       NextFreeVGPR = Val;
3835     } else if (ID == ".amdhsa_next_free_sgpr") {
3836       SGPRRange = ValRange;
3837       NextFreeSGPR = Val;
3838     } else if (ID == ".amdhsa_reserve_vcc") {
3839       if (!isUInt<1>(Val))
3840         return OutOfRangeError(ValRange);
3841       ReserveVCC = Val;
3842     } else if (ID == ".amdhsa_reserve_flat_scratch") {
3843       if (IVersion.Major < 7)
3844         return getParser().Error(IDRange.Start, "directive requires gfx7+",
3845                                  IDRange);
3846       if (!isUInt<1>(Val))
3847         return OutOfRangeError(ValRange);
3848       ReserveFlatScr = Val;
3849     } else if (ID == ".amdhsa_reserve_xnack_mask") {
3850       if (IVersion.Major < 8)
3851         return getParser().Error(IDRange.Start, "directive requires gfx8+",
3852                                  IDRange);
3853       if (!isUInt<1>(Val))
3854         return OutOfRangeError(ValRange);
3855       ReserveXNACK = Val;
3856     } else if (ID == ".amdhsa_float_round_mode_32") {
3857       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3858                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
3859     } else if (ID == ".amdhsa_float_round_mode_16_64") {
3860       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3861                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
3862     } else if (ID == ".amdhsa_float_denorm_mode_32") {
3863       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3864                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
3865     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
3866       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3867                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
3868                        ValRange);
3869     } else if (ID == ".amdhsa_dx10_clamp") {
3870       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
3871                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
3872     } else if (ID == ".amdhsa_ieee_mode") {
3873       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
3874                        Val, ValRange);
3875     } else if (ID == ".amdhsa_fp16_overflow") {
3876       if (IVersion.Major < 9)
3877         return getParser().Error(IDRange.Start, "directive requires gfx9+",
3878                                  IDRange);
3879       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
3880                        ValRange);
3881     } else if (ID == ".amdhsa_workgroup_processor_mode") {
3882       if (IVersion.Major < 10)
3883         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3884                                  IDRange);
3885       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
3886                        ValRange);
3887     } else if (ID == ".amdhsa_memory_ordered") {
3888       if (IVersion.Major < 10)
3889         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3890                                  IDRange);
3891       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
3892                        ValRange);
3893     } else if (ID == ".amdhsa_forward_progress") {
3894       if (IVersion.Major < 10)
3895         return getParser().Error(IDRange.Start, "directive requires gfx10+",
3896                                  IDRange);
3897       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
3898                        ValRange);
3899     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
3900       PARSE_BITS_ENTRY(
3901           KD.compute_pgm_rsrc2,
3902           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
3903           ValRange);
3904     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
3905       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3906                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
3907                        Val, ValRange);
3908     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
3909       PARSE_BITS_ENTRY(
3910           KD.compute_pgm_rsrc2,
3911           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
3912           ValRange);
3913     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
3914       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3915                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
3916                        Val, ValRange);
3917     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
3918       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3919                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
3920                        Val, ValRange);
3921     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
3922       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3923                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
3924                        Val, ValRange);
3925     } else if (ID == ".amdhsa_exception_int_div_zero") {
3926       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
3927                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
3928                        Val, ValRange);
3929     } else {
3930       return getParser().Error(IDRange.Start,
3931                                "unknown .amdhsa_kernel directive", IDRange);
3932     }
3933 
3934 #undef PARSE_BITS_ENTRY
3935   }
3936 
3937   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
3938     return TokError(".amdhsa_next_free_vgpr directive is required");
3939 
3940   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
3941     return TokError(".amdhsa_next_free_sgpr directive is required");
3942 
3943   unsigned VGPRBlocks;
3944   unsigned SGPRBlocks;
3945   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
3946                          ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
3947                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
3948                          SGPRBlocks))
3949     return true;
3950 
3951   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
3952           VGPRBlocks))
3953     return OutOfRangeError(VGPRRange);
3954   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3955                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
3956 
3957   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
3958           SGPRBlocks))
3959     return OutOfRangeError(SGPRRange);
3960   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
3961                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
3962                   SGPRBlocks);
3963 
3964   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
3965     return TokError("too many user SGPRs enabled");
3966   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
3967                   UserSGPRCount);
3968 
3969   getTargetStreamer().EmitAmdhsaKernelDescriptor(
3970       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
3971       ReserveFlatScr, ReserveXNACK);
3972   return false;
3973 }
3974 
3975 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
3976   uint32_t Major;
3977   uint32_t Minor;
3978 
3979   if (ParseDirectiveMajorMinor(Major, Minor))
3980     return true;
3981 
3982   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
3983   return false;
3984 }
3985 
3986 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
3987   uint32_t Major;
3988   uint32_t Minor;
3989   uint32_t Stepping;
3990   StringRef VendorName;
3991   StringRef ArchName;
3992 
3993   // If this directive has no arguments, then use the ISA version for the
3994   // targeted GPU.
3995   if (getLexer().is(AsmToken::EndOfStatement)) {
3996     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
3997     getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
3998                                                       ISA.Stepping,
3999                                                       "AMD", "AMDGPU");
4000     return false;
4001   }
4002 
4003   if (ParseDirectiveMajorMinor(Major, Minor))
4004     return true;
4005 
4006   if (getLexer().isNot(AsmToken::Comma))
4007     return TokError("stepping version number required, comma expected");
4008   Lex();
4009 
4010   if (ParseAsAbsoluteExpression(Stepping))
4011     return TokError("invalid stepping version");
4012 
4013   if (getLexer().isNot(AsmToken::Comma))
4014     return TokError("vendor name required, comma expected");
4015   Lex();
4016 
4017   if (getLexer().isNot(AsmToken::String))
4018     return TokError("invalid vendor name");
4019 
4020   VendorName = getLexer().getTok().getStringContents();
4021   Lex();
4022 
4023   if (getLexer().isNot(AsmToken::Comma))
4024     return TokError("arch name required, comma expected");
4025   Lex();
4026 
4027   if (getLexer().isNot(AsmToken::String))
4028     return TokError("invalid arch name");
4029 
4030   ArchName = getLexer().getTok().getStringContents();
4031   Lex();
4032 
4033   getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4034                                                     VendorName, ArchName);
4035   return false;
4036 }
4037 
4038 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4039                                                amd_kernel_code_t &Header) {
4040   // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4041   // assembly for backwards compatibility.
4042   if (ID == "max_scratch_backing_memory_byte_size") {
4043     Parser.eatToEndOfStatement();
4044     return false;
4045   }
4046 
4047   SmallString<40> ErrStr;
4048   raw_svector_ostream Err(ErrStr);
4049   if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4050     return TokError(Err.str());
4051   }
4052   Lex();
4053 
4054   if (ID == "enable_wavefront_size32") {
4055     if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4056       if (!isGFX10())
4057         return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4058       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4059         return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4060     } else {
4061       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4062         return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4063     }
4064   }
4065 
4066   if (ID == "wavefront_size") {
4067     if (Header.wavefront_size == 5) {
4068       if (!isGFX10())
4069         return TokError("wavefront_size=5 is only allowed on GFX10+");
4070       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4071         return TokError("wavefront_size=5 requires +WavefrontSize32");
4072     } else if (Header.wavefront_size == 6) {
4073       if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4074         return TokError("wavefront_size=6 requires +WavefrontSize64");
4075     }
4076   }
4077 
4078   if (ID == "enable_wgp_mode") {
4079     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
4080       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4081   }
4082 
4083   if (ID == "enable_mem_ordered") {
4084     if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10())
4085       return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4086   }
4087 
4088   if (ID == "enable_fwd_progress") {
4089     if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10())
4090       return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4091   }
4092 
4093   return false;
4094 }
4095 
4096 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4097   amd_kernel_code_t Header;
4098   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4099 
4100   while (true) {
4101     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
4102     // will set the current token to EndOfStatement.
4103     while(getLexer().is(AsmToken::EndOfStatement))
4104       Lex();
4105 
4106     if (getLexer().isNot(AsmToken::Identifier))
4107       return TokError("expected value identifier or .end_amd_kernel_code_t");
4108 
4109     StringRef ID = getLexer().getTok().getIdentifier();
4110     Lex();
4111 
4112     if (ID == ".end_amd_kernel_code_t")
4113       break;
4114 
4115     if (ParseAMDKernelCodeTValue(ID, Header))
4116       return true;
4117   }
4118 
4119   getTargetStreamer().EmitAMDKernelCodeT(Header);
4120 
4121   return false;
4122 }
4123 
4124 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4125   if (getLexer().isNot(AsmToken::Identifier))
4126     return TokError("expected symbol name");
4127 
4128   StringRef KernelName = Parser.getTok().getString();
4129 
4130   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4131                                            ELF::STT_AMDGPU_HSA_KERNEL);
4132   Lex();
4133   if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
4134     KernelScope.initialize(getContext());
4135   return false;
4136 }
4137 
4138 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4139   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4140     return Error(getParser().getTok().getLoc(),
4141                  ".amd_amdgpu_isa directive is not available on non-amdgcn "
4142                  "architectures");
4143   }
4144 
4145   auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();
4146 
4147   std::string ISAVersionStringFromSTI;
4148   raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4149   IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4150 
4151   if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4152     return Error(getParser().getTok().getLoc(),
4153                  ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4154                  "arguments specified through the command line");
4155   }
4156 
4157   getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4158   Lex();
4159 
4160   return false;
4161 }
4162 
4163 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4164   const char *AssemblerDirectiveBegin;
4165   const char *AssemblerDirectiveEnd;
4166   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4167       AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
4168           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4169                             HSAMD::V3::AssemblerDirectiveEnd)
4170           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4171                             HSAMD::AssemblerDirectiveEnd);
4172 
4173   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4174     return Error(getParser().getTok().getLoc(),
4175                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4176                  "not available on non-amdhsa OSes")).str());
4177   }
4178 
4179   std::string HSAMetadataString;
4180   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4181                           HSAMetadataString))
4182     return true;
4183 
4184   if (IsaInfo::hasCodeObjectV3(&getSTI())) {
4185     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4186       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4187   } else {
4188     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4189       return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
4190   }
4191 
4192   return false;
4193 }
4194 
4195 /// Common code to parse out a block of text (typically YAML) between start and
4196 /// end directives.
4197 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4198                                           const char *AssemblerDirectiveEnd,
4199                                           std::string &CollectString) {
4200 
4201   raw_string_ostream CollectStream(CollectString);
4202 
4203   getLexer().setSkipSpace(false);
4204 
4205   bool FoundEnd = false;
4206   while (!getLexer().is(AsmToken::Eof)) {
4207     while (getLexer().is(AsmToken::Space)) {
4208       CollectStream << getLexer().getTok().getString();
4209       Lex();
4210     }
4211 
4212     if (getLexer().is(AsmToken::Identifier)) {
4213       StringRef ID = getLexer().getTok().getIdentifier();
4214       if (ID == AssemblerDirectiveEnd) {
4215         Lex();
4216         FoundEnd = true;
4217         break;
4218       }
4219     }
4220 
4221     CollectStream << Parser.parseStringToEndOfStatement()
4222                   << getContext().getAsmInfo()->getSeparatorString();
4223 
4224     Parser.eatToEndOfStatement();
4225   }
4226 
4227   getLexer().setSkipSpace(true);
4228 
4229   if (getLexer().is(AsmToken::Eof) && !FoundEnd) {
4230     return TokError(Twine("expected directive ") +
4231                     Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4232   }
4233 
4234   CollectStream.flush();
4235   return false;
4236 }
4237 
4238 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4239 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4240   std::string String;
4241   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4242                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4243     return true;
4244 
4245   auto PALMetadata = getTargetStreamer().getPALMetadata();
4246   if (!PALMetadata->setFromString(String))
4247     return Error(getParser().getTok().getLoc(), "invalid PAL metadata");
4248   return false;
4249 }
4250 
4251 /// Parse the assembler directive for old linear-format PAL metadata.
4252 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4253   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4254     return Error(getParser().getTok().getLoc(),
4255                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4256                  "not available on non-amdpal OSes")).str());
4257   }
4258 
4259   auto PALMetadata = getTargetStreamer().getPALMetadata();
4260   PALMetadata->setLegacy();
4261   for (;;) {
4262     uint32_t Key, Value;
4263     if (ParseAsAbsoluteExpression(Key)) {
4264       return TokError(Twine("invalid value in ") +
4265                       Twine(PALMD::AssemblerDirective));
4266     }
4267     if (getLexer().isNot(AsmToken::Comma)) {
4268       return TokError(Twine("expected an even number of values in ") +
4269                       Twine(PALMD::AssemblerDirective));
4270     }
4271     Lex();
4272     if (ParseAsAbsoluteExpression(Value)) {
4273       return TokError(Twine("invalid value in ") +
4274                       Twine(PALMD::AssemblerDirective));
4275     }
4276     PALMetadata->setRegister(Key, Value);
4277     if (getLexer().isNot(AsmToken::Comma))
4278       break;
4279     Lex();
4280   }
4281   return false;
4282 }
4283 
4284 /// ParseDirectiveAMDGPULDS
4285 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4286 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4287   if (getParser().checkForValidSection())
4288     return true;
4289 
4290   StringRef Name;
4291   SMLoc NameLoc = getLexer().getLoc();
4292   if (getParser().parseIdentifier(Name))
4293     return TokError("expected identifier in directive");
4294 
4295   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4296   if (parseToken(AsmToken::Comma, "expected ','"))
4297     return true;
4298 
4299   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4300 
4301   int64_t Size;
4302   SMLoc SizeLoc = getLexer().getLoc();
4303   if (getParser().parseAbsoluteExpression(Size))
4304     return true;
4305   if (Size < 0)
4306     return Error(SizeLoc, "size must be non-negative");
4307   if (Size > LocalMemorySize)
4308     return Error(SizeLoc, "size is too large");
4309 
4310   int64_t Align = 4;
4311   if (getLexer().is(AsmToken::Comma)) {
4312     Lex();
4313     SMLoc AlignLoc = getLexer().getLoc();
4314     if (getParser().parseAbsoluteExpression(Align))
4315       return true;
4316     if (Align < 0 || !isPowerOf2_64(Align))
4317       return Error(AlignLoc, "alignment must be a power of two");
4318 
4319     // Alignment larger than the size of LDS is possible in theory, as long
4320     // as the linker manages to place the symbol at address 0, but we do want
4321     // to make sure the alignment fits nicely into a 32-bit integer.
4322     if (Align >= 1u << 31)
4323       return Error(AlignLoc, "alignment is too large");
4324   }
4325 
4326   if (parseToken(AsmToken::EndOfStatement,
4327                  "unexpected token in '.amdgpu_lds' directive"))
4328     return true;
4329 
4330   Symbol->redefineIfPossible();
4331   if (!Symbol->isUndefined())
4332     return Error(NameLoc, "invalid symbol redefinition");
4333 
4334   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4335   return false;
4336 }
4337 
4338 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4339   StringRef IDVal = DirectiveID.getString();
4340 
4341   if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4342     if (IDVal == ".amdgcn_target")
4343       return ParseDirectiveAMDGCNTarget();
4344 
4345     if (IDVal == ".amdhsa_kernel")
4346       return ParseDirectiveAMDHSAKernel();
4347 
4348     // TODO: Restructure/combine with PAL metadata directive.
4349     if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4350       return ParseDirectiveHSAMetadata();
4351   } else {
4352     if (IDVal == ".hsa_code_object_version")
4353       return ParseDirectiveHSACodeObjectVersion();
4354 
4355     if (IDVal == ".hsa_code_object_isa")
4356       return ParseDirectiveHSACodeObjectISA();
4357 
4358     if (IDVal == ".amd_kernel_code_t")
4359       return ParseDirectiveAMDKernelCodeT();
4360 
4361     if (IDVal == ".amdgpu_hsa_kernel")
4362       return ParseDirectiveAMDGPUHsaKernel();
4363 
4364     if (IDVal == ".amd_amdgpu_isa")
4365       return ParseDirectiveISAVersion();
4366 
4367     if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4368       return ParseDirectiveHSAMetadata();
4369   }
4370 
4371   if (IDVal == ".amdgpu_lds")
4372     return ParseDirectiveAMDGPULDS();
4373 
4374   if (IDVal == PALMD::AssemblerDirectiveBegin)
4375     return ParseDirectivePALMetadataBegin();
4376 
4377   if (IDVal == PALMD::AssemblerDirective)
4378     return ParseDirectivePALMetadata();
4379 
4380   return true;
4381 }
4382 
4383 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4384                                            unsigned RegNo) const {
4385 
4386   for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4387        R.isValid(); ++R) {
4388     if (*R == RegNo)
4389       return isGFX9() || isGFX10();
4390   }
4391 
4392   // GFX10 has 2 more SGPRs, 104 and 105.
4393   for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4394        R.isValid(); ++R) {
4395     if (*R == RegNo)
4396       return hasSGPR104_SGPR105();
4397   }
4398 
4399   switch (RegNo) {
4400   case AMDGPU::SRC_SHARED_BASE:
4401   case AMDGPU::SRC_SHARED_LIMIT:
4402   case AMDGPU::SRC_PRIVATE_BASE:
4403   case AMDGPU::SRC_PRIVATE_LIMIT:
4404   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4405     return !isCI() && !isSI() && !isVI();
4406   case AMDGPU::TBA:
4407   case AMDGPU::TBA_LO:
4408   case AMDGPU::TBA_HI:
4409   case AMDGPU::TMA:
4410   case AMDGPU::TMA_LO:
4411   case AMDGPU::TMA_HI:
4412     return !isGFX9() && !isGFX10();
4413   case AMDGPU::XNACK_MASK:
4414   case AMDGPU::XNACK_MASK_LO:
4415   case AMDGPU::XNACK_MASK_HI:
4416     return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4417   case AMDGPU::SGPR_NULL:
4418     return isGFX10();
4419   default:
4420     break;
4421   }
4422 
4423   if (isCI())
4424     return true;
4425 
4426   if (isSI() || isGFX10()) {
4427     // No flat_scr on SI.
4428     // On GFX10 flat scratch is not a valid register operand and can only be
4429     // accessed with s_setreg/s_getreg.
4430     switch (RegNo) {
4431     case AMDGPU::FLAT_SCR:
4432     case AMDGPU::FLAT_SCR_LO:
4433     case AMDGPU::FLAT_SCR_HI:
4434       return false;
4435     default:
4436       return true;
4437     }
4438   }
4439 
4440   // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4441   // SI/CI have.
4442   for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4443        R.isValid(); ++R) {
4444     if (*R == RegNo)
4445       return hasSGPR102_SGPR103();
4446   }
4447 
4448   return true;
4449 }
4450 
4451 OperandMatchResultTy
4452 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4453                               OperandMode Mode) {
4454   // Try to parse with a custom parser
4455   OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4456 
4457   // If we successfully parsed the operand or if there was an error parsing,
4458   // we are done.
4459   //
4460   // If we are parsing after we reach EndOfStatement then this means we
4461   // are appending default values to the Operands list.  This is only done
4462   // by a custom parser, so we shouldn't continue on to the generic parsing.
4463   if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4464       getLexer().is(AsmToken::EndOfStatement))
4465     return ResTy;
4466 
4467   if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4468     unsigned Prefix = Operands.size();
4469     SMLoc LBraceLoc = getTok().getLoc();
4470     Parser.Lex(); // eat the '['
4471 
4472     for (;;) {
4473       ResTy = parseReg(Operands);
4474       if (ResTy != MatchOperand_Success)
4475         return ResTy;
4476 
4477       if (getLexer().is(AsmToken::RBrac))
4478         break;
4479 
4480       if (getLexer().isNot(AsmToken::Comma))
4481         return MatchOperand_ParseFail;
4482       Parser.Lex();
4483     }
4484 
4485     if (Operands.size() - Prefix > 1) {
4486       Operands.insert(Operands.begin() + Prefix,
4487                       AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4488       Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4489                                                     getTok().getLoc()));
4490     }
4491 
4492     Parser.Lex(); // eat the ']'
4493     return MatchOperand_Success;
4494   }
4495 
4496   return parseRegOrImm(Operands);
4497 }
4498 
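// Strip a trailing _e32/_e64/_dpp/_sdwa encoding suffix from the mnemonic
// and record it as the forced encoding for instruction matching.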
4499 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4500   // Clear any forced encodings from the previous instruction.
4501   setForcedEncodingSize(0);
4502   setForcedDPP(false);
4503   setForcedSDWA(false);
4504 
4505   if (Name.endswith("_e64")) {
4506     setForcedEncodingSize(64);
4507     return Name.substr(0, Name.size() - 4);
4508   } else if (Name.endswith("_e32")) {
4509     setForcedEncodingSize(32);
4510     return Name.substr(0, Name.size() - 4);
4511   } else if (Name.endswith("_dpp")) {
4512     setForcedDPP(true);
4513     return Name.substr(0, Name.size() - 4);
4514   } else if (Name.endswith("_sdwa")) {
4515     setForcedSDWA(true);
4516     return Name.substr(0, Name.size() - 5);
4517   }
4518   return Name;
4519 }
4520 
4521 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4522                                        StringRef Name,
4523                                        SMLoc NameLoc, OperandVector &Operands) {
4524   // Add the instruction mnemonic
4525   Name = parseMnemonicSuffix(Name);
4526   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4527 
4528   bool IsMIMG = Name.startswith("image_");
4529 
4530   while (!getLexer().is(AsmToken::EndOfStatement)) {
4531     OperandMode Mode = OperandMode_Default;
4532     if (IsMIMG && isGFX10() && Operands.size() == 2)
4533       Mode = OperandMode_NSA;
4534     OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4535 
4536     // Eat the comma or space if there is one.
4537     if (getLexer().is(AsmToken::Comma))
4538       Parser.Lex();
4539 
4540     switch (Res) {
4541       case MatchOperand_Success: break;
4542       case MatchOperand_ParseFail:
4543         // FIXME: use real operand location rather than the current location.
4544         Error(getLexer().getLoc(), "failed parsing operand.");
4545         while (!getLexer().is(AsmToken::EndOfStatement)) {
4546           Parser.Lex();
4547         }
4548         return true;
4549       case MatchOperand_NoMatch:
4550         // FIXME: use real operand location rather than the current location.
4551         Error(getLexer().getLoc(), "not a valid operand.");
4552         while (!getLexer().is(AsmToken::EndOfStatement)) {
4553           Parser.Lex();
4554         }
4555         return true;
4556     }
4557   }
4558 
4559   return false;
4560 }
4561 
4562 //===----------------------------------------------------------------------===//
4563 // Utility functions
4564 //===----------------------------------------------------------------------===//
4565 
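// Parse an integer operand written as '<Prefix>:<expression>'.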
4566 OperandMatchResultTy
4567 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4568 
4569   if (!trySkipId(Prefix, AsmToken::Colon))
4570     return MatchOperand_NoMatch;
4571 
4572   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4573 }
4574 
4575 OperandMatchResultTy
4576 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4577                                     AMDGPUOperand::ImmTy ImmTy,
4578                                     bool (*ConvertResult)(int64_t&)) {
4579   SMLoc S = getLoc();
4580   int64_t Value = 0;
4581 
4582   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4583   if (Res != MatchOperand_Success)
4584     return Res;
4585 
4586   if (ConvertResult && !ConvertResult(Value)) {
4587     Error(S, "invalid " + StringRef(Prefix) + " value.");
4588   }
4589 
4590   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4591   return MatchOperand_Success;
4592 }
4593 
4594 OperandMatchResultTy
4595 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
4596                                              OperandVector &Operands,
4597                                              AMDGPUOperand::ImmTy ImmTy,
4598                                              bool (*ConvertResult)(int64_t&)) {
4599   SMLoc S = getLoc();
4600   if (!trySkipId(Prefix, AsmToken::Colon))
4601     return MatchOperand_NoMatch;
4602 
4603   if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
4604     return MatchOperand_ParseFail;
4605 
4606   unsigned Val = 0;
4607   const unsigned MaxSize = 4;
4608 
4609   // FIXME: How to verify the number of elements matches the number of src
4610   // operands?
4611   for (int I = 0; ; ++I) {
4612     int64_t Op;
4613     SMLoc Loc = getLoc();
4614     if (!parseExpr(Op))
4615       return MatchOperand_ParseFail;
4616 
4617     if (Op != 0 && Op != 1) {
4618       Error(Loc, "invalid " + StringRef(Prefix) + " value.");
4619       return MatchOperand_ParseFail;
4620     }
4621 
4622     Val |= (Op << I);
4623 
4624     if (trySkipToken(AsmToken::RBrac))
4625       break;
4626 
4627     if (I + 1 == MaxSize) {
4628       Error(getLoc(), "expected a closing square bracket");
4629       return MatchOperand_ParseFail;
4630     }
4631 
4632     if (!skipToken(AsmToken::Comma, "expected a comma"))
4633       return MatchOperand_ParseFail;
4634   }
4635 
4636   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
4637   return MatchOperand_Success;
4638 }
4639 
4640 OperandMatchResultTy
4641 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
4642                                AMDGPUOperand::ImmTy ImmTy) {
4643   int64_t Bit = 0;
4644   SMLoc S = Parser.getTok().getLoc();
4645 
4646   // If we are at the end of the statement, this is a default argument, so
4647   // use the default value. Otherwise, try to parse the named bit.
4648   if (getLexer().isNot(AsmToken::EndOfStatement)) {
4649     switch(getLexer().getKind()) {
4650       case AsmToken::Identifier: {
4651         StringRef Tok = Parser.getTok().getString();
4652         if (Tok == Name) {
4653           if (Tok == "r128" && isGFX9())
4654             Error(S, "r128 modifier is not supported on this GPU");
4655           if (Tok == "a16" && !isGFX9() && !isGFX10())
4656             Error(S, "a16 modifier is not supported on this GPU");
4657           Bit = 1;
4658           Parser.Lex();
4659         } else if (Tok.startswith("no") && Tok.endswith(Name)) {
4660           Bit = 0;
4661           Parser.Lex();
4662         } else {
4663           return MatchOperand_NoMatch;
4664         }
4665         break;
4666       }
4667       default:
4668         return MatchOperand_NoMatch;
4669     }
4670   }
4671 
4672   if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC)
4673     return MatchOperand_ParseFail;
4674 
4675   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
4676   return MatchOperand_Success;
4677 }
4678 
4679 static void addOptionalImmOperand(
4680   MCInst& Inst, const OperandVector& Operands,
4681   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
4682   AMDGPUOperand::ImmTy ImmT,
4683   int64_t Default = 0) {
4684   auto i = OptionalIdx.find(ImmT);
4685   if (i != OptionalIdx.end()) {
4686     unsigned Idx = i->second;
4687     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
4688   } else {
4689     Inst.addOperand(MCOperand::createImm(Default));
4690   }
4691 }
4692 
4693 OperandMatchResultTy
4694 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
4695   if (getLexer().isNot(AsmToken::Identifier)) {
4696     return MatchOperand_NoMatch;
4697   }
4698   StringRef Tok = Parser.getTok().getString();
4699   if (Tok != Prefix) {
4700     return MatchOperand_NoMatch;
4701   }
4702 
4703   Parser.Lex();
4704   if (getLexer().isNot(AsmToken::Colon)) {
4705     return MatchOperand_ParseFail;
4706   }
4707 
4708   Parser.Lex();
4709   if (getLexer().isNot(AsmToken::Identifier)) {
4710     return MatchOperand_ParseFail;
4711   }
4712 
4713   Value = Parser.getTok().getString();
4714   return MatchOperand_Success;
4715 }
4716 
4717 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
4718 // values to live in a joint format operand in the MCInst encoding.
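// For example (pre-GFX10 tbuffer syntax, illustrative only):
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0
// dfmt must fit in 4 bits and nfmt in 3 bits, matching the range checks below.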
4719 OperandMatchResultTy
4720 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) {
4721   SMLoc S = Parser.getTok().getLoc();
4722   int64_t Dfmt = 0, Nfmt = 0;
4723   // dfmt and nfmt can appear in either order, and each is optional.
4724   bool GotDfmt = false, GotNfmt = false;
4725   while (!GotDfmt || !GotNfmt) {
4726     if (!GotDfmt) {
4727       auto Res = parseIntWithPrefix("dfmt", Dfmt);
4728       if (Res != MatchOperand_NoMatch) {
4729         if (Res != MatchOperand_Success)
4730           return Res;
4731         if (Dfmt >= 16) {
4732           Error(Parser.getTok().getLoc(), "out of range dfmt");
4733           return MatchOperand_ParseFail;
4734         }
4735         GotDfmt = true;
4736         Parser.Lex();
4737         continue;
4738       }
4739     }
4740     if (!GotNfmt) {
4741       auto Res = parseIntWithPrefix("nfmt", Nfmt);
4742       if (Res != MatchOperand_NoMatch) {
4743         if (Res != MatchOperand_Success)
4744           return Res;
4745         if (Nfmt >= 8) {
4746           Error(Parser.getTok().getLoc(), "out of range nfmt");
4747           return MatchOperand_ParseFail;
4748         }
4749         GotNfmt = true;
4750         Parser.Lex();
4751         continue;
4752       }
4753     }
4754     break;
4755   }
4756   if (!GotDfmt && !GotNfmt)
4757     return MatchOperand_NoMatch;
4758   auto Format = Dfmt | Nfmt << 4;
4759   Operands.push_back(
4760       AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT));
4761   return MatchOperand_Success;
4762 }
4763 
4764 //===----------------------------------------------------------------------===//
4765 // ds
4766 //===----------------------------------------------------------------------===//
4767 
4768 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
4769                                     const OperandVector &Operands) {
4770   OptionalImmIndexMap OptionalIdx;
4771 
4772   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4773     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4774 
4775     // Add the register arguments
4776     if (Op.isReg()) {
4777       Op.addRegOperands(Inst, 1);
4778       continue;
4779     }
4780 
4781     // Handle optional arguments
4782     OptionalIdx[Op.getImmTy()] = i;
4783   }
4784 
4785   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
4786   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
4787   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4788 
4789   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4790 }
4791 
4792 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
4793                                 bool IsGdsHardcoded) {
4794   OptionalImmIndexMap OptionalIdx;
4795 
4796   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4797     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4798 
4799     // Add the register arguments
4800     if (Op.isReg()) {
4801       Op.addRegOperands(Inst, 1);
4802       continue;
4803     }
4804 
4805     if (Op.isToken() && Op.getToken() == "gds") {
4806       IsGdsHardcoded = true;
4807       continue;
4808     }
4809 
4810     // Handle optional arguments
4811     OptionalIdx[Op.getImmTy()] = i;
4812   }
4813 
4814   AMDGPUOperand::ImmTy OffsetType =
4815     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
4816      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
4817      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
4818                                                       AMDGPUOperand::ImmTyOffset;
4819 
4820   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
4821 
4822   if (!IsGdsHardcoded) {
4823     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
4824   }
4825   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
4826 }
4827 
4828 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
4829   OptionalImmIndexMap OptionalIdx;
4830 
4831   unsigned OperandIdx[4];
4832   unsigned EnMask = 0;
4833   int SrcIdx = 0;
4834 
4835   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4836     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4837 
4838     // Add the register arguments
4839     if (Op.isReg()) {
4840       assert(SrcIdx < 4);
4841       OperandIdx[SrcIdx] = Inst.size();
4842       Op.addRegOperands(Inst, 1);
4843       ++SrcIdx;
4844       continue;
4845     }
4846 
4847     if (Op.isOff()) {
4848       assert(SrcIdx < 4);
4849       OperandIdx[SrcIdx] = Inst.size();
4850       Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
4851       ++SrcIdx;
4852       continue;
4853     }
4854 
4855     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
4856       Op.addImmOperands(Inst, 1);
4857       continue;
4858     }
4859 
4860     if (Op.isToken() && Op.getToken() == "done")
4861       continue;
4862 
4863     // Handle optional arguments
4864     OptionalIdx[Op.getImmTy()] = i;
4865   }
4866 
4867   assert(SrcIdx == 4);
4868 
4869   bool Compr = false;
4870   if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
4871     Compr = true;
4872     Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
4873     Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
4874     Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
4875   }
4876 
4877   for (auto i = 0; i < SrcIdx; ++i) {
4878     if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
4879       EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
4880     }
4881   }
4882 
4883   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
4884   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
4885 
4886   Inst.addOperand(MCOperand::createImm(EnMask));
4887 }
4888 
4889 //===----------------------------------------------------------------------===//
4890 // s_waitcnt
4891 //===----------------------------------------------------------------------===//
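//
// Typical forms accepted here (illustrative):
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
//   s_waitcnt 0x3f70                ; a raw bitmask expression is also accepted
// The *_sat variants (e.g. vmcnt_sat) clamp an out-of-range value instead of
// reporting an error.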
4892 
4893 static bool
4894 encodeCnt(
4895   const AMDGPU::IsaVersion ISA,
4896   int64_t &IntVal,
4897   int64_t CntVal,
4898   bool Saturate,
4899   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
4900   unsigned (*decode)(const IsaVersion &Version, unsigned))
4901 {
4902   bool Failed = false;
4903 
4904   IntVal = encode(ISA, IntVal, CntVal);
4905   if (CntVal != decode(ISA, IntVal)) {
4906     if (Saturate) {
4907       IntVal = encode(ISA, IntVal, -1);
4908     } else {
4909       Failed = true;
4910     }
4911   }
4912   return Failed;
4913 }
4914 
4915 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
4916 
4917   SMLoc CntLoc = getLoc();
4918   StringRef CntName = getTokenStr();
4919 
4920   if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
4921       !skipToken(AsmToken::LParen, "expected a left parenthesis"))
4922     return false;
4923 
4924   int64_t CntVal;
4925   SMLoc ValLoc = getLoc();
4926   if (!parseExpr(CntVal))
4927     return false;
4928 
4929   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4930 
4931   bool Failed = true;
4932   bool Sat = CntName.endswith("_sat");
4933 
4934   if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
4935     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
4936   } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
4937     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
4938   } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
4939     Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
4940   } else {
4941     Error(CntLoc, "invalid counter name " + CntName);
4942     return false;
4943   }
4944 
4945   if (Failed) {
4946     Error(ValLoc, "too large value for " + CntName);
4947     return false;
4948   }
4949 
4950   if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
4951     return false;
4952 
4953   if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
4954     if (isToken(AsmToken::EndOfStatement)) {
4955       Error(getLoc(), "expected a counter name");
4956       return false;
4957     }
4958   }
4959 
4960   return true;
4961 }
4962 
4963 OperandMatchResultTy
4964 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
4965   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4966   int64_t Waitcnt = getWaitcntBitMask(ISA);
4967   SMLoc S = getLoc();
4968 
4969   // If parse failed, do not return error code
4970   // to avoid excessive error messages.
4971   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
4972     while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement));
4973   } else {
4974     parseExpr(Waitcnt);
4975   }
4976 
4977   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
4978   return MatchOperand_Success;
4979 }
4980 
4981 bool
4982 AMDGPUOperand::isSWaitCnt() const {
4983   return isImm();
4984 }
4985 
4986 //===----------------------------------------------------------------------===//
4987 // hwreg
4988 //===----------------------------------------------------------------------===//
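//
// Accepted forms (illustrative), by symbolic name or numeric code:
//   s_getreg_b32 s2, hwreg(HW_REG_MODE)             ; whole register
//   s_getreg_b32 s2, hwreg(HW_REG_TRAPSTS, 0, 1)    ; bit offset 0, width 1
//   s_setreg_b32 hwreg(6, 0, 32), s2                ; numeric register code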
4989 
4990 bool
4991 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
4992                                 int64_t &Offset,
4993                                 int64_t &Width) {
4994   using namespace llvm::AMDGPU::Hwreg;
4995 
4996   // The register may be specified by name or using a numeric code
4997   if (isToken(AsmToken::Identifier) &&
4998       (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
4999     HwReg.IsSymbolic = true;
5000     lex(); // skip register name
5001   } else if (!parseExpr(HwReg.Id)) {
5002     return false;
5003   }
5004 
5005   if (trySkipToken(AsmToken::RParen))
5006     return true;
5007 
5008   // parse optional params
5009   return
5010     skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") &&
5011     parseExpr(Offset) &&
5012     skipToken(AsmToken::Comma, "expected a comma") &&
5013     parseExpr(Width) &&
5014     skipToken(AsmToken::RParen, "expected a closing parenthesis");
5015 }
5016 
5017 bool
5018 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5019                                const int64_t Offset,
5020                                const int64_t Width,
5021                                const SMLoc Loc) {
5022 
5023   using namespace llvm::AMDGPU::Hwreg;
5024 
5025   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5026     Error(Loc, "specified hardware register is not supported on this GPU");
5027     return false;
5028   } else if (!isValidHwreg(HwReg.Id)) {
5029     Error(Loc, "invalid code of hardware register: only 6-bit values are legal");
5030     return false;
5031   } else if (!isValidHwregOffset(Offset)) {
5032     Error(Loc, "invalid bit offset: only 5-bit values are legal");
5033     return false;
5034   } else if (!isValidHwregWidth(Width)) {
5035     Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal");
5036     return false;
5037   }
5038   return true;
5039 }
5040 
5041 OperandMatchResultTy
5042 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5043   using namespace llvm::AMDGPU::Hwreg;
5044 
5045   int64_t ImmVal = 0;
5046   SMLoc Loc = getLoc();
5047 
5048   // If parse failed, do not return error code
5049   // to avoid excessive error messages.
5050   if (trySkipId("hwreg", AsmToken::LParen)) {
5051     OperandInfoTy HwReg(ID_UNKNOWN_);
5052     int64_t Offset = OFFSET_DEFAULT_;
5053     int64_t Width = WIDTH_DEFAULT_;
5054     if (parseHwregBody(HwReg, Offset, Width) &&
5055         validateHwreg(HwReg, Offset, Width, Loc)) {
5056       ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5057     }
5058   } else if (parseExpr(ImmVal)) {
5059     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5060       Error(Loc, "invalid immediate: only 16-bit values are legal");
5061   }
5062 
5063   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5064   return MatchOperand_Success;
5065 }
5066 
5067 bool AMDGPUOperand::isHwreg() const {
5068   return isImmTy(ImmTyHwreg);
5069 }
5070 
5071 //===----------------------------------------------------------------------===//
5072 // sendmsg
5073 //===----------------------------------------------------------------------===//
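//
// Accepted forms (illustrative):
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x22                   ; raw 16-bit immediate is also accepted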
5074 
5075 bool
5076 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5077                                   OperandInfoTy &Op,
5078                                   OperandInfoTy &Stream) {
5079   using namespace llvm::AMDGPU::SendMsg;
5080 
5081   if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5082     Msg.IsSymbolic = true;
5083     lex(); // skip message name
5084   } else if (!parseExpr(Msg.Id)) {
5085     return false;
5086   }
5087 
5088   if (trySkipToken(AsmToken::Comma)) {
5089     Op.IsDefined = true;
5090     if (isToken(AsmToken::Identifier) &&
5091         (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5092       lex(); // skip operation name
5093     } else if (!parseExpr(Op.Id)) {
5094       return false;
5095     }
5096 
5097     if (trySkipToken(AsmToken::Comma)) {
5098       Stream.IsDefined = true;
5099       if (!parseExpr(Stream.Id))
5100         return false;
5101     }
5102   }
5103 
5104   return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5105 }
5106 
5107 bool
5108 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5109                                  const OperandInfoTy &Op,
5110                                  const OperandInfoTy &Stream,
5111                                  const SMLoc S) {
5112   using namespace llvm::AMDGPU::SendMsg;
5113 
5114   // Validation strictness depends on whether the message is specified
5115   // in a symbolic or in a numeric form. In the latter case
5116   // only the possibility of encoding is checked.
5117   bool Strict = Msg.IsSymbolic;
5118 
5119   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5120     Error(S, "invalid message id");
5121     return false;
5122   } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5123     Error(S, Op.IsDefined ?
5124              "message does not support operations" :
5125              "missing message operation");
5126     return false;
5127   } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5128     Error(S, "invalid operation id");
5129     return false;
5130   } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5131     Error(S, "message operation does not support streams");
5132     return false;
5133   } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5134     Error(S, "invalid message stream id");
5135     return false;
5136   }
5137   return true;
5138 }
5139 
5140 OperandMatchResultTy
5141 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5142   using namespace llvm::AMDGPU::SendMsg;
5143 
5144   int64_t ImmVal = 0;
5145   SMLoc Loc = getLoc();
5146 
5147   // If parse failed, do not return error code
5148   // to avoid excessive error messages.
5149   if (trySkipId("sendmsg", AsmToken::LParen)) {
5150     OperandInfoTy Msg(ID_UNKNOWN_);
5151     OperandInfoTy Op(OP_NONE_);
5152     OperandInfoTy Stream(STREAM_ID_NONE_);
5153     if (parseSendMsgBody(Msg, Op, Stream) &&
5154         validateSendMsg(Msg, Op, Stream, Loc)) {
5155       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5156     }
5157   } else if (parseExpr(ImmVal)) {
5158     if (ImmVal < 0 || !isUInt<16>(ImmVal))
5159       Error(Loc, "invalid immediate: only 16-bit values are legal");
5160   }
5161 
5162   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5163   return MatchOperand_Success;
5164 }
5165 
5166 bool AMDGPUOperand::isSendMsg() const {
5167   return isImmTy(ImmTySendMsg);
5168 }
5169 
5170 //===----------------------------------------------------------------------===//
5171 // v_interp
5172 //===----------------------------------------------------------------------===//
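//
// Interpolation operands parsed below (illustrative; register choices are
// placeholders):
//   v_interp_p1_f32 v1, v0, attr0.x      ; attr<N>.<chan>, N checked against 63
//   v_interp_mov_f32 v2, p10, attr1.y    ; slots: p10, p20, p0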
5173 
5174 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5175   if (getLexer().getKind() != AsmToken::Identifier)
5176     return MatchOperand_NoMatch;
5177 
5178   StringRef Str = Parser.getTok().getString();
5179   int Slot = StringSwitch<int>(Str)
5180     .Case("p10", 0)
5181     .Case("p20", 1)
5182     .Case("p0", 2)
5183     .Default(-1);
5184 
5185   SMLoc S = Parser.getTok().getLoc();
5186   if (Slot == -1)
5187     return MatchOperand_ParseFail;
5188 
5189   Parser.Lex();
5190   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5191                                               AMDGPUOperand::ImmTyInterpSlot));
5192   return MatchOperand_Success;
5193 }
5194 
5195 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5196   if (getLexer().getKind() != AsmToken::Identifier)
5197     return MatchOperand_NoMatch;
5198 
5199   StringRef Str = Parser.getTok().getString();
5200   if (!Str.startswith("attr"))
5201     return MatchOperand_NoMatch;
5202 
5203   StringRef Chan = Str.take_back(2);
5204   int AttrChan = StringSwitch<int>(Chan)
5205     .Case(".x", 0)
5206     .Case(".y", 1)
5207     .Case(".z", 2)
5208     .Case(".w", 3)
5209     .Default(-1);
5210   if (AttrChan == -1)
5211     return MatchOperand_ParseFail;
5212 
5213   Str = Str.drop_back(2).drop_front(4);
5214 
5215   uint8_t Attr;
5216   if (Str.getAsInteger(10, Attr))
5217     return MatchOperand_ParseFail;
5218 
5219   SMLoc S = Parser.getTok().getLoc();
5220   Parser.Lex();
5221   if (Attr > 63) {
5222     Error(S, "out of bounds attr");
5223     return MatchOperand_Success;
5224   }
5225 
5226   SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5227 
5228   Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5229                                               AMDGPUOperand::ImmTyInterpAttr));
5230   Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5231                                               AMDGPUOperand::ImmTyAttrChan));
5232   return MatchOperand_Success;
5233 }
5234 
5235 //===----------------------------------------------------------------------===//
5236 // exp
5237 //===----------------------------------------------------------------------===//
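//
// Export target syntax handled below (illustrative):
//   exp mrt0 v0, v0, v0, v0 done vm
// Recognized targets include mrt0..mrt7, mrtz, null, pos0..pos3 (pos4 on
// GFX10), param0..param31, and prim (GFX10 only).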
5238 
5239 void AMDGPUAsmParser::errorExpTgt() {
5240   Error(Parser.getTok().getLoc(), "invalid exp target");
5241 }
5242 
5243 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
5244                                                       uint8_t &Val) {
5245   if (Str == "null") {
5246     Val = 9;
5247     return MatchOperand_Success;
5248   }
5249 
5250   if (Str.startswith("mrt")) {
5251     Str = Str.drop_front(3);
5252     if (Str == "z") { // == mrtz
5253       Val = 8;
5254       return MatchOperand_Success;
5255     }
5256 
5257     if (Str.getAsInteger(10, Val))
5258       return MatchOperand_ParseFail;
5259 
5260     if (Val > 7)
5261       errorExpTgt();
5262 
5263     return MatchOperand_Success;
5264   }
5265 
5266   if (Str.startswith("pos")) {
5267     Str = Str.drop_front(3);
5268     if (Str.getAsInteger(10, Val))
5269       return MatchOperand_ParseFail;
5270 
5271     if (Val > 4 || (Val == 4 && !isGFX10()))
5272       errorExpTgt();
5273 
5274     Val += 12;
5275     return MatchOperand_Success;
5276   }
5277 
5278   if (isGFX10() && Str == "prim") {
5279     Val = 20;
5280     return MatchOperand_Success;
5281   }
5282 
5283   if (Str.startswith("param")) {
5284     Str = Str.drop_front(5);
5285     if (Str.getAsInteger(10, Val))
5286       return MatchOperand_ParseFail;
5287 
5288     if (Val >= 32)
5289       errorExpTgt();
5290 
5291     Val += 32;
5292     return MatchOperand_Success;
5293   }
5294 
5295   if (Str.startswith("invalid_target_")) {
5296     Str = Str.drop_front(15);
5297     if (Str.getAsInteger(10, Val))
5298       return MatchOperand_ParseFail;
5299 
5300     errorExpTgt();
5301     return MatchOperand_Success;
5302   }
5303 
5304   return MatchOperand_NoMatch;
5305 }
5306 
5307 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5308   uint8_t Val;
5309   StringRef Str = Parser.getTok().getString();
5310 
5311   auto Res = parseExpTgtImpl(Str, Val);
5312   if (Res != MatchOperand_Success)
5313     return Res;
5314 
5315   SMLoc S = Parser.getTok().getLoc();
5316   Parser.Lex();
5317 
5318   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
5319                                               AMDGPUOperand::ImmTyExpTgt));
5320   return MatchOperand_Success;
5321 }
5322 
5323 //===----------------------------------------------------------------------===//
5324 // parser helpers
5325 //===----------------------------------------------------------------------===//
5326 
5327 bool
5328 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5329   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5330 }
5331 
5332 bool
5333 AMDGPUAsmParser::isId(const StringRef Id) const {
5334   return isId(getToken(), Id);
5335 }
5336 
5337 bool
5338 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5339   return getTokenKind() == Kind;
5340 }
5341 
5342 bool
5343 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5344   if (isId(Id)) {
5345     lex();
5346     return true;
5347   }
5348   return false;
5349 }
5350 
5351 bool
5352 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5353   if (isId(Id) && peekToken().is(Kind)) {
5354     lex();
5355     lex();
5356     return true;
5357   }
5358   return false;
5359 }
5360 
5361 bool
5362 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5363   if (isToken(Kind)) {
5364     lex();
5365     return true;
5366   }
5367   return false;
5368 }
5369 
5370 bool
5371 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5372                            const StringRef ErrMsg) {
5373   if (!trySkipToken(Kind)) {
5374     Error(getLoc(), ErrMsg);
5375     return false;
5376   }
5377   return true;
5378 }
5379 
5380 bool
5381 AMDGPUAsmParser::parseExpr(int64_t &Imm) {
5382   return !getParser().parseAbsoluteExpression(Imm);
5383 }
5384 
5385 bool
5386 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5387   SMLoc S = getLoc();
5388 
5389   const MCExpr *Expr;
5390   if (Parser.parseExpression(Expr))
5391     return false;
5392 
5393   int64_t IntVal;
5394   if (Expr->evaluateAsAbsolute(IntVal)) {
5395     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5396   } else {
5397     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5398   }
5399   return true;
5400 }
5401 
5402 bool
5403 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
5404   if (isToken(AsmToken::String)) {
5405     Val = getToken().getStringContents();
5406     lex();
5407     return true;
5408   } else {
5409     Error(getLoc(), ErrMsg);
5410     return false;
5411   }
5412 }
5413 
5414 AsmToken
5415 AMDGPUAsmParser::getToken() const {
5416   return Parser.getTok();
5417 }
5418 
5419 AsmToken
5420 AMDGPUAsmParser::peekToken() {
5421   return getLexer().peekTok();
5422 }
5423 
5424 void
5425 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
5426   auto TokCount = getLexer().peekTokens(Tokens);
5427 
5428   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
5429     Tokens[Idx] = AsmToken(AsmToken::Error, "");
5430 }
5431 
5432 AsmToken::TokenKind
5433 AMDGPUAsmParser::getTokenKind() const {
5434   return getLexer().getKind();
5435 }
5436 
5437 SMLoc
5438 AMDGPUAsmParser::getLoc() const {
5439   return getToken().getLoc();
5440 }
5441 
5442 StringRef
5443 AMDGPUAsmParser::getTokenStr() const {
5444   return getToken().getString();
5445 }
5446 
5447 void
5448 AMDGPUAsmParser::lex() {
5449   Parser.Lex();
5450 }
5451 
5452 //===----------------------------------------------------------------------===//
5453 // swizzle
5454 //===----------------------------------------------------------------------===//
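//
// ds_swizzle_b32 offsets parsed below (illustrative; operands are placeholders):
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST, 2, 0)
//   ds_swizzle_b32 v8, v2 offset:0x8000        ; raw 16-bit offset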
5455 
5456 LLVM_READNONE
5457 static unsigned
5458 encodeBitmaskPerm(const unsigned AndMask,
5459                   const unsigned OrMask,
5460                   const unsigned XorMask) {
5461   using namespace llvm::AMDGPU::Swizzle;
5462 
5463   return BITMASK_PERM_ENC |
5464          (AndMask << BITMASK_AND_SHIFT) |
5465          (OrMask  << BITMASK_OR_SHIFT)  |
5466          (XorMask << BITMASK_XOR_SHIFT);
5467 }
5468 
5469 bool
5470 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
5471                                       const unsigned MinVal,
5472                                       const unsigned MaxVal,
5473                                       const StringRef ErrMsg) {
5474   for (unsigned i = 0; i < OpNum; ++i) {
5475     if (!skipToken(AsmToken::Comma, "expected a comma")){
5476       return false;
5477     }
5478     SMLoc ExprLoc = Parser.getTok().getLoc();
5479     if (!parseExpr(Op[i])) {
5480       return false;
5481     }
5482     if (Op[i] < MinVal || Op[i] > MaxVal) {
5483       Error(ExprLoc, ErrMsg);
5484       return false;
5485     }
5486   }
5487 
5488   return true;
5489 }
5490 
5491 bool
5492 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
5493   using namespace llvm::AMDGPU::Swizzle;
5494 
5495   int64_t Lane[LANE_NUM];
5496   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
5497                            "expected a 2-bit lane id")) {
5498     Imm = QUAD_PERM_ENC;
5499     for (unsigned I = 0; I < LANE_NUM; ++I) {
5500       Imm |= Lane[I] << (LANE_SHIFT * I);
5501     }
5502     return true;
5503   }
5504   return false;
5505 }
5506 
5507 bool
5508 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
5509   using namespace llvm::AMDGPU::Swizzle;
5510 
5511   SMLoc S = Parser.getTok().getLoc();
5512   int64_t GroupSize;
5513   int64_t LaneIdx;
5514 
5515   if (!parseSwizzleOperands(1, &GroupSize,
5516                             2, 32,
5517                             "group size must be in the interval [2,32]")) {
5518     return false;
5519   }
5520   if (!isPowerOf2_64(GroupSize)) {
5521     Error(S, "group size must be a power of two");
5522     return false;
5523   }
5524   if (parseSwizzleOperands(1, &LaneIdx,
5525                            0, GroupSize - 1,
5526                            "lane id must be in the interval [0,group size - 1]")) {
5527     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
5528     return true;
5529   }
5530   return false;
5531 }
5532 
5533 bool
5534 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
5535   using namespace llvm::AMDGPU::Swizzle;
5536 
5537   SMLoc S = Parser.getTok().getLoc();
5538   int64_t GroupSize;
5539 
5540   if (!parseSwizzleOperands(1, &GroupSize,
5541       2, 32, "group size must be in the interval [2,32]")) {
5542     return false;
5543   }
5544   if (!isPowerOf2_64(GroupSize)) {
5545     Error(S, "group size must be a power of two");
5546     return false;
5547   }
5548 
5549   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
5550   return true;
5551 }
5552 
5553 bool
5554 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
5555   using namespace llvm::AMDGPU::Swizzle;
5556 
5557   SMLoc S = Parser.getTok().getLoc();
5558   int64_t GroupSize;
5559 
5560   if (!parseSwizzleOperands(1, &GroupSize,
5561       1, 16, "group size must be in the interval [1,16]")) {
5562     return false;
5563   }
5564   if (!isPowerOf2_64(GroupSize)) {
5565     Error(S, "group size must be a power of two");
5566     return false;
5567   }
5568 
5569   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
5570   return true;
5571 }
5572 
5573 bool
5574 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
5575   using namespace llvm::AMDGPU::Swizzle;
5576 
5577   if (!skipToken(AsmToken::Comma, "expected a comma")) {
5578     return false;
5579   }
5580 
5581   StringRef Ctl;
5582   SMLoc StrLoc = Parser.getTok().getLoc();
5583   if (!parseString(Ctl)) {
5584     return false;
5585   }
5586   if (Ctl.size() != BITMASK_WIDTH) {
5587     Error(StrLoc, "expected a 5-character mask");
5588     return false;
5589   }
5590 
5591   unsigned AndMask = 0;
5592   unsigned OrMask = 0;
5593   unsigned XorMask = 0;
5594 
5595   for (size_t i = 0; i < Ctl.size(); ++i) {
5596     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
5597     switch(Ctl[i]) {
5598     default:
5599       Error(StrLoc, "invalid mask");
5600       return false;
5601     case '0':
5602       break;
5603     case '1':
5604       OrMask |= Mask;
5605       break;
5606     case 'p':
5607       AndMask |= Mask;
5608       break;
5609     case 'i':
5610       AndMask |= Mask;
5611       XorMask |= Mask;
5612       break;
5613     }
5614   }
5615 
5616   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
5617   return true;
5618 }
5619 
5620 bool
5621 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
5622 
5623   SMLoc OffsetLoc = Parser.getTok().getLoc();
5624 
5625   if (!parseExpr(Imm)) {
5626     return false;
5627   }
5628   if (!isUInt<16>(Imm)) {
5629     Error(OffsetLoc, "expected a 16-bit offset");
5630     return false;
5631   }
5632   return true;
5633 }
5634 
5635 bool
5636 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
5637   using namespace llvm::AMDGPU::Swizzle;
5638 
5639   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
5640 
5641     SMLoc ModeLoc = Parser.getTok().getLoc();
5642     bool Ok = false;
5643 
5644     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
5645       Ok = parseSwizzleQuadPerm(Imm);
5646     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
5647       Ok = parseSwizzleBitmaskPerm(Imm);
5648     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
5649       Ok = parseSwizzleBroadcast(Imm);
5650     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
5651       Ok = parseSwizzleSwap(Imm);
5652     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
5653       Ok = parseSwizzleReverse(Imm);
5654     } else {
5655       Error(ModeLoc, "expected a swizzle mode");
5656     }
5657 
5658     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
5659   }
5660 
5661   return false;
5662 }
5663 
5664 OperandMatchResultTy
5665 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
5666   SMLoc S = Parser.getTok().getLoc();
5667   int64_t Imm = 0;
5668 
5669   if (trySkipId("offset")) {
5670 
5671     bool Ok = false;
5672     if (skipToken(AsmToken::Colon, "expected a colon")) {
5673       if (trySkipId("swizzle")) {
5674         Ok = parseSwizzleMacro(Imm);
5675       } else {
5676         Ok = parseSwizzleOffset(Imm);
5677       }
5678     }
5679 
5680     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
5681 
5682     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
5683   } else {
5684     // Swizzle "offset" operand is optional.
5685     // If it is omitted, try parsing other optional operands.
5686     return parseOptionalOpr(Operands);
5687   }
5688 }
5689 
5690 bool
5691 AMDGPUOperand::isSwizzle() const {
5692   return isImmTy(ImmTySwizzle);
5693 }
5694 
5695 //===----------------------------------------------------------------------===//
5696 // VGPR Index Mode
5697 //===----------------------------------------------------------------------===//
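//
// Example of the gpr_idx macro parsed below (illustrative):
//   s_set_gpr_idx_on s0, gpr_idx(SRC0, DST)
// A raw 4-bit immediate is also accepted in place of the macro.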
5698 
5699 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
5700 
5701   using namespace llvm::AMDGPU::VGPRIndexMode;
5702 
5703   if (trySkipToken(AsmToken::RParen)) {
5704     return OFF;
5705   }
5706 
5707   int64_t Imm = 0;
5708 
5709   while (true) {
5710     unsigned Mode = 0;
5711     SMLoc S = Parser.getTok().getLoc();
5712 
5713     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
5714       if (trySkipId(IdSymbolic[ModeId])) {
5715         Mode = 1 << ModeId;
5716         break;
5717       }
5718     }
5719 
5720     if (Mode == 0) {
5721       Error(S, (Imm == 0)?
5722                "expected a VGPR index mode or a closing parenthesis" :
5723                "expected a VGPR index mode");
5724       break;
5725     }
5726 
5727     if (Imm & Mode) {
5728       Error(S, "duplicate VGPR index mode");
5729       break;
5730     }
5731     Imm |= Mode;
5732 
5733     if (trySkipToken(AsmToken::RParen))
5734       break;
5735     if (!skipToken(AsmToken::Comma,
5736                    "expected a comma or a closing parenthesis"))
5737       break;
5738   }
5739 
5740   return Imm;
5741 }
5742 
5743 OperandMatchResultTy
5744 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
5745 
5746   int64_t Imm = 0;
5747   SMLoc S = Parser.getTok().getLoc();
5748 
5749   if (getLexer().getKind() == AsmToken::Identifier &&
5750       Parser.getTok().getString() == "gpr_idx" &&
5751       getLexer().peekTok().is(AsmToken::LParen)) {
5752 
5753     Parser.Lex();
5754     Parser.Lex();
5755 
5756     // If parse failed, trigger an error but do not return error code
5757     // to avoid excessive error messages.
5758     Imm = parseGPRIdxMacro();
5759 
5760   } else {
5761     if (getParser().parseAbsoluteExpression(Imm))
5762       return MatchOperand_NoMatch;
5763     if (Imm < 0 || !isUInt<4>(Imm)) {
5764       Error(S, "invalid immediate: only 4-bit values are legal");
5765     }
5766   }
5767 
5768   Operands.push_back(
5769       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
5770   return MatchOperand_Success;
5771 }
5772 
5773 bool AMDGPUOperand::isGPRIdxMode() const {
5774   return isImmTy(ImmTyGprIdxMode);
5775 }
5776 
5777 //===----------------------------------------------------------------------===//
5778 // sopp branch targets
5779 //===----------------------------------------------------------------------===//
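//
// Branch targets may be labels or absolute expressions (illustrative):
//   s_branch loop_header
//   s_cbranch_execz 12               ; 16-bit signed offset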
5780 
5781 OperandMatchResultTy
5782 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
5783 
5784   // Make sure we are not parsing something
5785   // that looks like a label or an expression but is not.
5786   // This will improve error messages.
5787   if (isRegister() || isModifier())
5788     return MatchOperand_NoMatch;
5789 
5790   if (parseExpr(Operands)) {
5791 
5792     AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
5793     assert(Opr.isImm() || Opr.isExpr());
5794     SMLoc Loc = Opr.getStartLoc();
5795 
5796     // Currently we do not support arbitrary expressions as branch targets.
5797     // Only labels and absolute expressions are accepted.
5798     if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
5799       Error(Loc, "expected an absolute expression or a label");
5800     } else if (Opr.isImm() && !Opr.isS16Imm()) {
5801       Error(Loc, "expected a 16-bit signed jump offset");
5802     }
5803   }
5804 
5805   return MatchOperand_Success; // avoid excessive error messages
5806 }
5807 
5808 //===----------------------------------------------------------------------===//
5809 // Boolean holding registers
5810 //===----------------------------------------------------------------------===//
5811 
5812 OperandMatchResultTy
5813 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
5814   return parseReg(Operands);
5815 }
5816 
5817 //===----------------------------------------------------------------------===//
5818 // mubuf
5819 //===----------------------------------------------------------------------===//
5820 
5821 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
5822   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
5823 }
5824 
5825 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
5826   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
5827 }
5828 
5829 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
5830   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
5831 }
5832 
5833 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
5834                                const OperandVector &Operands,
5835                                bool IsAtomic,
5836                                bool IsAtomicReturn,
5837                                bool IsLds) {
5838   bool IsLdsOpcode = IsLds;
5839   bool HasLdsModifier = false;
5840   OptionalImmIndexMap OptionalIdx;
5841   assert(IsAtomicReturn ? IsAtomic : true);
5842   unsigned FirstOperandIdx = 1;
5843 
5844   for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
5845     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5846 
5847     // Add the register arguments
5848     if (Op.isReg()) {
5849       Op.addRegOperands(Inst, 1);
5850       // Insert a tied src for atomic return dst.
5851       // This cannot be postponed as subsequent calls to
5852       // addImmOperands rely on correct number of MC operands.
5853       if (IsAtomicReturn && i == FirstOperandIdx)
5854         Op.addRegOperands(Inst, 1);
5855       continue;
5856     }
5857 
5858     // Handle the case where soffset is an immediate
5859     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5860       Op.addImmOperands(Inst, 1);
5861       continue;
5862     }
5863 
5864     HasLdsModifier |= Op.isLDS();
5865 
5866     // Handle tokens like 'offen' which are sometimes hard-coded into the
5867     // asm string.  There are no MCInst operands for these.
5868     if (Op.isToken()) {
5869       continue;
5870     }
5871     assert(Op.isImm());
5872 
5873     // Handle optional arguments
5874     OptionalIdx[Op.getImmTy()] = i;
5875   }
5876 
5877   // This is a workaround for an llvm quirk which may result in an
5878   // incorrect instruction selection. Lds and non-lds versions of
5879   // MUBUF instructions are identical except that lds versions
5880   // have a mandatory 'lds' modifier. However, this modifier follows
5881   // optional modifiers, and the llvm asm matcher regards this 'lds'
5882   // modifier as an optional one. As a result, an lds version
5883   // of an opcode may be selected even if it has no 'lds' modifier.
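  // For example (illustrative), "buffer_load_dword v5, off, s[8:11], s3 lds"
  // must select the lds opcode, while the same text without the trailing
  // 'lds' must select the regular opcode.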
5884   if (IsLdsOpcode && !HasLdsModifier) {
5885     int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
5886     if (NoLdsOpcode != -1) { // Got lds version - correct it.
5887       Inst.setOpcode(NoLdsOpcode);
5888       IsLdsOpcode = false;
5889     }
5890   }
5891 
5892   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
5893   if (!IsAtomic) { // glc is hard-coded.
5894     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5895   }
5896   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5897 
5898   if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
5899     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5900   }
5901 
5902   if (isGFX10())
5903     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5904 }
5905 
5906 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
5907   OptionalImmIndexMap OptionalIdx;
5908 
5909   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5910     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5911 
5912     // Add the register arguments
5913     if (Op.isReg()) {
5914       Op.addRegOperands(Inst, 1);
5915       continue;
5916     }
5917 
5918     // Handle the case where soffset is an immediate
5919     if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
5920       Op.addImmOperands(Inst, 1);
5921       continue;
5922     }
5923 
5924     // Handle tokens like 'offen' which are sometimes hard-coded into the
5925     // asm string.  There are no MCInst operands for these.
5926     if (Op.isToken()) {
5927       continue;
5928     }
5929     assert(Op.isImm());
5930 
5931     // Handle optional arguments
5932     OptionalIdx[Op.getImmTy()] = i;
5933   }
5934 
5935   addOptionalImmOperand(Inst, Operands, OptionalIdx,
5936                         AMDGPUOperand::ImmTyOffset);
5937   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
5938   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5939   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5940   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5941 
5942   if (isGFX10())
5943     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5944 }
5945 
5946 //===----------------------------------------------------------------------===//
5947 // mimg
5948 //===----------------------------------------------------------------------===//
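//
// A typical MIMG instruction handled by cvtMIMG (illustrative; operands are
// placeholders):
//   image_load v[0:3], v[4:5], s[0:7] dmask:0xf unorm glc
// On GFX10 the 'dim' operand (e.g. dim:SQ_RSRC_IMG_2D) and 'dlc' are also
// accepted, while 'da' is not.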
5949 
5950 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
5951                               bool IsAtomic) {
5952   unsigned I = 1;
5953   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5954   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
5955     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
5956   }
5957 
5958   if (IsAtomic) {
5959     // Add src, same as dst
5960     assert(Desc.getNumDefs() == 1);
5961     ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
5962   }
5963 
5964   OptionalImmIndexMap OptionalIdx;
5965 
5966   for (unsigned E = Operands.size(); I != E; ++I) {
5967     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
5968 
5969     // Add the register arguments
5970     if (Op.isReg()) {
5971       Op.addRegOperands(Inst, 1);
5972     } else if (Op.isImmModifier()) {
5973       OptionalIdx[Op.getImmTy()] = I;
5974     } else if (!Op.isToken()) {
5975       llvm_unreachable("unexpected operand type");
5976     }
5977   }
5978 
5979   bool IsGFX10 = isGFX10();
5980 
5981   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
5982   if (IsGFX10)
5983     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
5984   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
5985   if (IsGFX10)
5986     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
5987   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
5988   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
5989   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
5990   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
5991   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
5992   if (!IsGFX10)
5993     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
5994   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
5995 }
5996 
5997 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
5998   cvtMIMG(Inst, Operands, true);
5999 }
6000 
6001 //===----------------------------------------------------------------------===//
6002 // smrd
6003 //===----------------------------------------------------------------------===//
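//
// The predicates below distinguish the 8-bit, 20-bit and 32-bit-literal
// offset forms used by different subtargets, e.g. (illustrative):
//   s_load_dword s4, s[2:3], 0xfc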
6004 
6005 bool AMDGPUOperand::isSMRDOffset8() const {
6006   return isImm() && isUInt<8>(getImm());
6007 }
6008 
6009 bool AMDGPUOperand::isSMRDOffset20() const {
6010   return isImm() && isUInt<20>(getImm());
6011 }
6012 
6013 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6014   // 32-bit literals are only supported on CI, and we only want to use them
6015   // when the offset does not fit in 8 bits.
6016   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6017 }
6018 
6019 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6020   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6021 }
6022 
6023 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
6024   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6025 }
6026 
6027 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6028   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6029 }
6030 
6031 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6032   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6033 }
6034 
6035 //===----------------------------------------------------------------------===//
6036 // vop3
6037 //===----------------------------------------------------------------------===//
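//
// Output-modifier syntax converted below (illustrative; operands are
// placeholders):
//   v_add_f32_e64 v0, v1, v2 mul:2    ; omod encoding 1
//   v_add_f32_e64 v0, v1, v2 mul:4    ; omod encoding 2
//   v_add_f32_e64 v0, v1, v2 div:2    ; omod encoding 3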
6038 
6039 static bool ConvertOmodMul(int64_t &Mul) {
6040   if (Mul != 1 && Mul != 2 && Mul != 4)
6041     return false;
6042 
6043   Mul >>= 1;
6044   return true;
6045 }
6046 
6047 static bool ConvertOmodDiv(int64_t &Div) {
6048   if (Div == 1) {
6049     Div = 0;
6050     return true;
6051   }
6052 
6053   if (Div == 2) {
6054     Div = 3;
6055     return true;
6056   }
6057 
6058   return false;
6059 }
6060 
6061 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6062   if (BoundCtrl == 0) {
6063     BoundCtrl = 1;
6064     return true;
6065   }
6066 
6067   if (BoundCtrl == -1) {
6068     BoundCtrl = 0;
6069     return true;
6070   }
6071 
6072   return false;
6073 }
6074 
6075 // Note: the order in this table matches the order of operands in AsmString.
6076 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6077   {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
6078   {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
6079   {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
6080   {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6081   {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6082   {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
6083   {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
6084   {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
6085   {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6086   {"dlc",     AMDGPUOperand::ImmTyDLC, true, nullptr},
6087   {"format",  AMDGPUOperand::ImmTyFORMAT, false, nullptr},
6088   {"glc",     AMDGPUOperand::ImmTyGLC, true, nullptr},
6089   {"slc",     AMDGPUOperand::ImmTySLC, true, nullptr},
6090   {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
6091   {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
6092   {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
6093   {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
6094   {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
6095   {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6096   {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
6097   {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
6098   {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6099   {"a16",     AMDGPUOperand::ImmTyR128A16,  true, nullptr},
6100   {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
6101   {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
6102   {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
6103   {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
6104   {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6105   {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6106   {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6107   {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
6108   {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6109   {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6110   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6111   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6112   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6113   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6114   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6115   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6116   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6117   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6118   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6119   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6120   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6121 };
6122 
6123 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6124 
6125   OperandMatchResultTy res = parseOptionalOpr(Operands);
6126 
6127   // This is a hack to enable hardcoded mandatory operands which follow
6128   // optional operands.
6129   //
6130   // The current design assumes that all operands after the first optional
6131   // operand are also optional. However, the implementation of some
6132   // instructions violates this rule (e.g. flat/global atomics, which have
6133   // hardcoded 'glc' operands).
6134   //
6135   // To alleviate this problem, we have to (implicitly) parse extra operands to
6136   // make sure the autogenerated parser never hits hardcoded mandatory operands.
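  // For example (illustrative), "global_atomic_add v1, v[2:3], v4, off glc"
  // has a hardcoded 'glc' in its asm string that follows optional operands.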
6137 
6138   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6139     if (res != MatchOperand_Success ||
6140         isToken(AsmToken::EndOfStatement))
6141       break;
6142 
6143     trySkipToken(AsmToken::Comma);
6144     res = parseOptionalOpr(Operands);
6145   }
6146 
6147   return res;
6148 }
6149 
6150 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6151   OperandMatchResultTy res;
6152   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6153     // try to parse any optional operand here
6154     if (Op.IsBit) {
6155       res = parseNamedBit(Op.Name, Operands, Op.Type);
6156     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6157       res = parseOModOperand(Operands);
6158     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6159                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6160                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6161       res = parseSDWASel(Operands, Op.Name, Op.Type);
6162     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6163       res = parseSDWADstUnused(Operands);
6164     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6165                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6166                Op.Type == AMDGPUOperand::ImmTyNegLo ||
6167                Op.Type == AMDGPUOperand::ImmTyNegHi) {
6168       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6169                                         Op.ConvertResult);
6170     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6171       res = parseDim(Operands);
6172     } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) {
6173       res = parseDfmtNfmt(Operands);
6174     } else {
6175       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6176     }
6177     if (res != MatchOperand_NoMatch) {
6178       return res;
6179     }
6180   }
6181   return MatchOperand_NoMatch;
6182 }
6183 
6184 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6185   StringRef Name = Parser.getTok().getString();
6186   if (Name == "mul") {
6187     return parseIntWithPrefix("mul", Operands,
6188                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6189   }
6190 
6191   if (Name == "div") {
6192     return parseIntWithPrefix("div", Operands,
6193                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6194   }
6195 
6196   return MatchOperand_NoMatch;
6197 }
6198 
6199 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6200   cvtVOP3P(Inst, Operands);
6201 
6202   int Opc = Inst.getOpcode();
6203 
6204   int SrcNum;
6205   const int Ops[] = { AMDGPU::OpName::src0,
6206                       AMDGPU::OpName::src1,
6207                       AMDGPU::OpName::src2 };
6208   for (SrcNum = 0;
6209        SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6210        ++SrcNum);
6211   assert(SrcNum > 0);
6212 
6213   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6214   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6215 
6216   if ((OpSel & (1 << SrcNum)) != 0) {
6217     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6218     uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6219     Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6220   }
6221 }
6222 
6223 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6224       // 1. This operand is an input modifiers operand
6225   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6226       // 2. This is not the last operand
6227       && Desc.NumOperands > (OpNum + 1)
6228       // 3. The next operand has a register class
6229       && Desc.OpInfo[OpNum + 1].RegClass != -1
6230       // 4. The next operand is not tied to any other operand
6231       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6232 }
6233 
6234 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6235 {
6236   OptionalImmIndexMap OptionalIdx;
6237   unsigned Opc = Inst.getOpcode();
6238 
6239   unsigned I = 1;
6240   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6241   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6242     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6243   }
6244 
6245   for (unsigned E = Operands.size(); I != E; ++I) {
6246     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6247     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6248       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6249     } else if (Op.isInterpSlot() ||
6250                Op.isInterpAttr() ||
6251                Op.isAttrChan()) {
6252       Inst.addOperand(MCOperand::createImm(Op.getImm()));
6253     } else if (Op.isImmModifier()) {
6254       OptionalIdx[Op.getImmTy()] = I;
6255     } else {
6256       llvm_unreachable("unhandled operand type");
6257     }
6258   }
6259 
6260   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6261     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6262   }
6263 
6264   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6265     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6266   }
6267 
6268   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6269     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6270   }
6271 }
6272 
6273 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6274                               OptionalImmIndexMap &OptionalIdx) {
6275   unsigned Opc = Inst.getOpcode();
6276 
6277   unsigned I = 1;
6278   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6279   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6280     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6281   }
6282 
6283   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6284     // This instruction has src modifiers
6285     for (unsigned E = Operands.size(); I != E; ++I) {
6286       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6287       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6288         Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6289       } else if (Op.isImmModifier()) {
6290         OptionalIdx[Op.getImmTy()] = I;
6291       } else if (Op.isRegOrImm()) {
6292         Op.addRegOrImmOperands(Inst, 1);
6293       } else {
6294         llvm_unreachable("unhandled operand type");
6295       }
6296     }
6297   } else {
6298     // No src modifiers
6299     for (unsigned E = Operands.size(); I != E; ++I) {
6300       AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6301       if (Op.isMod()) {
6302         OptionalIdx[Op.getImmTy()] = I;
6303       } else {
6304         Op.addRegOrImmOperands(Inst, 1);
6305       }
6306     }
6307   }
6308 
6309   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6310     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6311   }
6312 
6313   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6314     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6315   }
6316 
6317   // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
6318   // they have a src2 register operand that is tied to the dst operand.
6319   // We don't allow modifiers for this operand in the assembler, so
6320   // src2_modifiers should be 0.
6321   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
6322       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
6323       Opc == AMDGPU::V_MAC_F32_e64_vi ||
6324       Opc == AMDGPU::V_MAC_F16_e64_vi ||
6325       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
6326       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
6327       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
6328     auto it = Inst.begin();
6329     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
6330     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
6331     ++it;
6332     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6333   }
6334 }
6335 
6336 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
6337   OptionalImmIndexMap OptionalIdx;
6338   cvtVOP3(Inst, Operands, OptionalIdx);
6339 }
6340 
6341 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
6342                                const OperandVector &Operands) {
6343   OptionalImmIndexMap OptIdx;
6344   const int Opc = Inst.getOpcode();
6345   const MCInstrDesc &Desc = MII.get(Opc);
6346 
6347   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
6348 
6349   cvtVOP3(Inst, Operands, OptIdx);
6350 
6351   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
6352     assert(!IsPacked);
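    // This opcode has a vdst_in operand; fill it with the already-parsed
    // vdst (operand 0).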
6353     Inst.addOperand(Inst.getOperand(0));
6354   }
6355 
6356   // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
6357   // instruction, and then figure out where to actually put the modifiers.
6358 
6359   addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
6360 
6361   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
6362   if (OpSelHiIdx != -1) {
6363     int DefaultVal = IsPacked ? -1 : 0;
6364     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
6365                           DefaultVal);
6366   }
6367 
6368   int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
6369   if (NegLoIdx != -1) {
6370     assert(IsPacked);
6371     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
6372     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
6373   }
6374 
6375   const int Ops[] = { AMDGPU::OpName::src0,
6376                       AMDGPU::OpName::src1,
6377                       AMDGPU::OpName::src2 };
6378   const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
6379                          AMDGPU::OpName::src1_modifiers,
6380                          AMDGPU::OpName::src2_modifiers };
6381 
6382   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6383 
6384   unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6385   unsigned OpSelHi = 0;
6386   unsigned NegLo = 0;
6387   unsigned NegHi = 0;
6388 
6389   if (OpSelHiIdx != -1) {
6390     OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
6391   }
6392 
6393   if (NegLoIdx != -1) {
6394     int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
6395     NegLo = Inst.getOperand(NegLoIdx).getImm();
6396     NegHi = Inst.getOperand(NegHiIdx).getImm();
6397   }
6398 
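  // Fold the per-source bits of op_sel, op_sel_hi, neg_lo and neg_hi (bit J
  // corresponds to source J) into the matching srcJ_modifiers operand.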
6399   for (int J = 0; J < 3; ++J) {
6400     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
6401     if (OpIdx == -1)
6402       break;
6403 
6404     uint32_t ModVal = 0;
6405 
6406     if ((OpSel & (1 << J)) != 0)
6407       ModVal |= SISrcMods::OP_SEL_0;
6408 
6409     if ((OpSelHi & (1 << J)) != 0)
6410       ModVal |= SISrcMods::OP_SEL_1;
6411 
6412     if ((NegLo & (1 << J)) != 0)
6413       ModVal |= SISrcMods::NEG;
6414 
6415     if ((NegHi & (1 << J)) != 0)
6416       ModVal |= SISrcMods::NEG_HI;
6417 
6418     int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
6419 
6420     Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
6421   }
6422 }
6423 
6424 //===----------------------------------------------------------------------===//
6425 // dpp
6426 //===----------------------------------------------------------------------===//
6427 
6428 bool AMDGPUOperand::isDPP8() const {
6429   return isImmTy(ImmTyDPP8);
6430 }
6431 
6432 bool AMDGPUOperand::isDPPCtrl() const {
6433   using namespace AMDGPU::DPP;
6434 
6435   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
6436   if (result) {
6437     int64_t Imm = getImm();
6438     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
6439            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
6440            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
6441            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
6442            (Imm == DppCtrl::WAVE_SHL1) ||
6443            (Imm == DppCtrl::WAVE_ROL1) ||
6444            (Imm == DppCtrl::WAVE_SHR1) ||
6445            (Imm == DppCtrl::WAVE_ROR1) ||
6446            (Imm == DppCtrl::ROW_MIRROR) ||
6447            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
6448            (Imm == DppCtrl::BCAST15) ||
6449            (Imm == DppCtrl::BCAST31) ||
6450            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
6451            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
6452   }
6453   return false;
6454 }
6455 
6456 //===----------------------------------------------------------------------===//
6457 // mAI
6458 //===----------------------------------------------------------------------===//
6459 
6460 bool AMDGPUOperand::isBLGP() const {
6461   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
6462 }
6463 
6464 bool AMDGPUOperand::isCBSZ() const {
6465   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
6466 }
6467 
6468 bool AMDGPUOperand::isABID() const {
6469   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
6470 }
6471 
6472 bool AMDGPUOperand::isS16Imm() const {
6473   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
6474 }
6475 
6476 bool AMDGPUOperand::isU16Imm() const {
6477   return isImm() && isUInt<16>(getImm());
6478 }
6479 
6480 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
6481   if (!isGFX10())
6482     return MatchOperand_NoMatch;
6483 
6484   SMLoc S = Parser.getTok().getLoc();
6485 
6486   if (getLexer().isNot(AsmToken::Identifier))
6487     return MatchOperand_NoMatch;
6488   if (getLexer().getTok().getString() != "dim")
6489     return MatchOperand_NoMatch;
6490 
6491   Parser.Lex();
6492   if (getLexer().isNot(AsmToken::Colon))
6493     return MatchOperand_ParseFail;
6494 
6495   Parser.Lex();
6496 
6497   // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
6498   // integer.
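  // For example (illustrative), "dim:2D" lexes as the integer '2' followed by
  // the identifier 'D'; the two tokens must be adjacent and are re-joined
  // below before the suffix lookup.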
6499   std::string Token;
6500   if (getLexer().is(AsmToken::Integer)) {
6501     SMLoc Loc = getLexer().getTok().getEndLoc();
6502     Token = getLexer().getTok().getString();
6503     Parser.Lex();
6504     if (getLexer().getTok().getLoc() != Loc)
6505       return MatchOperand_ParseFail;
6506   }
6507   if (getLexer().isNot(AsmToken::Identifier))
6508     return MatchOperand_ParseFail;
6509   Token += getLexer().getTok().getString();
6510 
6511   StringRef DimId = Token;
6512   if (DimId.startswith("SQ_RSRC_IMG_"))
6513     DimId = DimId.substr(12);
6514 
6515   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
6516   if (!DimInfo)
6517     return MatchOperand_ParseFail;
6518 
6519   Parser.Lex();
6520 
6521   Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
6522                                               AMDGPUOperand::ImmTyDim));
6523   return MatchOperand_Success;
6524 }
6525 
6526 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
6527   SMLoc S = Parser.getTok().getLoc();
6528   StringRef Prefix;
6529 
6530   if (getLexer().getKind() == AsmToken::Identifier) {
6531     Prefix = Parser.getTok().getString();
6532   } else {
6533     return MatchOperand_NoMatch;
6534   }
6535 
6536   if (Prefix != "dpp8")
6537     return parseDPPCtrl(Operands);
6538   if (!isGFX10())
6539     return MatchOperand_NoMatch;
6540 
6541   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
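  // Each select is a lane index in the range [0..7]; select i is packed into
  // bits [3*i+2 : 3*i] of the 24-bit immediate. For example (illustrative),
  // dpp8:[0,1,2,3,4,5,6,7] is the identity selection and packs to 0xFAC688.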
6542 
6543   int64_t Sels[8];
6544 
6545   Parser.Lex();
6546   if (getLexer().isNot(AsmToken::Colon))
6547     return MatchOperand_ParseFail;
6548 
6549   Parser.Lex();
6550   if (getLexer().isNot(AsmToken::LBrac))
6551     return MatchOperand_ParseFail;
6552 
6553   Parser.Lex();
6554   if (getParser().parseAbsoluteExpression(Sels[0]))
6555     return MatchOperand_ParseFail;
6556   if (0 > Sels[0] || 7 < Sels[0])
6557     return MatchOperand_ParseFail;
6558 
6559   for (size_t i = 1; i < 8; ++i) {
6560     if (getLexer().isNot(AsmToken::Comma))
6561       return MatchOperand_ParseFail;
6562 
6563     Parser.Lex();
6564     if (getParser().parseAbsoluteExpression(Sels[i]))
6565       return MatchOperand_ParseFail;
6566     if (0 > Sels[i] || 7 < Sels[i])
6567       return MatchOperand_ParseFail;
6568   }
6569 
6570   if (getLexer().isNot(AsmToken::RBrac))
6571     return MatchOperand_ParseFail;
6572   Parser.Lex();
6573 
6574   unsigned DPP8 = 0;
6575   for (size_t i = 0; i < 8; ++i)
6576     DPP8 |= (Sels[i] << (i * 3));
6577 
6578   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
6579   return MatchOperand_Success;
6580 }
6581 
6582 OperandMatchResultTy
6583 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
6584   using namespace AMDGPU::DPP;
6585 
6586   SMLoc S = Parser.getTok().getLoc();
6587   StringRef Prefix;
6588   int64_t Int;
6589 
6590   if (getLexer().getKind() == AsmToken::Identifier) {
6591     Prefix = Parser.getTok().getString();
6592   } else {
6593     return MatchOperand_NoMatch;
6594   }
6595 
6596   if (Prefix == "row_mirror") {
6597     Int = DppCtrl::ROW_MIRROR;
6598     Parser.Lex();
6599   } else if (Prefix == "row_half_mirror") {
6600     Int = DppCtrl::ROW_HALF_MIRROR;
6601     Parser.Lex();
6602   } else {
6603     // Check the prefix to prevent parseDPPCtrl from eating invalid tokens
6604     if (Prefix != "quad_perm"
6605         && Prefix != "row_shl"
6606         && Prefix != "row_shr"
6607         && Prefix != "row_ror"
6608         && Prefix != "wave_shl"
6609         && Prefix != "wave_rol"
6610         && Prefix != "wave_shr"
6611         && Prefix != "wave_ror"
6612         && Prefix != "row_bcast"
6613         && Prefix != "row_share"
6614         && Prefix != "row_xmask") {
6615       return MatchOperand_NoMatch;
6616     }
6617 
6618     if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask"))
6619       return MatchOperand_NoMatch;
6620 
6621     if (!isVI() && !isGFX9() &&
6622         (Prefix == "wave_shl" || Prefix == "wave_shr" ||
6623          Prefix == "wave_rol" || Prefix == "wave_ror" ||
6624          Prefix == "row_bcast"))
6625       return MatchOperand_NoMatch;
6626 
6627     Parser.Lex();
6628     if (getLexer().isNot(AsmToken::Colon))
6629       return MatchOperand_ParseFail;
6630 
6631     if (Prefix == "quad_perm") {
6632       // quad_perm:[%d,%d,%d,%d]
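      // Each of the four selects is in [0..3]; select i occupies bits
      // [2*i+1 : 2*i] of the control value. For example (illustrative),
      // quad_perm:[0,1,2,3] is the identity permutation and encodes as 0xE4.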
6633       Parser.Lex();
6634       if (getLexer().isNot(AsmToken::LBrac))
6635         return MatchOperand_ParseFail;
6636       Parser.Lex();
6637 
6638       if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
6639         return MatchOperand_ParseFail;
6640 
6641       for (int i = 0; i < 3; ++i) {
6642         if (getLexer().isNot(AsmToken::Comma))
6643           return MatchOperand_ParseFail;
6644         Parser.Lex();
6645 
6646         int64_t Temp;
6647         if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
6648           return MatchOperand_ParseFail;
6649         const int shift = i*2 + 2;
6650         Int += (Temp << shift);
6651       }
6652 
6653       if (getLexer().isNot(AsmToken::RBrac))
6654         return MatchOperand_ParseFail;
6655       Parser.Lex();
6656     } else {
6657       // sel:%d
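      // The select value is combined with the base control value for the given
      // prefix, e.g. (illustrative) row_shl:3 becomes DppCtrl::ROW_SHL0 | 3.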
6658       Parser.Lex();
6659       if (getParser().parseAbsoluteExpression(Int))
6660         return MatchOperand_ParseFail;
6661 
6662       if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
6663         Int |= DppCtrl::ROW_SHL0;
6664       } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
6665         Int |= DppCtrl::ROW_SHR0;
6666       } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
6667         Int |= DppCtrl::ROW_ROR0;
6668       } else if (Prefix == "wave_shl" && 1 == Int) {
6669         Int = DppCtrl::WAVE_SHL1;
6670       } else if (Prefix == "wave_rol" && 1 == Int) {
6671         Int = DppCtrl::WAVE_ROL1;
6672       } else if (Prefix == "wave_shr" && 1 == Int) {
6673         Int = DppCtrl::WAVE_SHR1;
6674       } else if (Prefix == "wave_ror" && 1 == Int) {
6675         Int = DppCtrl::WAVE_ROR1;
6676       } else if (Prefix == "row_bcast") {
6677         if (Int == 15) {
6678           Int = DppCtrl::BCAST15;
6679         } else if (Int == 31) {
6680           Int = DppCtrl::BCAST31;
6681         } else {
6682           return MatchOperand_ParseFail;
6683         }
6684       } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
6685         Int |= DppCtrl::ROW_SHARE_FIRST;
6686       } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
6687         Int |= DppCtrl::ROW_XMASK_FIRST;
6688       } else {
6689         return MatchOperand_ParseFail;
6690       }
6691     }
6692   }
6693 
6694   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
6695   return MatchOperand_Success;
6696 }
6697 
6698 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
6699   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
6700 }
6701 
6702 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
6703   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
6704 }
6705 
6706 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
6707   return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
6708 }
6709 
6710 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
6711   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
6712 }
6713 
6714 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
6715   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
6716 }
6717 
6718 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
6719   OptionalImmIndexMap OptionalIdx;
6720 
6721   unsigned I = 1;
6722   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6723   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6724     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6725   }
6726 
6727   int Fi = 0;
6728   for (unsigned E = Operands.size(); I != E; ++I) {
6729     auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
6730                                             MCOI::TIED_TO);
6731     if (TiedTo != -1) {
6732       assert((unsigned)TiedTo < Inst.getNumOperands());
6733       // Handle the tied 'old' or src2 operand for MAC instructions.
6734       Inst.addOperand(Inst.getOperand(TiedTo));
6735     }
6736     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6737     // Add the register arguments
6738     if (Op.isReg() && validateVccOperand(Op.getReg())) {
6739       // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
6740       // Skip it.
6741       continue;
6742     }
6743 
6744     if (IsDPP8) {
6745       if (Op.isDPP8()) {
6746         Op.addImmOperands(Inst, 1);
6747       } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6748         Op.addRegWithFPInputModsOperands(Inst, 2);
6749       } else if (Op.isFI()) {
6750         Fi = Op.getImm();
6751       } else if (Op.isReg()) {
6752         Op.addRegOperands(Inst, 1);
6753       } else {
6754         llvm_unreachable("Invalid operand type");
6755       }
6756     } else {
6757       if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6758         Op.addRegWithFPInputModsOperands(Inst, 2);
6759       } else if (Op.isDPPCtrl()) {
6760         Op.addImmOperands(Inst, 1);
6761       } else if (Op.isImm()) {
6762         // Handle optional arguments
6763         OptionalIdx[Op.getImmTy()] = I;
6764       } else {
6765         llvm_unreachable("Invalid operand type");
6766       }
6767     }
6768   }
6769 
6770   if (IsDPP8) {
6771     using namespace llvm::AMDGPU::DPP;
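    // The fi value was captured above while walking the operands (default 0);
    // emit it directly as an immediate here.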
6772     Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
6773   } else {
6774     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
6775     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
6776     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
6777     if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
6778       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
6779     }
6780   }
6781 }
6782 
6783 //===----------------------------------------------------------------------===//
6784 // sdwa
6785 //===----------------------------------------------------------------------===//
6786 
6787 OperandMatchResultTy
6788 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
6789                               AMDGPUOperand::ImmTy Type) {
6790   using namespace llvm::AMDGPU::SDWA;
6791 
6792   SMLoc S = Parser.getTok().getLoc();
6793   StringRef Value;
6794   OperandMatchResultTy res;
6795 
6796   res = parseStringWithPrefix(Prefix, Value);
6797   if (res != MatchOperand_Success) {
6798     return res;
6799   }
6800 
6801   int64_t Int;
6802   Int = StringSwitch<int64_t>(Value)
6803         .Case("BYTE_0", SdwaSel::BYTE_0)
6804         .Case("BYTE_1", SdwaSel::BYTE_1)
6805         .Case("BYTE_2", SdwaSel::BYTE_2)
6806         .Case("BYTE_3", SdwaSel::BYTE_3)
6807         .Case("WORD_0", SdwaSel::WORD_0)
6808         .Case("WORD_1", SdwaSel::WORD_1)
6809         .Case("DWORD", SdwaSel::DWORD)
6810         .Default(0xffffffff);
6811   Parser.Lex(); // eat last token
6812 
6813   if (Int == 0xffffffff) {
6814     return MatchOperand_ParseFail;
6815   }
6816 
6817   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
6818   return MatchOperand_Success;
6819 }
6820 
6821 OperandMatchResultTy
6822 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
6823   using namespace llvm::AMDGPU::SDWA;
6824 
6825   SMLoc S = Parser.getTok().getLoc();
6826   StringRef Value;
6827   OperandMatchResultTy res;
6828 
6829   res = parseStringWithPrefix("dst_unused", Value);
6830   if (res != MatchOperand_Success) {
6831     return res;
6832   }
6833 
6834   int64_t Int;
6835   Int = StringSwitch<int64_t>(Value)
6836         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
6837         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
6838         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
6839         .Default(0xffffffff);
6840   Parser.Lex(); // eat last token
6841 
6842   if (Int == 0xffffffff) {
6843     return MatchOperand_ParseFail;
6844   }
6845 
6846   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
6847   return MatchOperand_Success;
6848 }
6849 
6850 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
6851   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
6852 }
6853 
6854 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
6855   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
6856 }
6857 
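// In VOP2b sdwa forms the explicit "vcc" dst and src tokens are skipped when
// building the MCInst; VOP2e forms skip only the "vcc" source (see cvtSDWA).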
6858 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
6859   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
6860 }
6861 
6862 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
6863   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
6864 }
6865 
6866 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
6867   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
6868 }
6869 
6870 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
6871                               uint64_t BasicInstType,
6872                               bool SkipDstVcc,
6873                               bool SkipSrcVcc) {
6874   using namespace llvm::AMDGPU::SDWA;
6875 
6876   OptionalImmIndexMap OptionalIdx;
6877   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
6878   bool SkippedVcc = false;
6879 
6880   unsigned I = 1;
6881   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6882   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6883     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6884   }
6885 
6886   for (unsigned E = Operands.size(); I != E; ++I) {
6887     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6888     if (SkipVcc && !SkippedVcc && Op.isReg() &&
6889         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
6890       // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
6891       // Skip it if it is the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
6892       // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
6893       // Skip VCC only if we didn't skip it on the previous iteration.
6894       // Note that src0 and src1 occupy 2 slots each because of modifiers.
6895       if (BasicInstType == SIInstrFlags::VOP2 &&
6896           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
6897            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
6898         SkippedVcc = true;
6899         continue;
6900       } else if (BasicInstType == SIInstrFlags::VOPC &&
6901                  Inst.getNumOperands() == 0) {
6902         SkippedVcc = true;
6903         continue;
6904       }
6905     }
6906     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6907       Op.addRegOrImmWithInputModsOperands(Inst, 2);
6908     } else if (Op.isImm()) {
6909       // Handle optional arguments
6910       OptionalIdx[Op.getImmTy()] = I;
6911     } else {
6912       llvm_unreachable("Invalid operand type");
6913     }
6914     SkippedVcc = false;
6915   }
6916 
6917   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
6918       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
6919       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
6920     // v_nop_sdwa on vi/gfx9/gfx10 has no optional sdwa arguments
6921     switch (BasicInstType) {
6922     case SIInstrFlags::VOP1:
6923       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6924       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6925         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6926       }
6927       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6928       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6929       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6930       break;
6931 
6932     case SIInstrFlags::VOP2:
6933       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6934       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
6935         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
6936       }
6937       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
6938       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
6939       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6940       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6941       break;
6942 
6943     case SIInstrFlags::VOPC:
6944       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
6945         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
6946       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
6947       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
6948       break;
6949 
6950     default:
6951       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
6952     }
6953   }
6954 
6955   // Special case v_mac_{f16, f32}:
6956   // it has a src2 register operand that is tied to the dst operand.
6957   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6958       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
6959     auto it = Inst.begin();
6960     std::advance(
6961       it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6962     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6963   }
6964 }
6965 
6966 //===----------------------------------------------------------------------===//
6967 // mAI
6968 //===----------------------------------------------------------------------===//
6969 
6970 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6971   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6972 }
6973 
6974 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6975   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6976 }
6977 
6978 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6979   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6980 }
6981 
6982 /// Force static initialization.
6983 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
6984   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6985   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6986 }
6987 
6988 #define GET_REGISTER_MATCHER
6989 #define GET_MATCHER_IMPLEMENTATION
6990 #define GET_MNEMONIC_SPELL_CHECKER
6991 #include "AMDGPUGenAsmMatcher.inc"
6992 
6993 // This function should be defined after the auto-generated include so that
6994 // the MatchClassKind enum is defined.
6995 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6996                                                      unsigned Kind) {
6997   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6998   // But MatchInstructionImpl() expects to see a token and fails to validate
6999   // the operand. This method checks if we were given an immediate operand but
7000   // expected the corresponding token.
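  // For example (illustrative), "offen" is parsed into an immediate operand;
  // when the matcher asks for the MCK_offen token class, the immediate is
  // accepted below via Operand.isOffen().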
7001   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7002   switch (Kind) {
7003   case MCK_addr64:
7004     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7005   case MCK_gds:
7006     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7007   case MCK_lds:
7008     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7009   case MCK_glc:
7010     return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7011   case MCK_idxen:
7012     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7013   case MCK_offen:
7014     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7015   case MCK_SSrcB32:
7016     // When operands have expression values, they will return true for isToken,
7017     // because it is not possible to distinguish between a token and an
7018     // expression at parse time. MatchInstructionImpl() will always try to
7019     // match an operand as a token when isToken returns true, and when the
7020     // name of the expression is not a valid token, the match will fail,
7021     // so we need to handle it here.
7022     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7023   case MCK_SSrcF32:
7024     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7025   case MCK_SoppBrTarget:
7026     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7027   case MCK_VReg32OrOff:
7028     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7029   case MCK_InterpSlot:
7030     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7031   case MCK_Attr:
7032     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7033   case MCK_AttrChan:
7034     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7035   case MCK_SReg_64:
7036   case MCK_SReg_64_XEXEC:
7037     // Null is defined as a 32-bit register but
7038     // it should also be enabled with 64-bit operands.
7039     // The following code enables it for SReg_64 operands
7040     // used as source and destination. Remaining source
7041     // operands are handled in isInlinableImm.
7042     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7043   default:
7044     return Match_InvalidOperand;
7045   }
7046 }
7047 
7048 //===----------------------------------------------------------------------===//
7049 // endpgm
7050 //===----------------------------------------------------------------------===//
7051 
7052 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7053   SMLoc S = Parser.getTok().getLoc();
7054   int64_t Imm = 0;
7055 
7056   if (!parseExpr(Imm)) {
7057     // The operand is optional; if not present, default to 0.
7058     Imm = 0;
7059   }
7060 
7061   if (!isUInt<16>(Imm)) {
7062     Error(S, "expected a 16-bit value");
7063     return MatchOperand_ParseFail;
7064   }
7065 
7066   Operands.push_back(
7067       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7068   return MatchOperand_Success;
7069 }
7070 
7071 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7072