//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }
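
    // For example, a source operand written as "-|v0|" parses with both Abs
    // and Neg set, so getModifiersOperand() returns
    // SISrcMods::NEG | SISrcMods::ABS for the src*_modifiers operand.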

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg:" << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}
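
// Sample output for a source with both FP modifiers set: "abs:1 neg:1 sext:0".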

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
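// For example, after parsing "s[2:3]" the parser calls
// usesRegister(IS_SGPR, 2, 2), which updates the .kernel.sgpr_count symbol
// to 4 (the first unused SGPR index).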
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_AGPR: // fall through
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
1653 OperandMatchResultTy parseDim(OperandVector &Operands);
1654 OperandMatchResultTy parseDPP8(OperandVector &Operands);
1655 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
1656 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1657 int64_t parseDPPCtrlSel(StringRef Ctrl);
1658 int64_t parseDPPCtrlPerm();
1659 AMDGPUOperand::Ptr defaultRowMask() const;
1660 AMDGPUOperand::Ptr defaultBankMask() const;
1661 AMDGPUOperand::Ptr defaultBoundCtrl() const;
1662 AMDGPUOperand::Ptr defaultFI() const;
1663 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
cvtDPP8(MCInst & Inst,const OperandVector & Operands)1664 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
1665
1666 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
1667 AMDGPUOperand::ImmTy Type);
1668 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
1669 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1670 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1671 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1672 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1673 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1674 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1675 uint64_t BasicInstType,
1676 bool SkipDstVcc = false,
1677 bool SkipSrcVcc = false);
1678
1679 AMDGPUOperand::Ptr defaultBLGP() const;
1680 AMDGPUOperand::Ptr defaultCBSZ() const;
1681 AMDGPUOperand::Ptr defaultABID() const;
1682
1683 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
1684 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
1685 };
1686
1687 struct OptionalOperand {
1688 const char *Name;
1689 AMDGPUOperand::ImmTy Type;
1690 bool IsBit;
1691 bool (*ConvertResult)(int64_t&);
1692 };
1693
1694 } // end anonymous namespace
1695
1696 // May be called with integer type with equivalent bitwidth.
getFltSemantics(unsigned Size)1697 static const fltSemantics *getFltSemantics(unsigned Size) {
1698 switch (Size) {
1699 case 4:
1700 return &APFloat::IEEEsingle();
1701 case 8:
1702 return &APFloat::IEEEdouble();
1703 case 2:
1704 return &APFloat::IEEEhalf();
1705 default:
1706 llvm_unreachable("unsupported fp type");
1707 }
1708 }
1709
getFltSemantics(MVT VT)1710 static const fltSemantics *getFltSemantics(MVT VT) {
1711 return getFltSemantics(VT.getSizeInBits() / 8);
1712 }
1713
getOpFltSemantics(uint8_t OperandType)1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1715 switch (OperandType) {
1716 case AMDGPU::OPERAND_REG_IMM_INT32:
1717 case AMDGPU::OPERAND_REG_IMM_FP32:
1718 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1719 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1720 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1721 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1722 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1723 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1724 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1725 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1726 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1727 case AMDGPU::OPERAND_KIMM32:
1728 return &APFloat::IEEEsingle();
1729 case AMDGPU::OPERAND_REG_IMM_INT64:
1730 case AMDGPU::OPERAND_REG_IMM_FP64:
1731 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1732 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1733 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1734 return &APFloat::IEEEdouble();
1735 case AMDGPU::OPERAND_REG_IMM_INT16:
1736 case AMDGPU::OPERAND_REG_IMM_FP16:
1737 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1738 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1739 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1740 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1741 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1742 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1743 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1744 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1745 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1746 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1747 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1748 case AMDGPU::OPERAND_KIMM16:
1749 return &APFloat::IEEEhalf();
1750 default:
1751 llvm_unreachable("unsupported fp type");
1752 }
1753 }
1754
1755 //===----------------------------------------------------------------------===//
1756 // Operand
1757 //===----------------------------------------------------------------------===//
1758
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the operand type's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}
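
// For example, converting 0.1 to f16 is merely inexact and is accepted,
// while converting 1.0e10 to f16 overflows and is rejected.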

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}
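
// For example, isSafeTruncation(0xFFFF, 16) and isSafeTruncation(-1, 16)
// are both true (valid as unsigned and signed 16-bit values respectively),
// while isSafeTruncation(0x1FFFF, 16) is false.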

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}
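
// For example, for an i16 operand only the integer inline constants
// (-16..64) are accepted: 0x3800, the f16 encoding of 0.5, is rejected,
// although it would be accepted for an f16 or v2f16 operand.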

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal would be set to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)? MVT::f32 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         // GFX90A allows DPP on 64-bit operands.
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}
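
// For example, with Size == 4 the sign mask is 0x80000000: 'abs' clears
// the sign bit (abs(-2.0) yields the encoding of 2.0) and 'neg' flips it
// (neg(2.0) yields the encoding of -2.0).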

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &&
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
              "Can't encode literal as exact 64-bit floating-point operand. "
              "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point semantics.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should
      // have been checked earlier in isLiteralImm().

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      setImmKindLiteral();
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  case AMDGPU::OPERAND_KIMM32:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
    setImmKindNone();
    return;
  case AMDGPU::OPERAND_KIMM16:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
    setImmKindNone();
    return;
  default:
    llvm_unreachable("invalid operand size");
  }
}

template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);
  setImmKindNone();

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
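
// For example, with Bitwidth == 16 the fp literal 1.0 is converted to the
// IEEE half-precision bit pattern 0x3C00 before being encoded.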

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  case AMDGPU::SGPR_NULL:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
    default: return -1;
    case 1: return AMDGPU::VGPR_32RegClassID;
    case 2: return AMDGPU::VReg_64RegClassID;
    case 3: return AMDGPU::VReg_96RegClassID;
    case 4: return AMDGPU::VReg_128RegClassID;
    case 5: return AMDGPU::VReg_160RegClassID;
    case 6: return AMDGPU::VReg_192RegClassID;
    case 7: return AMDGPU::VReg_224RegClassID;
    case 8: return AMDGPU::VReg_256RegClassID;
    case 16: return AMDGPU::VReg_512RegClassID;
    case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
    default: return -1;
    case 1: return AMDGPU::TTMP_32RegClassID;
    case 2: return AMDGPU::TTMP_64RegClassID;
    case 4: return AMDGPU::TTMP_128RegClassID;
    case 8: return AMDGPU::TTMP_256RegClassID;
    case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
    default: return -1;
    case 1: return AMDGPU::SGPR_32RegClassID;
    case 2: return AMDGPU::SGPR_64RegClassID;
    case 3: return AMDGPU::SGPR_96RegClassID;
    case 4: return AMDGPU::SGPR_128RegClassID;
    case 5: return AMDGPU::SGPR_160RegClassID;
    case 6: return AMDGPU::SGPR_192RegClassID;
    case 7: return AMDGPU::SGPR_224RegClassID;
    case 8: return AMDGPU::SGPR_256RegClassID;
    case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
    default: return -1;
    case 1: return AMDGPU::AGPR_32RegClassID;
    case 2: return AMDGPU::AReg_64RegClassID;
    case 3: return AMDGPU::AReg_96RegClassID;
    case 4: return AMDGPU::AReg_128RegClassID;
    case 5: return AMDGPU::AReg_160RegClassID;
    case 6: return AMDGPU::AReg_192RegClassID;
    case 7: return AMDGPU::AReg_224RegClassID;
    case 8: return AMDGPU::AReg_256RegClassID;
    case 16: return AMDGPU::AReg_512RegClassID;
    case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}
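
// Examples of the mapping above:
//   getRegClass(IS_VGPR, 2) -> AMDGPU::VReg_64RegClassID
//   getRegClass(IS_SGPR, 4) -> AMDGPU::SGPR_128RegClassID
//   getRegClass(IS_VGPR, 9) -> -1 (there is no 288-bit register class)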

static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
                                                       SMLoc &StartLoc,
                                                       SMLoc &EndLoc) {
  bool Result =
      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return MatchOperand_ParseFail;
  if (Result)
    return MatchOperand_NoMatch;
  return MatchOperand_Success;
}

bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            SMLoc Loc) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    Error(Loc, "register does not fit in the list");
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      Error(Loc, "registers in a list must have consecutive indices");
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}
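
// For example, the list [exec_lo, exec_hi] is merged into EXEC with
// RegWidth == 2, and [v0, v1, v2] grows RegWidth from 1 to 3 as each
// consecutive VGPR is appended; [v0, v2] is rejected because the indices
// are not consecutive.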

struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};

static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};

static bool isRegularReg(RegisterKind Kind) {
  return Kind == IS_VGPR ||
         Kind == IS_SGPR ||
         Kind == IS_TTMP ||
         Kind == IS_AGPR;
}

static const RegInfo* getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}

static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}

bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef Str = Token.getString();
  const RegInfo *Reg = getRegularRegInfo(Str);
  if (Reg) {
    StringRef RegName = Reg->Name;
    StringRef RegSuffix = Str.substr(RegName.size());
    if (!RegSuffix.empty()) {
      unsigned Num;
      // A single register with an index: rXX
      if (getRegNum(RegSuffix, Num))
        return true;
    } else {
      // A range of registers: r[XX:YY].
      if (NextToken.is(AsmToken::LBrac))
        return true;
    }
  }

  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}

unsigned
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
                               unsigned RegNum,
                               unsigned RegWidth,
                               SMLoc Loc) {

  assert(isRegularReg(RegKind));

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // Max required alignment is 4 dwords.
    AlignSize = std::min(RegWidth, 4u);
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1) {
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs()) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }

  return RC.getRegister(RegIdx);
}
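
// For example, "s[4:7]" (RegNum == 4, RegWidth == 4) has AlignSize == 4,
// so RegIdx == 1 selects the second register in the SGPR_128 class;
// "s[2:5]" is rejected because 2 is not a multiple of the required
// 4-dword alignment.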

bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}
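
// For example, "[0:3]" yields Num == 0 and Width == 4, while a single
// index "[5]" yields Num == 5 and Width == 1.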

unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  unsigned Reg = getSpecialRegForName(getTokenStr());
  if (Reg) {
    RegNum = 0;
    RegWidth = 1;
    RegKind = IS_SPECIAL;
    Tokens.push_back(getToken());
    lex(); // skip register name
  }
  return Reg;
}

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}

unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 1) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

  return Reg;
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure /*=false*/) {
  Reg = AMDGPU::NoRegister;

  SmallVector<AsmToken, 1> Tokens;
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        getLexer().UnLex(Tokens.pop_back_val());
      }
    }
    return true;
  }
  return false;
}

Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
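
// For example, after parsing v[4:5] (DwordRegIndex == 4, RegWidth == 2),
// .amdgcn.next_free_vgpr is raised to at least 6.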

std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}

OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  assert(!isRegister());
  assert(!isModifier());

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = getLoc();
  bool Negate = false;

  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

  } else {
    int64_t IntVal;
    const MCExpr *Expr;
    SMLoc S = getLoc();

    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with the syntax of standard
      // MC expressions (due to the trailing '|').
      SMLoc EndLoc;
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
        return MatchOperand_ParseFail;
    } else {
      if (Parser.parseExpression(Expr))
        return MatchOperand_ParseFail;
    }

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    } else {
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (!isRegister())
    return MatchOperand_NoMatch;

  if (auto R = parseRegister()) {
    assert(R->isReg());
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
  auto res = parseReg(Operands);
  if (res != MatchOperand_NoMatch) {
    return res;
  } else if (isModifier()) {
    return MatchOperand_NoMatch;
  } else {
    return parseImm(Operands, HasSP3AbsMod);
  }
}

bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
    const auto &str = Token.getString();
    return str == "abs" || str == "neg" || str == "sext";
  }
  return false;
}

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
// Note that simple opcode modifiers like 'gds' may be parsed as
// expressions; this is a special case. See getExpressionAsToken.
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return MatchOperand_Success;
  }

  if (!isRegister())
    return MatchOperand_NoMatch;

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;
}

unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)))
    return Match_InvalidOperand;

  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}
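
// For example, if the parser has forced a 32-bit encoding (roughly, what an
// explicit "_e32" mnemonic suffix requests), VOP3-encoded candidates are
// rejected above, and a forced SDWA variant rejects candidates without the
// SDWA flag.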

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
isInlineConstant(const MCInst & Inst,unsigned OpIdx) const3177 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
3178 unsigned OpIdx) const {
3179 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3180
3181 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3182 return false;
3183 }
3184
3185 const MCOperand &MO = Inst.getOperand(OpIdx);
3186
3187 int64_t Val = MO.getImm();
3188 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);
3189
3190 switch (OpSize) { // expected operand size
3191 case 8:
3192 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
3193 case 4:
3194 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
3195 case 2: {
3196 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
3197 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
3198 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
3199 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
3200 return AMDGPU::isInlinableIntLiteral(Val);
3201
3202 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
3203 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
3204 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
3205 return AMDGPU::isInlinableIntLiteralV216(Val);
3206
3207 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
3208 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
3209 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
3210 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());
3211
3212 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
3213 }
3214 default:
3215 llvm_unreachable("invalid operand size");
3216 }
3217 }
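
// For reference, the inlinable values are the integers -16..64, the
// floats 0.0, +-0.5, +-1.0, +-2.0, +-4.0 and, on targets where
// hasInv2PiInlineImm() holds, 1/(2*pi); anything else is a literal.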
3218
3219 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
3220 if (!isGFX10Plus())
3221 return 1;
3222
3223 switch (Opcode) {
3224 // 64-bit shift instructions can use only one scalar value input
3225 case AMDGPU::V_LSHLREV_B64_e64:
3226 case AMDGPU::V_LSHLREV_B64_gfx10:
3227 case AMDGPU::V_LSHRREV_B64_e64:
3228 case AMDGPU::V_LSHRREV_B64_gfx10:
3229 case AMDGPU::V_ASHRREV_I64_e64:
3230 case AMDGPU::V_ASHRREV_I64_gfx10:
3231 case AMDGPU::V_LSHL_B64_e64:
3232 case AMDGPU::V_LSHR_B64_e64:
3233 case AMDGPU::V_ASHR_I64_e64:
3234 return 1;
3235 default:
3236 return 2;
3237 }
3238 }
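
// Illustrative example (assumed assembly syntax): on GFX10
// "v_add_f32_e64 v0, s0, s1" may read two scalar values, but
// "v_lshlrev_b64 v[0:1], s4, s[2:3]" is still rejected because the
// 64-bit shifts listed above keep the single-slot limit.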
3239
3240 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3241 const MCOperand &MO = Inst.getOperand(OpIdx);
3242 if (MO.isImm()) {
3243 return !isInlineConstant(Inst, OpIdx);
3244 } else if (MO.isReg()) {
3245 auto Reg = MO.getReg();
3246 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3247 auto PReg = mc2PseudoReg(Reg);
3248 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3249 } else {
3250 return true;
3251 }
3252 }
3253
3254 bool
3255 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
3256 const OperandVector &Operands) {
3257 const unsigned Opcode = Inst.getOpcode();
3258 const MCInstrDesc &Desc = MII.get(Opcode);
3259 unsigned LastSGPR = AMDGPU::NoRegister;
3260 unsigned ConstantBusUseCount = 0;
3261 unsigned NumLiterals = 0;
3262 unsigned LiteralSize;
3263
3264 if (Desc.TSFlags &
3265 (SIInstrFlags::VOPC |
3266 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
3267 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
3268 SIInstrFlags::SDWA)) {
3269 // Check special imm operands (used by madmk, etc)
3270 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
3271 ++NumLiterals;
3272 LiteralSize = 4;
3273 }
3274
3275 SmallDenseSet<unsigned> SGPRsUsed;
3276 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
3277 if (SGPRUsed != AMDGPU::NoRegister) {
3278 SGPRsUsed.insert(SGPRUsed);
3279 ++ConstantBusUseCount;
3280 }
3281
3282 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3283 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3284 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3285
3286 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3287
3288 for (int OpIdx : OpIndices) {
3289 if (OpIdx == -1) break;
3290
3291 const MCOperand &MO = Inst.getOperand(OpIdx);
3292 if (usesConstantBus(Inst, OpIdx)) {
3293 if (MO.isReg()) {
3294 LastSGPR = mc2PseudoReg(MO.getReg());
3295 // Pairs of registers with a partial intersection, such as
3296 // s0, s[0:1]
3297 // flat_scratch_lo, flat_scratch
3298 // flat_scratch_lo, flat_scratch_hi
3299 // are theoretically valid but are disabled anyway.
3300 // Note that this code mimics SIInstrInfo::verifyInstruction
3301 if (!SGPRsUsed.count(LastSGPR)) {
3302 SGPRsUsed.insert(LastSGPR);
3303 ++ConstantBusUseCount;
3304 }
3305 } else { // Expression or a literal
3306
3307 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3308 continue; // special operand like VINTERP attr_chan
3309
3310 // An instruction may use only one literal.
3311 // This has been validated in a previous step.
3312 // See validateVOPLiteral.
3313 // This literal may be used as more than one operand.
3314 // If all these operands are of the same size,
3315 // this literal counts as one scalar value.
3316 // Otherwise it counts as 2 scalar values.
3317 // See "GFX10 Shader Programming", section 3.6.2.3.
3318
3319 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3320 if (Size < 4) Size = 4;
3321
3322 if (NumLiterals == 0) {
3323 NumLiterals = 1;
3324 LiteralSize = Size;
3325 } else if (LiteralSize != Size) {
3326 NumLiterals = 2;
3327 }
3328 }
3329 }
3330 }
3331 }
3332 ConstantBusUseCount += NumLiterals;
3333
3334 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3335 return true;
3336
3337 SMLoc LitLoc = getLitLoc(Operands);
3338 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3339 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
3340 Error(Loc, "invalid operand (violates constant bus restrictions)");
3341 return false;
3342 }
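
// Illustrative examples (assumed assembly syntax) for pre-GFX10 targets,
// where the limit is a single scalar value:
//   v_add_f32_e64 v0, s0, s1   ; two distinct SGPRs -> rejected here
//   v_add_f32_e64 v0, s0, s0   ; the same SGPR counts once -> accepted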
3343
3344 bool
3345 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3346 const OperandVector &Operands) {
3347 const unsigned Opcode = Inst.getOpcode();
3348 const MCInstrDesc &Desc = MII.get(Opcode);
3349
3350 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3351 if (DstIdx == -1 ||
3352 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3353 return true;
3354 }
3355
3356 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3357
3358 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3359 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3360 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3361
3362 assert(DstIdx != -1);
3363 const MCOperand &Dst = Inst.getOperand(DstIdx);
3364 assert(Dst.isReg());
3365 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3366
3367 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3368
3369 for (int SrcIdx : SrcIndices) {
3370 if (SrcIdx == -1) break;
3371 const MCOperand &Src = Inst.getOperand(SrcIdx);
3372 if (Src.isReg()) {
3373 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3374 if (isRegIntersect(DstReg, SrcReg, TRI)) {
3375 Error(getRegLoc(SrcReg, Operands),
3376 "destination must be different than all sources");
3377 return false;
3378 }
3379 }
3380 }
3381
3382 return true;
3383 }
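
// Illustrative example (assumed assembly syntax): the v_qsad/v_mqsad
// opcodes mark vdst as earlyclobber in the .td files, so
// "v_mqsad_u32_u8 v[0:3], v[4:5], v6, v[3:6]" is rejected here because
// the destination overlaps the last source.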
3384
3385 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3386
3387 const unsigned Opc = Inst.getOpcode();
3388 const MCInstrDesc &Desc = MII.get(Opc);
3389
3390 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3391 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3392 assert(ClampIdx != -1);
3393 return Inst.getOperand(ClampIdx).getImm() == 0;
3394 }
3395
3396 return true;
3397 }
3398
3399 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3400
3401 const unsigned Opc = Inst.getOpcode();
3402 const MCInstrDesc &Desc = MII.get(Opc);
3403
3404 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3405 return true;
3406
3407 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3408 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3409 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3410
3411 assert(VDataIdx != -1);
3412
3413 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3414 return true;
3415
3416 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3417 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
3418 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3419 if (DMask == 0)
3420 DMask = 1;
3421
3422 unsigned DataSize =
3423 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3424 if (hasPackedD16()) {
3425 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3426 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3427 DataSize = (DataSize + 1) / 2;
3428 }
3429
3430 return (VDataSize / 4) == DataSize + TFESize;
3431 }
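
// Illustrative example (assumed assembly syntax): "image_load v[0:2],
// v[4:7], s[8:15] dmask:0x7" needs three data registers for the three
// dmask bits; tfe adds one more, and d16 packs two components per VGPR.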
3432
3433 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3434 const unsigned Opc = Inst.getOpcode();
3435 const MCInstrDesc &Desc = MII.get(Opc);
3436
3437 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3438 return true;
3439
3440 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3441
3442 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3443 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3444 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3445 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3446 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3447 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
3448
3449 assert(VAddr0Idx != -1);
3450 assert(SrsrcIdx != -1);
3451 assert(SrsrcIdx > VAddr0Idx);
3452
3453 if (DimIdx == -1)
3454 return true; // intersect_ray
3455
3456 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3457 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3458 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3459 unsigned ActualAddrSize =
3460 IsNSA ? SrsrcIdx - VAddr0Idx
3461 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3462 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
3463
3464 unsigned ExpectedAddrSize =
3465 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
3466
3467 if (!IsNSA) {
3468 if (ExpectedAddrSize > 8)
3469 ExpectedAddrSize = 16;
3470
3471 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
3472 // This provides backward compatibility for assembly created
3473 // before 160b/192b/224b types were directly supported.
3474 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
3475 return true;
3476 }
3477
3478 return ActualAddrSize == ExpectedAddrSize;
3479 }
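
// Illustrative example (assumed assembly syntax): a GFX10 2D sample such
// as "image_sample v[0:3], v[4:5], s[8:15], s[16:19] dmask:0xf
// dim:SQ_RSRC_IMG_2D" needs two packed address registers, while NSA
// encodings list one VGPR per address component instead.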
3480
3481 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3482
3483 const unsigned Opc = Inst.getOpcode();
3484 const MCInstrDesc &Desc = MII.get(Opc);
3485
3486 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3487 return true;
3488 if (!Desc.mayLoad() || !Desc.mayStore())
3489 return true; // Not atomic
3490
3491 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3492 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3493
3494 // This is an incomplete check because image_atomic_cmpswap
3495 // may only use 0x3 and 0xf while other atomic operations
3496 // may use 0x1 and 0x3. However these limitations are
3497 // verified when we check that dmask matches dst size.
3498 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3499 }
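
// Illustrative example (assumed assembly syntax): "image_atomic_add v4,
// v[8:9], s[0:7] dmask:0x1" is accepted; image_atomic_cmpswap uses 0x3
// (or 0xf for the 64-bit form), and any other dmask is rejected here.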
3500
3501 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3502
3503 const unsigned Opc = Inst.getOpcode();
3504 const MCInstrDesc &Desc = MII.get(Opc);
3505
3506 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3507 return true;
3508
3509 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3510 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3511
3512 // GATHER4 instructions use dmask in a different fashion compared to
3513 // other MIMG instructions. The only useful DMASK values are
3514 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3515 // (red,red,red,red) etc.) The ISA document doesn't mention
3516 // this.
3517 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3518 }
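
// Illustrative example (assumed assembly syntax): "image_gather4_lz
// v[0:3], v[4:5], s[8:15], s[16:19] dmask:0x2" gathers the green channel;
// a dmask with more or fewer than one bit set is rejected here.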
3519
3520 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3521 const unsigned Opc = Inst.getOpcode();
3522 const MCInstrDesc &Desc = MII.get(Opc);
3523
3524 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3525 return true;
3526
3527 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3528 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3529 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3530
3531 if (!BaseOpcode->MSAA)
3532 return true;
3533
3534 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3535 assert(DimIdx != -1);
3536
3537 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3538 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3539
3540 return DimInfo->MSAA;
3541 }
3542
3543 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3544 {
3545 switch (Opcode) {
3546 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3547 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3548 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3549 return true;
3550 default:
3551 return false;
3552 }
3553 }
3554
3555 // movrels* opcodes allow only VGPRs as src0.
3556 // This is specified in the .td description for vop1/vop3,
3557 // but sdwa is handled differently. See isSDWAOperand.
3558 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3559 const OperandVector &Operands) {
3560
3561 const unsigned Opc = Inst.getOpcode();
3562 const MCInstrDesc &Desc = MII.get(Opc);
3563
3564 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3565 return true;
3566
3567 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3568 assert(Src0Idx != -1);
3569
3570 SMLoc ErrLoc;
3571 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3572 if (Src0.isReg()) {
3573 auto Reg = mc2PseudoReg(Src0.getReg());
3574 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3575 if (!isSGPR(Reg, TRI))
3576 return true;
3577 ErrLoc = getRegLoc(Reg, Operands);
3578 } else {
3579 ErrLoc = getConstLoc(Operands);
3580 }
3581
3582 Error(ErrLoc, "source operand must be a VGPR");
3583 return false;
3584 }
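
// Illustrative example (assumed assembly syntax): "v_movrels_b32_sdwa
// v0, s0" is rejected with the diagnostic above; the VOP1/VOP3 forms
// already enforce the VGPR requirement through their operand classes.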
3585
3586 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3587 const OperandVector &Operands) {
3588
3589 const unsigned Opc = Inst.getOpcode();
3590
3591 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3592 return true;
3593
3594 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3595 assert(Src0Idx != -1);
3596
3597 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3598 if (!Src0.isReg())
3599 return true;
3600
3601 auto Reg = mc2PseudoReg(Src0.getReg());
3602 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3603 if (isSGPR(Reg, TRI)) {
3604 Error(getRegLoc(Reg, Operands),
3605 "source operand must be either a VGPR or an inline constant");
3606 return false;
3607 }
3608
3609 return true;
3610 }
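
// Illustrative example (assumed assembly syntax): "v_accvgpr_write_b32
// a0, s0" is rejected, while "v_accvgpr_write_b32 a0, v0" and
// "v_accvgpr_write_b32 a0, 1.0" are both accepted.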
3611
3612 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3613 switch (Inst.getOpcode()) {
3614 default:
3615 return true;
3616 case V_DIV_SCALE_F32_gfx6_gfx7:
3617 case V_DIV_SCALE_F32_vi:
3618 case V_DIV_SCALE_F32_gfx10:
3619 case V_DIV_SCALE_F64_gfx6_gfx7:
3620 case V_DIV_SCALE_F64_vi:
3621 case V_DIV_SCALE_F64_gfx10:
3622 break;
3623 }
3624
3625 // TODO: Check that src0 = src1 or src2.
3626
3627 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3628 AMDGPU::OpName::src1_modifiers,
3629 AMDGPU::OpName::src2_modifiers}) {
3630 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3631 .getImm() &
3632 SISrcMods::ABS) {
3633 return false;
3634 }
3635 }
3636
3637 return true;
3638 }
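
// Illustrative example (assumed assembly syntax): "v_div_scale_f32 v0,
// vcc, -v1, v1, v2" is fine, but "v_div_scale_f32 v0, vcc, |v1|, v1, v2"
// is rejected because VOP3B encodings have no abs bits.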
3639
3640 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3641
3642 const unsigned Opc = Inst.getOpcode();
3643 const MCInstrDesc &Desc = MII.get(Opc);
3644
3645 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3646 return true;
3647
3648 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3649 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3650 if (isCI() || isSI())
3651 return false;
3652 }
3653
3654 return true;
3655 }
3656
3657 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3658 const unsigned Opc = Inst.getOpcode();
3659 const MCInstrDesc &Desc = MII.get(Opc);
3660
3661 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3662 return true;
3663
3664 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3665 if (DimIdx < 0)
3666 return true;
3667
3668 long Imm = Inst.getOperand(DimIdx).getImm();
3669 if (Imm < 0 || Imm >= 8)
3670 return false;
3671
3672 return true;
3673 }
3674
3675 static bool IsRevOpcode(const unsigned Opcode)
3676 {
3677 switch (Opcode) {
3678 case AMDGPU::V_SUBREV_F32_e32:
3679 case AMDGPU::V_SUBREV_F32_e64:
3680 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3681 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3682 case AMDGPU::V_SUBREV_F32_e32_vi:
3683 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3684 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3685 case AMDGPU::V_SUBREV_F32_e64_vi:
3686
3687 case AMDGPU::V_SUBREV_CO_U32_e32:
3688 case AMDGPU::V_SUBREV_CO_U32_e64:
3689 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3690 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3691
3692 case AMDGPU::V_SUBBREV_U32_e32:
3693 case AMDGPU::V_SUBBREV_U32_e64:
3694 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3695 case AMDGPU::V_SUBBREV_U32_e32_vi:
3696 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3697 case AMDGPU::V_SUBBREV_U32_e64_vi:
3698
3699 case AMDGPU::V_SUBREV_U32_e32:
3700 case AMDGPU::V_SUBREV_U32_e64:
3701 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3702 case AMDGPU::V_SUBREV_U32_e32_vi:
3703 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3704 case AMDGPU::V_SUBREV_U32_e64_vi:
3705
3706 case AMDGPU::V_SUBREV_F16_e32:
3707 case AMDGPU::V_SUBREV_F16_e64:
3708 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3709 case AMDGPU::V_SUBREV_F16_e32_vi:
3710 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3711 case AMDGPU::V_SUBREV_F16_e64_vi:
3712
3713 case AMDGPU::V_SUBREV_U16_e32:
3714 case AMDGPU::V_SUBREV_U16_e64:
3715 case AMDGPU::V_SUBREV_U16_e32_vi:
3716 case AMDGPU::V_SUBREV_U16_e64_vi:
3717
3718 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3719 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3720 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3721
3722 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3723 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3724
3725 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3726 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3727
3728 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3729 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3730
3731 case AMDGPU::V_LSHRREV_B32_e32:
3732 case AMDGPU::V_LSHRREV_B32_e64:
3733 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3734 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3735 case AMDGPU::V_LSHRREV_B32_e32_vi:
3736 case AMDGPU::V_LSHRREV_B32_e64_vi:
3737 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3738 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3739
3740 case AMDGPU::V_ASHRREV_I32_e32:
3741 case AMDGPU::V_ASHRREV_I32_e64:
3742 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3743 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3744 case AMDGPU::V_ASHRREV_I32_e32_vi:
3745 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3746 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3747 case AMDGPU::V_ASHRREV_I32_e64_vi:
3748
3749 case AMDGPU::V_LSHLREV_B32_e32:
3750 case AMDGPU::V_LSHLREV_B32_e64:
3751 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3752 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3753 case AMDGPU::V_LSHLREV_B32_e32_vi:
3754 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3755 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3756 case AMDGPU::V_LSHLREV_B32_e64_vi:
3757
3758 case AMDGPU::V_LSHLREV_B16_e32:
3759 case AMDGPU::V_LSHLREV_B16_e64:
3760 case AMDGPU::V_LSHLREV_B16_e32_vi:
3761 case AMDGPU::V_LSHLREV_B16_e64_vi:
3762 case AMDGPU::V_LSHLREV_B16_gfx10:
3763
3764 case AMDGPU::V_LSHRREV_B16_e32:
3765 case AMDGPU::V_LSHRREV_B16_e64:
3766 case AMDGPU::V_LSHRREV_B16_e32_vi:
3767 case AMDGPU::V_LSHRREV_B16_e64_vi:
3768 case AMDGPU::V_LSHRREV_B16_gfx10:
3769
3770 case AMDGPU::V_ASHRREV_I16_e32:
3771 case AMDGPU::V_ASHRREV_I16_e64:
3772 case AMDGPU::V_ASHRREV_I16_e32_vi:
3773 case AMDGPU::V_ASHRREV_I16_e64_vi:
3774 case AMDGPU::V_ASHRREV_I16_gfx10:
3775
3776 case AMDGPU::V_LSHLREV_B64_e64:
3777 case AMDGPU::V_LSHLREV_B64_gfx10:
3778 case AMDGPU::V_LSHLREV_B64_vi:
3779
3780 case AMDGPU::V_LSHRREV_B64_e64:
3781 case AMDGPU::V_LSHRREV_B64_gfx10:
3782 case AMDGPU::V_LSHRREV_B64_vi:
3783
3784 case AMDGPU::V_ASHRREV_I64_e64:
3785 case AMDGPU::V_ASHRREV_I64_gfx10:
3786 case AMDGPU::V_ASHRREV_I64_vi:
3787
3788 case AMDGPU::V_PK_LSHLREV_B16:
3789 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3790 case AMDGPU::V_PK_LSHLREV_B16_vi:
3791
3792 case AMDGPU::V_PK_LSHRREV_B16:
3793 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3794 case AMDGPU::V_PK_LSHRREV_B16_vi:
3795 case AMDGPU::V_PK_ASHRREV_I16:
3796 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3797 case AMDGPU::V_PK_ASHRREV_I16_vi:
3798 return true;
3799 default:
3800 return false;
3801 }
3802 }
3803
3804 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3805
3806 using namespace SIInstrFlags;
3807 const unsigned Opcode = Inst.getOpcode();
3808 const MCInstrDesc &Desc = MII.get(Opcode);
3809
3810 // The lds_direct register is defined so that it can be used
3811 // with 9-bit operands only. Ignore encodings that do not accept these.
3812 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
3813 if ((Desc.TSFlags & Enc) == 0)
3814 return None;
3815
3816 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
3817 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
3818 if (SrcIdx == -1)
3819 break;
3820 const auto &Src = Inst.getOperand(SrcIdx);
3821 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3822
3823 if (isGFX90A())
3824 return StringRef("lds_direct is not supported on this GPU");
3825
3826 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
3827 return StringRef("lds_direct cannot be used with this instruction");
3828
3829 if (SrcName != OpName::src0)
3830 return StringRef("lds_direct may be used as src0 only");
3831 }
3832 }
3833
3834 return None;
3835 }
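
// Illustrative example (assumed assembly syntax): "v_mov_b32 v0,
// lds_direct" is accepted where supported, while
// "v_add_f32 v0, v1, lds_direct" is rejected since lds_direct may only
// appear as src0.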
3836
3837 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3838 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3839 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3840 if (Op.isFlatOffset())
3841 return Op.getStartLoc();
3842 }
3843 return getLoc();
3844 }
3845
3846 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3847 const OperandVector &Operands) {
3848 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3849 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3850 return true;
3851
3852 auto Opcode = Inst.getOpcode();
3853 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3854 assert(OpNum != -1);
3855
3856 const auto &Op = Inst.getOperand(OpNum);
3857 if (!hasFlatOffsets() && Op.getImm() != 0) {
3858 Error(getFlatOffsetLoc(Operands),
3859 "flat offset modifier is not supported on this GPU");
3860 return false;
3861 }
3862
3863 // For FLAT segment the offset must be positive;
3864 // MSB is ignored and forced to zero.
3865 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
3866 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3867 if (!isIntN(OffsetSize, Op.getImm())) {
3868 Error(getFlatOffsetLoc(Operands),
3869 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3870 return false;
3871 }
3872 } else {
3873 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3874 if (!isUIntN(OffsetSize, Op.getImm())) {
3875 Error(getFlatOffsetLoc(Operands),
3876 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3877 return false;
3878 }
3879 }
3880
3881 return true;
3882 }
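
// Illustrative example: on targets without flat offsets (e.g. gfx8)
// "flat_load_dword v0, v[1:2] offset:8" is rejected; on newer targets the
// offset must fit the encoded width, which is signed for global/scratch
// and unsigned for plain flat accesses.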
3883
3884 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3885 // Start with second operand because SMEM Offset cannot be dst or src0.
3886 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3887 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3888 if (Op.isSMEMOffset())
3889 return Op.getStartLoc();
3890 }
3891 return getLoc();
3892 }
3893
3894 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3895 const OperandVector &Operands) {
3896 if (isCI() || isSI())
3897 return true;
3898
3899 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3900 if ((TSFlags & SIInstrFlags::SMRD) == 0)
3901 return true;
3902
3903 auto Opcode = Inst.getOpcode();
3904 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3905 if (OpNum == -1)
3906 return true;
3907
3908 const auto &Op = Inst.getOperand(OpNum);
3909 if (!Op.isImm())
3910 return true;
3911
3912 uint64_t Offset = Op.getImm();
3913 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3914 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3915 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3916 return true;
3917
3918 Error(getSMEMOffsetLoc(Operands),
3919 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3920 "expected a 21-bit signed offset");
3921
3922 return false;
3923 }
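
// Illustrative example (assumed assembly syntax): on gfx8
// "s_load_dword s0, s[2:3], 0xFFFFF" fits the 20-bit unsigned encoding,
// while gfx9+ also accepts negative 21-bit signed offsets for non-buffer
// loads.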
3924
3925 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3926 unsigned Opcode = Inst.getOpcode();
3927 const MCInstrDesc &Desc = MII.get(Opcode);
3928 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3929 return true;
3930
3931 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3932 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3933
3934 const int OpIndices[] = { Src0Idx, Src1Idx };
3935
3936 unsigned NumExprs = 0;
3937 unsigned NumLiterals = 0;
3938 uint32_t LiteralValue;
3939
3940 for (int OpIdx : OpIndices) {
3941 if (OpIdx == -1) break;
3942
3943 const MCOperand &MO = Inst.getOperand(OpIdx);
3944 // Exclude special imm operands (like the one used by s_set_gpr_idx_on).
3945 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3946 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3947 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3948 if (NumLiterals == 0 || LiteralValue != Value) {
3949 LiteralValue = Value;
3950 ++NumLiterals;
3951 }
3952 } else if (MO.isExpr()) {
3953 ++NumExprs;
3954 }
3955 }
3956 }
3957
3958 return NumLiterals + NumExprs <= 1;
3959 }
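
// Illustrative examples (assumed assembly syntax):
//   s_add_u32 s0, 0x12345678, 0x12345678  ; one unique literal -> accepted
//   s_add_u32 s0, 0x12345678, 0x9abcdef0  ; two distinct literals -> rejected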
3960
3961 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3962 const unsigned Opc = Inst.getOpcode();
3963 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3964 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3965 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3966 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3967
3968 if (OpSel & ~3)
3969 return false;
3970 }
3971 return true;
3972 }
3973
3974 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
3975 const OperandVector &Operands) {
3976 const unsigned Opc = Inst.getOpcode();
3977 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
3978 if (DppCtrlIdx < 0)
3979 return true;
3980 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
3981
3982 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
3983 // DPP64 is supported for row_newbcast only.
3984 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3985 if (Src0Idx >= 0 &&
3986 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
3987 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
3988 Error(S, "64 bit dpp only supports row_newbcast");
3989 return false;
3990 }
3991 }
3992
3993 return true;
3994 }
3995
3996 // Check if VCC register matches wavefront size
3997 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3998 auto FB = getFeatureBits();
3999 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4000 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4001 }
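
// Illustrative example: wave64 code writes "v_cndmask_b32 v0, v1, v2, vcc"
// while wave32 code must name vcc_lo for the same operand.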
4002
4003 // Only one unique literal can be used. A VOP3 literal is only allowed on GFX10+.
4004 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
4005 const OperandVector &Operands) {
4006 unsigned Opcode = Inst.getOpcode();
4007 const MCInstrDesc &Desc = MII.get(Opcode);
4008 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm);
4009 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
4010 ImmIdx == -1)
4011 return true;
4012
4013 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
4014 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
4015 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
4016
4017 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx};
4018
4019 unsigned NumExprs = 0;
4020 unsigned NumLiterals = 0;
4021 uint32_t LiteralValue;
4022
4023 for (int OpIdx : OpIndices) {
4024 if (OpIdx == -1)
4025 continue;
4026
4027 const MCOperand &MO = Inst.getOperand(OpIdx);
4028 if (!MO.isImm() && !MO.isExpr())
4029 continue;
4030 if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
4031 continue;
4032
4033 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
4034 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
4035 Error(getConstLoc(Operands),
4036 "inline constants are not allowed for this operand");
4037 return false;
4038 }
4039
4040 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
4041 uint32_t Value = static_cast<uint32_t>(MO.getImm());
4042 if (NumLiterals == 0 || LiteralValue != Value) {
4043 LiteralValue = Value;
4044 ++NumLiterals;
4045 }
4046 } else if (MO.isExpr()) {
4047 ++NumExprs;
4048 }
4049 }
4050 NumLiterals += NumExprs;
4051
4052 if (!NumLiterals)
4053 return true;
4054
4055 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
4056 Error(getLitLoc(Operands), "literal operands are not supported");
4057 return false;
4058 }
4059
4060 if (NumLiterals > 1) {
4061 Error(getLitLoc(Operands), "only one literal operand is allowed");
4062 return false;
4063 }
4064
4065 return true;
4066 }
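
// Illustrative examples (assumed assembly syntax): "v_add_f32_e64 v0,
// 0x3e22f983, v1" requires the VOP3Literal feature (GFX10+), and even
// there "v_fma_f32 v0, 0x40490fdb, v1, 0x3e22f983" is rejected because
// the two literal values differ.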
4067
4068 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
4069 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4070 const MCRegisterInfo *MRI) {
4071 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4072 if (OpIdx < 0)
4073 return -1;
4074
4075 const MCOperand &Op = Inst.getOperand(OpIdx);
4076 if (!Op.isReg())
4077 return -1;
4078
4079 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4080 auto Reg = Sub ? Sub : Op.getReg();
4081 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4082 return AGPR32.contains(Reg) ? 1 : 0;
4083 }
4084
4085 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4086 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4087 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4088 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4089 SIInstrFlags::DS)) == 0)
4090 return true;
4091
4092 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4093 : AMDGPU::OpName::vdata;
4094
4095 const MCRegisterInfo *MRI = getMRI();
4096 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4097 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4098
4099 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4100 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4101 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4102 return false;
4103 }
4104
4105 auto FB = getFeatureBits();
4106 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4107 if (DataAreg < 0 || DstAreg < 0)
4108 return true;
4109 return DstAreg == DataAreg;
4110 }
4111
4112 return DstAreg < 1 && DataAreg < 1;
4113 }
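
// Illustrative example (assumed assembly syntax): on gfx90a
// "ds_write_b32 v0, a1" is accepted, but a ds_write2 mixing AGPR and VGPR
// data operands is rejected, as is AGPR data on targets without gfx90a
// instructions.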
4114
4115 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4116 auto FB = getFeatureBits();
4117 if (!FB[AMDGPU::FeatureGFX90AInsts])
4118 return true;
4119
4120 const MCRegisterInfo *MRI = getMRI();
4121 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4122 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4123 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4124 const MCOperand &Op = Inst.getOperand(I);
4125 if (!Op.isReg())
4126 continue;
4127
4128 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4129 if (!Sub)
4130 continue;
4131
4132 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4133 return false;
4134 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4135 return false;
4136 }
4137
4138 return true;
4139 }
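
// Illustrative example: on gfx90a an odd-aligned tuple such as v[1:2] or
// a[3:4] is rejected by the check above, while v[0:1] and a[2:3] are
// accepted.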
4140
4141 // gfx90a has an undocumented limitation:
4142 // DS_GWS opcodes must use even aligned registers.
4143 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
4144 const OperandVector &Operands) {
4145 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
4146 return true;
4147
4148 int Opc = Inst.getOpcode();
4149 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
4150 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
4151 return true;
4152
4153 const MCRegisterInfo *MRI = getMRI();
4154 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4155 int Data0Pos =
4156 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
4157 assert(Data0Pos != -1);
4158 auto Reg = Inst.getOperand(Data0Pos).getReg();
4159 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
4160 if (RegIdx & 1) {
4161 SMLoc RegLoc = getRegLoc(Reg, Operands);
4162 Error(RegLoc, "vgpr must be even aligned");
4163 return false;
4164 }
4165
4166 return true;
4167 }
4168
4169 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4170 const OperandVector &Operands,
4171 const SMLoc &IDLoc) {
4172 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4173 AMDGPU::OpName::cpol);
4174 if (CPolPos == -1)
4175 return true;
4176
4177 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4178
4179 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4180 if ((TSFlags & (SIInstrFlags::SMRD)) &&
4181 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
4182 Error(IDLoc, "invalid cache policy for SMRD instruction");
4183 return false;
4184 }
4185
4186 if (isGFX90A() && (CPol & CPol::SCC)) {
4187 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4188 StringRef CStr(S.getPointer());
4189 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4190 Error(S, "scc is not supported on this GPU");
4191 return false;
4192 }
4193
4194 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4195 return true;
4196
4197 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4198 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4199 Error(IDLoc, "instruction must use glc");
4200 return false;
4201 }
4202 } else {
4203 if (CPol & CPol::GLC) {
4204 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4205 StringRef CStr(S.getPointer());
4206 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
4207 Error(S, "instruction must not use glc");
4208 return false;
4209 }
4210 }
4211
4212 return true;
4213 }
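
// Illustrative examples (assumed assembly syntax):
//   flat_atomic_add v0, v[1:2], v3 glc  ; returning form requires glc
//   flat_atomic_add v[1:2], v3          ; non-returning form must omit glc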
4214
4215 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4216 const SMLoc &IDLoc,
4217 const OperandVector &Operands) {
4218 if (auto ErrMsg = validateLdsDirect(Inst)) {
4219 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4220 return false;
4221 }
4222 if (!validateSOPLiteral(Inst)) {
4223 Error(getLitLoc(Operands),
4224 "only one literal operand is allowed");
4225 return false;
4226 }
4227 if (!validateVOPLiteral(Inst, Operands)) {
4228 return false;
4229 }
4230 if (!validateConstantBusLimitations(Inst, Operands)) {
4231 return false;
4232 }
4233 if (!validateEarlyClobberLimitations(Inst, Operands)) {
4234 return false;
4235 }
4236 if (!validateIntClampSupported(Inst)) {
4237 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4238 "integer clamping is not supported on this GPU");
4239 return false;
4240 }
4241 if (!validateOpSel(Inst)) {
4242 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4243 "invalid op_sel operand");
4244 return false;
4245 }
4246 if (!validateDPP(Inst, Operands)) {
4247 return false;
4248 }
4249 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4250 if (!validateMIMGD16(Inst)) {
4251 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4252 "d16 modifier is not supported on this GPU");
4253 return false;
4254 }
4255 if (!validateMIMGDim(Inst)) {
4256 Error(IDLoc, "dim modifier is required on this GPU");
4257 return false;
4258 }
4259 if (!validateMIMGMSAA(Inst)) {
4260 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4261 "invalid dim; must be MSAA type");
4262 return false;
4263 }
4264 if (!validateMIMGDataSize(Inst)) {
4265 Error(IDLoc,
4266 "image data size does not match dmask and tfe");
4267 return false;
4268 }
4269 if (!validateMIMGAddrSize(Inst)) {
4270 Error(IDLoc,
4271 "image address size does not match dim and a16");
4272 return false;
4273 }
4274 if (!validateMIMGAtomicDMask(Inst)) {
4275 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4276 "invalid atomic image dmask");
4277 return false;
4278 }
4279 if (!validateMIMGGatherDMask(Inst)) {
4280 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4281 "invalid image_gather dmask: only one bit must be set");
4282 return false;
4283 }
4284 if (!validateMovrels(Inst, Operands)) {
4285 return false;
4286 }
4287 if (!validateFlatOffset(Inst, Operands)) {
4288 return false;
4289 }
4290 if (!validateSMEMOffset(Inst, Operands)) {
4291 return false;
4292 }
4293 if (!validateMAIAccWrite(Inst, Operands)) {
4294 return false;
4295 }
4296 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4297 return false;
4298 }
4299
4300 if (!validateAGPRLdSt(Inst)) {
4301 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4302 ? "invalid register class: data and dst should be all VGPR or AGPR"
4303 : "invalid register class: agpr loads and stores not supported on this GPU"
4304 );
4305 return false;
4306 }
4307 if (!validateVGPRAlign(Inst)) {
4308 Error(IDLoc,
4309 "invalid register class: vgpr tuples must be 64 bit aligned");
4310 return false;
4311 }
4312 if (!validateGWS(Inst, Operands)) {
4313 return false;
4314 }
4315
4316 if (!validateDivScale(Inst)) {
4317 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4318 return false;
4319 }
4323
4324 return true;
4325 }
4326
4327 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
4328 const FeatureBitset &FBS,
4329 unsigned VariantID = 0);
4330
4331 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
4332 const FeatureBitset &AvailableFeatures,
4333 unsigned VariantID);
4334
4335 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4336 const FeatureBitset &FBS) {
4337 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
4338 }
4339
4340 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
4341 const FeatureBitset &FBS,
4342 ArrayRef<unsigned> Variants) {
4343 for (auto Variant : Variants) {
4344 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
4345 return true;
4346 }
4347
4348 return false;
4349 }
4350
4351 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
4352 const SMLoc &IDLoc) {
4353 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
4354
4355 // Check if requested instruction variant is supported.
4356 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
4357 return false;
4358
4359 // This instruction is not supported.
4360 // Clear any other pending errors because they are no longer relevant.
4361 getParser().clearPendingErrors();
4362
4363 // Requested instruction variant is not supported.
4364 // Check if any other variants are supported.
4365 StringRef VariantName = getMatchedVariantName();
4366 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
4367 return Error(IDLoc,
4368 Twine(VariantName,
4369 " variant of this instruction is not supported"));
4370 }
4371
4372 // Finally check if this instruction is supported on any other GPU.
4373 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
4374 return Error(IDLoc, "instruction not supported on this GPU");
4375 }
4376
4377 // Instruction not supported on any GPU. Probably a typo.
4378 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
4379 return Error(IDLoc, "invalid instruction" + Suggestion);
4380 }
4381
4382 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4383 OperandVector &Operands,
4384 MCStreamer &Out,
4385 uint64_t &ErrorInfo,
4386 bool MatchingInlineAsm) {
4387 MCInst Inst;
4388 unsigned Result = Match_Success;
4389 for (auto Variant : getMatchedVariants()) {
4390 uint64_t EI;
4391 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
4392 Variant);
4393 // We order match statuses from least to most specific and use the most
4394 // specific status as the result:
4395 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4396 if ((R == Match_Success) ||
4397 (R == Match_PreferE32) ||
4398 (R == Match_MissingFeature && Result != Match_PreferE32) ||
4399 (R == Match_InvalidOperand && Result != Match_MissingFeature
4400 && Result != Match_PreferE32) ||
4401 (R == Match_MnemonicFail && Result != Match_InvalidOperand
4402 && Result != Match_MissingFeature
4403 && Result != Match_PreferE32)) {
4404 Result = R;
4405 ErrorInfo = EI;
4406 }
4407 if (R == Match_Success)
4408 break;
4409 }
4410
4411 if (Result == Match_Success) {
4412 if (!validateInstruction(Inst, IDLoc, Operands)) {
4413 return true;
4414 }
4415 Inst.setLoc(IDLoc);
4416 Out.emitInstruction(Inst, getSTI());
4417 return false;
4418 }
4419
4420 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4421 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4422 return true;
4423 }
4424
4425 switch (Result) {
4426 default: break;
4427 case Match_MissingFeature:
4428 // It has been verified that the specified instruction
4429 // mnemonic is valid. A match was found but it requires
4430 // features which are not supported on this GPU.
4431 return Error(IDLoc, "operands are not valid for this GPU or mode");
4432
4433 case Match_InvalidOperand: {
4434 SMLoc ErrorLoc = IDLoc;
4435 if (ErrorInfo != ~0ULL) {
4436 if (ErrorInfo >= Operands.size()) {
4437 return Error(IDLoc, "too few operands for instruction");
4438 }
4439 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4440 if (ErrorLoc == SMLoc())
4441 ErrorLoc = IDLoc;
4442 }
4443 return Error(ErrorLoc, "invalid operand for instruction");
4444 }
4445
4446 case Match_PreferE32:
4447 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4448 "should be encoded as e32");
4449 case Match_MnemonicFail:
4450 llvm_unreachable("Invalid instructions should have been handled already");
4451 }
4452 llvm_unreachable("Implement any new match types added!");
4453 }
4454
4455 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4456 int64_t Tmp = -1;
4457 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4458 return true;
4459 }
4460 if (getParser().parseAbsoluteExpression(Tmp)) {
4461 return true;
4462 }
4463 Ret = static_cast<uint32_t>(Tmp);
4464 return false;
4465 }
4466
4467 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4468 uint32_t &Minor) {
4469 if (ParseAsAbsoluteExpression(Major))
4470 return TokError("invalid major version");
4471
4472 if (!trySkipToken(AsmToken::Comma))
4473 return TokError("minor version number required, comma expected");
4474
4475 if (ParseAsAbsoluteExpression(Minor))
4476 return TokError("invalid minor version");
4477
4478 return false;
4479 }
4480
4481 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4482 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4483 return TokError("directive only supported for amdgcn architecture");
4484
4485 std::string TargetIDDirective;
4486 SMLoc TargetStart = getTok().getLoc();
4487 if (getParser().parseEscapedString(TargetIDDirective))
4488 return true;
4489
4490 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
4491 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
4492 return getParser().Error(TargetRange.Start,
4493 (Twine(".amdgcn_target directive's target id ") +
4494 Twine(TargetIDDirective) +
4495 Twine(" does not match the specified target id ") +
4496 Twine(getTargetStreamer().getTargetID()->toString())).str());
4497
4498 return false;
4499 }
4500
4501 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4502 return Error(Range.Start, "value out of range", Range);
4503 }
4504
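// The blocks computed below are granulated register counts: roughly
// ceil(used / granule) - 1, with a target-dependent granule (e.g. 4 VGPRs
// on most pre-gfx10 targets, 8 in gfx10 wave32 mode). The exact rules
// live in IsaInfo::getNumVGPRBlocks and IsaInfo::getNumSGPRBlocks.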
4505 bool AMDGPUAsmParser::calculateGPRBlocks(
4506 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4507 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4508 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4509 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4510 // TODO(scott.linder): These calculations are duplicated from
4511 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4512 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4513
4514 unsigned NumVGPRs = NextFreeVGPR;
4515 unsigned NumSGPRs = NextFreeSGPR;
4516
4517 if (Version.Major >= 10)
4518 NumSGPRs = 0;
4519 else {
4520 unsigned MaxAddressableNumSGPRs =
4521 IsaInfo::getAddressableNumSGPRs(&getSTI());
4522
4523 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4524 NumSGPRs > MaxAddressableNumSGPRs)
4525 return OutOfRangeError(SGPRRange);
4526
4527 NumSGPRs +=
4528 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4529
4530 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4531 NumSGPRs > MaxAddressableNumSGPRs)
4532 return OutOfRangeError(SGPRRange);
4533
4534 if (Features.test(FeatureSGPRInitBug))
4535 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4536 }
4537
4538 VGPRBlocks =
4539 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4540 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4541
4542 return false;
4543 }
4544
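// A minimal example of the directive block handled here (illustrative):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 8
//     .amdhsa_next_free_sgpr 16
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are the only required
// directives; everything else defaults per getDefaultAmdhsaKernelDescriptor.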
4545 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4546 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4547 return TokError("directive only supported for amdgcn architecture");
4548
4549 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4550 return TokError("directive only supported for amdhsa OS");
4551
4552 StringRef KernelName;
4553 if (getParser().parseIdentifier(KernelName))
4554 return true;
4555
4556 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4557
4558 StringSet<> Seen;
4559
4560 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4561
4562 SMRange VGPRRange;
4563 uint64_t NextFreeVGPR = 0;
4564 uint64_t AccumOffset = 0;
4565 SMRange SGPRRange;
4566 uint64_t NextFreeSGPR = 0;
4567 unsigned UserSGPRCount = 0;
4568 bool ReserveVCC = true;
4569 bool ReserveFlatScr = true;
4570 Optional<bool> EnableWavefrontSize32;
4571
4572 while (true) {
4573 while (trySkipToken(AsmToken::EndOfStatement));
4574
4575 StringRef ID;
4576 SMRange IDRange = getTok().getLocRange();
4577 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4578 return true;
4579
4580 if (ID == ".end_amdhsa_kernel")
4581 break;
4582
4583 if (Seen.find(ID) != Seen.end())
4584 return TokError(".amdhsa_ directives cannot be repeated");
4585 Seen.insert(ID);
4586
4587 SMLoc ValStart = getLoc();
4588 int64_t IVal;
4589 if (getParser().parseAbsoluteExpression(IVal))
4590 return true;
4591 SMLoc ValEnd = getLoc();
4592 SMRange ValRange = SMRange(ValStart, ValEnd);
4593
4594 if (IVal < 0)
4595 return OutOfRangeError(ValRange);
4596
4597 uint64_t Val = IVal;
4598
4599 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4600 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4601 return OutOfRangeError(RANGE); \
4602 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4603
4604 if (ID == ".amdhsa_group_segment_fixed_size") {
4605 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4606 return OutOfRangeError(ValRange);
4607 KD.group_segment_fixed_size = Val;
4608 } else if (ID == ".amdhsa_private_segment_fixed_size") {
4609 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4610 return OutOfRangeError(ValRange);
4611 KD.private_segment_fixed_size = Val;
4612 } else if (ID == ".amdhsa_kernarg_size") {
4613 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
4614 return OutOfRangeError(ValRange);
4615 KD.kernarg_size = Val;
4616 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4617 if (hasArchitectedFlatScratch())
4618 return Error(IDRange.Start,
4619 "directive is not supported with architected flat scratch",
4620 IDRange);
4621 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4622 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4623 Val, ValRange);
4624 if (Val)
4625 UserSGPRCount += 4;
4626 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4627 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4628 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4629 ValRange);
4630 if (Val)
4631 UserSGPRCount += 2;
4632 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4633 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4635 ValRange);
4636 if (Val)
4637 UserSGPRCount += 2;
4638 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4639 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4640 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4641 Val, ValRange);
4642 if (Val)
4643 UserSGPRCount += 2;
4644 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4645 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4646 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4647 ValRange);
4648 if (Val)
4649 UserSGPRCount += 2;
4650 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4651 if (hasArchitectedFlatScratch())
4652 return Error(IDRange.Start,
4653 "directive is not supported with architected flat scratch",
4654 IDRange);
4655 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4656 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4657 ValRange);
4658 if (Val)
4659 UserSGPRCount += 2;
4660 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4661 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4662 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4663 Val, ValRange);
4664 if (Val)
4665 UserSGPRCount += 1;
4666 } else if (ID == ".amdhsa_wavefront_size32") {
4667 if (IVersion.Major < 10)
4668 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4669 EnableWavefrontSize32 = Val;
4670 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4671 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4672 Val, ValRange);
4673 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4674 if (hasArchitectedFlatScratch())
4675 return Error(IDRange.Start,
4676 "directive is not supported with architected flat scratch",
4677 IDRange);
4678 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4679 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
4680 } else if (ID == ".amdhsa_enable_private_segment") {
      if (!hasArchitectedFlatScratch())
        return Error(
            IDRange.Start,
            "directive is not supported without architected flat scratch",
            IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (hasArchitectedFlatScratch())
        return Error(IDRange.Start,
                     "directive is not supported with architected flat scratch",
                     IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
        return getParser().Error(IDRange.Start,
                                 ".amdhsa_reserve_xnack_mask does not match target id",
                                 IDRange);
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         getTargetStreamer().getTargetID()->isXnackOnOrAny(),
                         EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  if (isGFX90A()) {
    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");
    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                    (AccumOffset / 4 - 1));
  }
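
  // For illustration (hypothetical directive values): a kernel with
  // .amdhsa_next_free_vgpr 16 and .amdhsa_accum_offset 12 passes the checks
  // above (12 is a multiple of 4 and 12 <= 16) and stores 12/4 - 1 = 2 in
  // COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET.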

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (isToken(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
                                                        ISA.Stepping,
                                                        "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (!trySkipToken(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("vendor name required, comma expected");

  if (!parseString(VendorName, "invalid vendor name"))
    return true;

  if (!trySkipToken(AsmToken::Comma))
    return TokError("arch name required, comma expected");

  if (!parseString(ArchName, "invalid arch name"))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
                                                      VendorName, ArchName);
  return false;
}
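
// A minimal sketch of the syntax accepted above (version numbers are
// illustrative):
//
//   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
//
// With no arguments the directive falls back to the ISA version of the
// subtarget selected on the command line.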

bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}

bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while (trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}
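
// For illustration, a sketch of the directive block this loop consumes
// (field values are hypothetical; any amd_kernel_code_t field may appear):
//
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t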

bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);

  KernelScope.initialize(getContext());
  return false;
}

bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  Lex();

  return false;
}

bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      isHsaAbiVersion3Or4(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  }

  return false;
}

/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
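
// A sketch of the kind of block ParseToEndDirective collects: everything
// between the begin/end markers is gathered verbatim (shown here with the V3
// HSA metadata markers; the YAML payload is illustrative only):
//
//   .amdgpu_metadata
//   amdhsa.version:
//     - 1
//     - 0
//   .end_amdgpu_metadata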

/// Parse the assembler directive for new MsgPack-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}

/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}
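
// A sketch of the legacy form parsed above: a comma-separated list of
// register/value pairs (the numbers below are hypothetical):
//
//   .amd_amdgpu_pal_metadata 0x2c0a, 0x2e12, 0x2c0b, 0x1d00
//
// Each pair is forwarded to PALMetadata->setRegister(Key, Value).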

/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (parseToken(AsmToken::Comma, "expected ','"))
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}
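
// Usage sketch, matching the grammar above (symbol names and sizes are
// illustrative):
//
//   .amdgpu_lds lds_buffer, 4096, 16   ; 4 KiB of LDS, 16-byte aligned
//   .amdgpu_lds lds_small, 64          ; alignment defaults to 4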

bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {

  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9Plus();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by a custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}
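
// For example, "v_add_f32_e64" is reduced to the mnemonic "v_add_f32" with a
// forced 64-bit (VOP3) encoding, and "v_mov_b32_sdwa" to "v_mov_b32" with the
// SDWA form forced; an unsuffixed mnemonic leaves the encoding choice to the
// matcher.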

bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    CPolSeen = 0;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    if (Res != MatchOperand_Success) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg =
            (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
                                              "not a valid operand.";
        Error(getLoc(), Msg);
      }
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {

  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
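
// A sketch of the operand syntax handled above, e.g. for a VOP3P modifier
// (the instruction is illustrative): in "v_pk_add_f16 v0, v1, v2 op_sel:[1,0]"
// the list op_sel:[1,0] is packed into the bitmask 0b01, one bit per element.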

OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    Bit = 0;
  } else {
    return MatchOperand_NoMatch;
  }

  if (Name == "r128" && !hasMIMG_R128()) {
    Error(S, "r128 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }
  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
    Error(S, "a16 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  unsigned CPolOn = 0;
  unsigned CPolOff = 0;
  SMLoc S = getLoc();

  if (trySkipId("glc"))
    CPolOn = AMDGPU::CPol::GLC;
  else if (trySkipId("noglc"))
    CPolOff = AMDGPU::CPol::GLC;
  else if (trySkipId("slc"))
    CPolOn = AMDGPU::CPol::SLC;
  else if (trySkipId("noslc"))
    CPolOff = AMDGPU::CPol::SLC;
  else if (trySkipId("dlc"))
    CPolOn = AMDGPU::CPol::DLC;
  else if (trySkipId("nodlc"))
    CPolOff = AMDGPU::CPol::DLC;
  else if (trySkipId("scc"))
    CPolOn = AMDGPU::CPol::SCC;
  else if (trySkipId("noscc"))
    CPolOff = AMDGPU::CPol::SCC;
  else
    return MatchOperand_NoMatch;

  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
    Error(S, "dlc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
    Error(S, "scc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (CPolSeen & (CPolOn | CPolOff)) {
    Error(S, "duplicate cache policy modifier");
    return MatchOperand_ParseFail;
  }

  CPolSeen |= (CPolOn | CPolOff);

  for (unsigned I = 1; I != Operands.size(); ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (Op.isCPol()) {
      Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
      return MatchOperand_Success;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
                                              AMDGPUOperand::ImmTyCPol));

  return MatchOperand_Success;
}
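
// Usage sketch (the buffer instruction and registers are illustrative): in
// "buffer_load_dword v0, off, s[0:3], 0 glc slc" this routine runs once per
// modifier, OR-ing GLC and SLC into a single ImmTyCPol operand; a repeated
// modifier such as "glc glc" or "glc noglc" is rejected via CPolSeen.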

static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                       StringRef &Value,
                                       SMLoc &StringLoc) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  StringLoc = getLoc();
  return parseId(Value, "expected an identifier") ? MatchOperand_Success
                                                  : MatchOperand_ParseFail;
}

//===----------------------------------------------------------------------===//
// MTBUF format
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
                                  int64_t MaxVal,
                                  int64_t &Fmt) {
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  if (Res == MatchOperand_ParseFail)
    return false;
  if (Res == MatchOperand_NoMatch)
    return true;

  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}

// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return MatchOperand_ParseFail;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
      return MatchOperand_ParseFail;
    }
    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return MatchOperand_NoMatch;

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return MatchOperand_Success;
}
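
// Sketch of the accepted spellings (values are illustrative): "dfmt:4",
// "nfmt:7", "dfmt:4, nfmt:7" and "nfmt:7, dfmt:4" all succeed; whichever half
// is omitted falls back to DFMT_DEFAULT/NFMT_DEFAULT before the two are
// packed together by encodeDfmtNfmt().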

OperandMatchResultTy
AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Fmt = UFMT_UNDEF;

  if (!tryParseFmt("format", UFMT_MAX, Fmt))
    return MatchOperand_ParseFail;

  if (Fmt == UFMT_UNDEF)
    return MatchOperand_NoMatch;

  Format = Fmt;
  return MatchOperand_Success;
}

bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
                                    int64_t &Nfmt,
                                    StringRef FormatStr,
                                    SMLoc Loc) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  int64_t Format;

  Format = getDfmt(FormatStr);
  if (Format != DFMT_UNDEF) {
    Dfmt = Format;
    return true;
  }

  Format = getNfmt(FormatStr, getSTI());
  if (Format != NFMT_UNDEF) {
    Nfmt = Format;
    return true;
  }

  Error(Loc, "unsupported format");
  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                          SMLoc FormatLoc,
                                          int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return MatchOperand_ParseFail;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
      return MatchOperand_ParseFail;
    }
    if (Dfmt == DFMT_UNDEF) {
      Error(Loc, "duplicate numeric format");
      return MatchOperand_ParseFail;
    } else if (Nfmt == NFMT_UNDEF) {
      Error(Loc, "duplicate data format");
      return MatchOperand_ParseFail;
    }
  }

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
    if (Ufmt == UFMT_UNDEF) {
      Error(FormatLoc, "unsupported format");
      return MatchOperand_ParseFail;
    }
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
                                            SMLoc Loc,
                                            int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  auto Id = getUnifiedFormat(FormatStr);
  if (Id == UFMT_UNDEF)
    return MatchOperand_NoMatch;

  if (!isGFX10Plus()) {
    Error(Loc, "unified format is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  Format = Id;
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  SMLoc Loc = getLoc();

  if (!parseExpr(Format))
    return MatchOperand_ParseFail;
  if (!isValidFormatEncoding(Format, getSTI())) {
    Error(Loc, "out of range format");
    return MatchOperand_ParseFail;
  }

  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return MatchOperand_ParseFail;

    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res == MatchOperand_NoMatch)
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (Res != MatchOperand_Success)
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return MatchOperand_ParseFail;

    return MatchOperand_Success;
  }

  return parseNumericFormat(Format);
}

OperandMatchResultTy
AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  OperandMatchResultTy Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res == MatchOperand_ParseFail)
    return Res;

  bool FormatFound = (Res == MatchOperand_Success);

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let the matcher handle the error.
    return MatchOperand_Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res == MatchOperand_ParseFail)
      return Res;
    if (Res == MatchOperand_Success) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return MatchOperand_Success;
  }

  if (isId("format") && peekToken().is(AsmToken::Colon)) {
    Error(getLoc(), "duplicate format");
    return MatchOperand_ParseFail;
  }
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  AMDGPUOperand::ImmTy OffsetType =
      (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
       Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
       Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ?
          AMDGPUOperand::ImmTySwizzle : AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr ? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
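
// Usage sketch: "s_waitcnt vmcnt(0) & lgkmcnt(0)" calls parseCnt once per
// clause, folding each counter into the shared bitmask; the "_sat" spellings
// (e.g. vmcnt_sat(100)) clamp an oversized value to the counter's maximum
// instead of reporting "too large value".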

OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    }
  } else {
    if (!parseExpr(Waitcnt))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const OperandInfoTy &Offset,
                               const OperandInfoTy &Width) {

  using namespace llvm::AMDGPU::Hwreg;

  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(HwReg.Loc,
          "specified hardware register is not supported on this GPU");
    return false;
  }
  if (!isValidHwreg(HwReg.Id)) {
    Error(HwReg.Loc,
          "invalid code of hardware register: only 6-bit values are legal");
    return false;
  }
  if (!isValidHwregOffset(Offset.Id)) {
    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  }
  if (!isValidHwregWidth(Width.Id)) {
    Error(Width.Loc,
          "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    OperandInfoTy Offset(OFFSET_DEFAULT_);
    OperandInfoTy Width(WIDTH_DEFAULT_);
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a hwreg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
                                              AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}
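
// Usage sketch (the register name is illustrative):
// "s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 8)" parses a symbolic name, a
// 5-bit offset and a width in [1..32], folding them into a 16-bit immediate
// via encodeHwreg(); a numeric "hwreg(1, 0, 8)" or a plain 16-bit literal is
// accepted as well.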

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether the message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only the encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
                                              AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}
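
// Usage sketch (message and operation names are illustrative):
// "s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)" goes through parseSendMsgBody
// and validateSendMsg before being packed by encodeMsg(); a pre-encoded
// 16-bit immediate such as "s_sendmsg 0x22" bypasses the symbolic checks.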

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// v_interp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  if (Slot == -1) {
    Error(S, "invalid interpolation slot");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  if (!Str.startswith("attr")) {
    Error(S, "invalid interpolation attribute");
    return MatchOperand_ParseFail;
  }

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1) {
    Error(S, "invalid or missing interpolation attribute channel");
    return MatchOperand_ParseFail;
  }

  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr)) {
    Error(S, "invalid or missing interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  if (Attr > 63) {
    Error(S, "out of bounds interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
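
// Usage sketch (the registers are illustrative): in
// "v_interp_p1_f32 v0, v1, attr4.y" the slot parser maps p10/p20/p0 to
// 0/1/2, while "attr4.y" is split into attribute number 4 (at most 63) and
// channel .y (encoded as 1).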

//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Exp;

  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  unsigned Id = getTgtId(Str);
  if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
    Error(S, (Id == ET_INVALID) ?
              "invalid exp target" :
              "exp target is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}
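
// Usage sketch (the target name is illustrative): in
// "exp mrt0 v0, v0, v0, v0 done" the identifier "mrt0" is mapped by
// getTgtId() to its target encoding; targets that do not exist on the
// subtarget are rejected by isSupportedTgtId().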

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() == Kind;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
  if (isToken(AsmToken::Identifier)) {
    StringRef Tok = getTokenStr();
    if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
      lex();
      return true;
    }
  }
  return false;
}

bool
AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
  if (isId(Id) && peekToken().is(Kind)) {
    lex();
    lex();
    return true;
  }
  return false;
}
6467
6468 bool
trySkipToken(const AsmToken::TokenKind Kind)6469 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
6470 if (isToken(Kind)) {
6471 lex();
6472 return true;
6473 }
6474 return false;
6475 }
6476
6477 bool
skipToken(const AsmToken::TokenKind Kind,const StringRef ErrMsg)6478 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
6479 const StringRef ErrMsg) {
6480 if (!trySkipToken(Kind)) {
6481 Error(getLoc(), ErrMsg);
6482 return false;
6483 }
6484 return true;
6485 }
6486
6487 bool
parseExpr(int64_t & Imm,StringRef Expected)6488 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
6489 SMLoc S = getLoc();
6490
6491 const MCExpr *Expr;
6492 if (Parser.parseExpression(Expr))
6493 return false;
6494
6495 if (Expr->evaluateAsAbsolute(Imm))
6496 return true;
6497
6498 if (Expected.empty()) {
6499 Error(S, "expected absolute expression");
6500 } else {
6501 Error(S, Twine("expected ", Expected) +
6502 Twine(" or an absolute expression"));
6503 }
6504 return false;
6505 }
6506
6507 bool
parseExpr(OperandVector & Operands)6508 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
6509 SMLoc S = getLoc();
6510
6511 const MCExpr *Expr;
6512 if (Parser.parseExpression(Expr))
6513 return false;
6514
6515 int64_t IntVal;
6516 if (Expr->evaluateAsAbsolute(IntVal)) {
6517 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
6518 } else {
6519 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
6520 }
6521 return true;
6522 }
6523
6524 bool
parseString(StringRef & Val,const StringRef ErrMsg)6525 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6526 if (isToken(AsmToken::String)) {
6527 Val = getToken().getStringContents();
6528 lex();
6529 return true;
6530 } else {
6531 Error(getLoc(), ErrMsg);
6532 return false;
6533 }
6534 }
6535
6536 bool
parseId(StringRef & Val,const StringRef ErrMsg)6537 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6538 if (isToken(AsmToken::Identifier)) {
6539 Val = getTokenStr();
6540 lex();
6541 return true;
6542 } else {
6543 if (!ErrMsg.empty())
6544 Error(getLoc(), ErrMsg);
6545 return false;
6546 }
6547 }
6548
6549 AsmToken
getToken() const6550 AMDGPUAsmParser::getToken() const {
6551 return Parser.getTok();
6552 }
6553
6554 AsmToken
peekToken()6555 AMDGPUAsmParser::peekToken() {
6556 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6557 }
6558
6559 void
peekTokens(MutableArrayRef<AsmToken> Tokens)6560 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6561 auto TokCount = getLexer().peekTokens(Tokens);
6562
6563 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6564 Tokens[Idx] = AsmToken(AsmToken::Error, "");
6565 }
6566
6567 AsmToken::TokenKind
getTokenKind() const6568 AMDGPUAsmParser::getTokenKind() const {
6569 return getLexer().getKind();
6570 }
6571
6572 SMLoc
getLoc() const6573 AMDGPUAsmParser::getLoc() const {
6574 return getToken().getLoc();
6575 }
6576
6577 StringRef
getTokenStr() const6578 AMDGPUAsmParser::getTokenStr() const {
6579 return getToken().getString();
6580 }
6581
6582 void
lex()6583 AMDGPUAsmParser::lex() {
6584 Parser.Lex();
6585 }
6586
6587 SMLoc
getOperandLoc(std::function<bool (const AMDGPUOperand &)> Test,const OperandVector & Operands) const6588 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6589 const OperandVector &Operands) const {
6590 for (unsigned i = Operands.size() - 1; i > 0; --i) {
6591 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6592 if (Test(Op))
6593 return Op.getStartLoc();
6594 }
6595 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6596 }
6597
6598 SMLoc
getImmLoc(AMDGPUOperand::ImmTy Type,const OperandVector & Operands) const6599 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6600 const OperandVector &Operands) const {
6601 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6602 return getOperandLoc(Test, Operands);
6603 }
6604
6605 SMLoc
getRegLoc(unsigned Reg,const OperandVector & Operands) const6606 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6607 const OperandVector &Operands) const {
6608 auto Test = [=](const AMDGPUOperand& Op) {
6609 return Op.isRegKind() && Op.getReg() == Reg;
6610 };
6611 return getOperandLoc(Test, Operands);
6612 }
6613
6614 SMLoc
getLitLoc(const OperandVector & Operands) const6615 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6616 auto Test = [](const AMDGPUOperand& Op) {
6617 return Op.IsImmKindLiteral() || Op.isExpr();
6618 };
6619 return getOperandLoc(Test, Operands);
6620 }
6621
6622 SMLoc
getConstLoc(const OperandVector & Operands) const6623 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6624 auto Test = [](const AMDGPUOperand& Op) {
6625 return Op.isImmKindConst();
6626 };
6627 return getOperandLoc(Test, Operands);
6628 }
6629
6630 //===----------------------------------------------------------------------===//
6631 // swizzle
6632 //===----------------------------------------------------------------------===//
6633
6634 LLVM_READNONE
6635 static unsigned
encodeBitmaskPerm(const unsigned AndMask,const unsigned OrMask,const unsigned XorMask)6636 encodeBitmaskPerm(const unsigned AndMask,
6637 const unsigned OrMask,
6638 const unsigned XorMask) {
6639 using namespace llvm::AMDGPU::Swizzle;
6640
6641 return BITMASK_PERM_ENC |
6642 (AndMask << BITMASK_AND_SHIFT) |
6643 (OrMask << BITMASK_OR_SHIFT) |
6644 (XorMask << BITMASK_XOR_SHIFT);
6645 }
6646
6647 bool
parseSwizzleOperand(int64_t & Op,const unsigned MinVal,const unsigned MaxVal,const StringRef ErrMsg,SMLoc & Loc)6648 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6649 const unsigned MinVal,
6650 const unsigned MaxVal,
6651 const StringRef ErrMsg,
6652 SMLoc &Loc) {
6653 if (!skipToken(AsmToken::Comma, "expected a comma")) {
6654 return false;
6655 }
6656 Loc = getLoc();
6657 if (!parseExpr(Op)) {
6658 return false;
6659 }
6660 if (Op < MinVal || Op > MaxVal) {
6661 Error(Loc, ErrMsg);
6662 return false;
6663 }
6664
6665 return true;
6666 }
6667
6668 bool
parseSwizzleOperands(const unsigned OpNum,int64_t * Op,const unsigned MinVal,const unsigned MaxVal,const StringRef ErrMsg)6669 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6670 const unsigned MinVal,
6671 const unsigned MaxVal,
6672 const StringRef ErrMsg) {
6673 SMLoc Loc;
6674 for (unsigned i = 0; i < OpNum; ++i) {
6675 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6676 return false;
6677 }
6678
6679 return true;
6680 }
6681
6682 bool
parseSwizzleQuadPerm(int64_t & Imm)6683 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6684 using namespace llvm::AMDGPU::Swizzle;
6685
6686 int64_t Lane[LANE_NUM];
6687 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6688 "expected a 2-bit lane id")) {
6689 Imm = QUAD_PERM_ENC;
6690 for (unsigned I = 0; I < LANE_NUM; ++I) {
6691 Imm |= Lane[I] << (LANE_SHIFT * I);
6692 }
6693 return true;
6694 }
6695 return false;
6696 }
6697
6698 bool
parseSwizzleBroadcast(int64_t & Imm)6699 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6700 using namespace llvm::AMDGPU::Swizzle;
6701
6702 SMLoc Loc;
6703 int64_t GroupSize;
6704 int64_t LaneIdx;
6705
6706 if (!parseSwizzleOperand(GroupSize,
6707 2, 32,
6708 "group size must be in the interval [2,32]",
6709 Loc)) {
6710 return false;
6711 }
6712 if (!isPowerOf2_64(GroupSize)) {
6713 Error(Loc, "group size must be a power of two");
6714 return false;
6715 }
6716 if (parseSwizzleOperand(LaneIdx,
6717 0, GroupSize - 1,
6718 "lane id must be in the interval [0,group size - 1]",
6719 Loc)) {
6720 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6721 return true;
6722 }
6723 return false;
6724 }
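
// For illustration: a broadcast is encoded as a bitmask swizzle whose AND
// mask keeps the group base and whose OR mask selects the source lane.
// With a power-of-two group size N, BITMASK_MAX - N + 1 clears the low
// log2(N) bits of the lane id; e.g. swizzle(BROADCAST,8,3) gives
// and_mask = 0b11000 and or_mask = 0b00011, so every lane of an 8-lane
// group reads from lane group_base + 3.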

bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}
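
// For illustration: XOR-ing the lane id with GroupSize - 1 reverses the lane
// order within each group; e.g. swizzle(REVERSE,4) encodes xor_mask = 0b11,
// which maps lanes 0,1,2,3 of each quad to 3,2,1,0.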

bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}
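
// For illustration: XOR-ing the lane id with GroupSize itself (a single bit
// for a power of two) swaps neighboring groups of that size; e.g.
// swizzle(SWAP,16) encodes xor_mask = 0b10000, exchanging lanes 0-15 with
// lanes 16-31.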

bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch (Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}
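
// For illustration: the mask string is MSB-first, one character per lane-id
// bit, and each output lane id is computed as ((lane & and) | or) ^ xor.
// E.g. swizzle(BITMASK_PERM, "01pi0") yields and_mask = 0b00110 ('p' and 'i'
// preserve a bit), or_mask = 0b01000 ('1' forces a bit to one), and
// xor_mask = 0b00010 ('i' inverts the preserved bit).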

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S,
                                                AMDGPUOperand::ImmTySwizzle));

    return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}
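
// Accepted forms, for reference:
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v8, v2 offset:0xffff
// The raw-offset form takes any 16-bit value; the swizzle(...) macros are
// just a readable way to build the same immediate.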

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0) ?
                   "expected a VGPR index mode or a closing parenthesis" :
                   "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
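
// For reference, the macro parsed above is a parenthesized, comma-separated
// list of distinct index modes that is folded into a bitmask, e.g.
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)
// An empty list, gpr_idx(), yields OFF.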

OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return MatchOperand_ParseFail;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_ParseFail;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
      return MatchOperand_ParseFail;
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (!parseExpr(Operands))
    return MatchOperand_ParseFail;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}

void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }
  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. The lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions have a
  // mandatory 'lds' modifier. However, this modifier follows the optional
  // modifiers, and the llvm asm matcher regards it as optional as well.
  // As a result, an lds version of an opcode may be selected even if it
  // has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}

void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyCPol, 0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10Plus = isGFX10Plus();

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim,
                          -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyR128A16);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst,
                                    const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}

void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  bool IsAtomicReturn = false;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (!Op.isCPol())
      continue;
    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
    break;
  }

  if (!IsAtomicReturn) {
    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NewOpc != -1)
      Inst.setOpcode(NewOpc);
  }

  IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                   SIInstrFlags::IsAtomicRet;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      if (IsAtomicReturn && i == 1)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  if ((int)Inst.getNumOperands() <=
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyCPol, 0);
}

void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
                                      const OperandVector &Operands) {
  for (unsigned I = 1; I < Operands.size(); ++I) {
    auto &Operand = (AMDGPUOperand &)*Operands[I];
    if (Operand.isReg())
      Operand.addRegOperands(Inst, 1);
  }

  Inst.addOperand(MCOperand::createImm(1)); // a16
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMEMOffset() const {
  return isImm(); // Offset range is checked later by validator.
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is wider than 8 bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//

static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1;
  return true;
}

static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}
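
// For illustration: the two helpers above map source-level omod values onto
// the 2-bit OMOD field, where 0 = none, 1 = multiply by 2, 2 = multiply by 4
// and 3 = divide by 2. Hence "mul:2" -> 1, "mul:4" -> 2, "div:2" -> 3, and
// both "mul:1" and "div:1" degenerate to 0.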

// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0 || BoundCtrl == 1) {
    BoundCtrl = 1;
    return true;
  }
  return false;
}

// Note: the order in this table matches the order of operands in AsmString.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  {"", AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
  {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
  {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
  {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
  {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
  {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());

  if (isHsaAbiVersion3Or4(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
OperandMatchResultTy
AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementations of some
  // instructions violate this rule (see e.g. flat/global atomics, which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.

  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
    if (res != MatchOperand_Success ||
        isToken(AsmToken::EndOfStatement))
      break;

    trySkipToken(AsmToken::Comma);
    res = parseOptionalOpr(Operands);
  }

  return res;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
      res = parseCPol(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc,
                                            AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
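
// For illustration: for the op_sel instructions handled above, the op_sel
// bit just past the last source (bit SrcNum) selects which half of the
// destination is written. Since the encoding has no separate dst modifier
// field, that bit is folded into src0_modifiers as DST_OP_SEL; e.g. an
// op_sel list like op_sel:[0,0,1] on a two-source instruction targets the
// high half of the destination.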

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
         // 2. This is not last operand
         && Desc.NumOperands > (OpNum + 1)
         // 3. Next operand is register class
         && Desc.OpInfo[OpNum + 1].RegClass != -1
         // 4. Next register is not tied to any other operand
         && Desc.getOperandConstraint(OpNum + 1,
                                      MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);
  }
  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

//===----------------------------------------------------------------------===//
// dim
//===----------------------------------------------------------------------===//

bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}
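
// For reference, both spellings below resolve to the same dim encoding:
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D
//   image_load v[0:3], v0, s[0:7] dmask:0xf dim:1D
// The short "1D" form is why the integer-token case above has to glue the
// "1" and "D" tokens back together before the lookup.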

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (Sels[i] < 0 || Sels[i] > 7) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S,
                                              AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
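
// For illustration: each list element selects the source lane for one of the
// 8 lanes of a row slice, packed 3 bits apiece into the DPP8 immediate; e.g.
//   v_mov_b32_dpp v5, v1 dpp8:[7,6,5,4,3,2,1,0]
// reverses each group of 8 lanes.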

bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}
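
// For illustration: quad_perm:[a,b,c,d] packs four 2-bit selectors, lane i
// of every quad reading from lane a, b, c or d respectively; e.g.
// quad_perm:[3,2,1,0] encodes Val = 0b00011011 and reverses each group of
// 4 lanes.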

int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
      .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
      .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
      .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
      .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
      .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
      .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
      .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
      .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
      .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
      .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
      .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
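
// For illustration: most row/wave controls are encoded as a base enumerator
// plus the shift/rotate amount, so row_shl:1 becomes DppCtrl::ROW_SHL0 | 1
// and row_ror:15 becomes DppCtrl::ROW_ROR0 | 15, while the single-value
// wave_* controls and row_bcast:15/31 map to dedicated enumerators.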
7954
7955 OperandMatchResultTy
parseDPPCtrl(OperandVector & Operands)7956 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7957 using namespace AMDGPU::DPP;
7958
7959 if (!isToken(AsmToken::Identifier) ||
7960 !isSupportedDPPCtrl(getTokenStr(), Operands))
7961 return MatchOperand_NoMatch;
7962
7963 SMLoc S = getLoc();
7964 int64_t Val = -1;
7965 StringRef Ctrl;
7966
7967 parseId(Ctrl);
7968
7969 if (Ctrl == "row_mirror") {
7970 Val = DppCtrl::ROW_MIRROR;
7971 } else if (Ctrl == "row_half_mirror") {
7972 Val = DppCtrl::ROW_HALF_MIRROR;
7973 } else {
7974 if (skipToken(AsmToken::Colon, "expected a colon")) {
7975 if (Ctrl == "quad_perm") {
7976 Val = parseDPPCtrlPerm();
7977 } else {
7978 Val = parseDPPCtrlSel(Ctrl);
7979 }
7980 }
7981 }
7982
7983 if (Val == -1)
7984 return MatchOperand_ParseFail;
7985
7986 Operands.push_back(
7987 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7988 return MatchOperand_Success;
7989 }
7990
defaultRowMask() const7991 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7992 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7993 }
7994
defaultEndpgmImmOperands() const7995 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7996 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7997 }
7998
defaultBankMask() const7999 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
8000 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
8001 }
8002
defaultBoundCtrl() const8003 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
8004 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
8005 }
8006
defaultFI() const8007 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
8008 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
8009 }
8010
cvtDPP(MCInst & Inst,const OperandVector & Operands,bool IsDPP8)8011 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
8012 OptionalImmIndexMap OptionalIdx;
8013
8014 unsigned Opc = Inst.getOpcode();
8015 bool HasModifiers =
8016 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1;
8017 unsigned I = 1;
8018 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8019 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8020 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8021 }
8022
8023 int Fi = 0;
8024 for (unsigned E = Operands.size(); I != E; ++I) {
8025 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
8026 MCOI::TIED_TO);
8027 if (TiedTo != -1) {
8028 assert((unsigned)TiedTo < Inst.getNumOperands());
8029 // handle tied old or src2 for MAC instructions
8030 Inst.addOperand(Inst.getOperand(TiedTo));
8031 }
8032 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8033 // Add the register arguments
8034 if (Op.isReg() && validateVccOperand(Op.getReg())) {
8035 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
8036 // Skip it.
8037 continue;
8038 }
8039
8040 if (IsDPP8) {
8041 if (Op.isDPP8()) {
8042 Op.addImmOperands(Inst, 1);
8043 } else if (HasModifiers &&
8044 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8045 Op.addRegWithFPInputModsOperands(Inst, 2);
8046 } else if (Op.isFI()) {
8047 Fi = Op.getImm();
8048 } else if (Op.isReg()) {
8049 Op.addRegOperands(Inst, 1);
8050 } else {
8051 llvm_unreachable("Invalid operand type");
8052 }
8053 } else {
8054 if (HasModifiers &&
8055 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8056 Op.addRegWithFPInputModsOperands(Inst, 2);
8057 } else if (Op.isReg()) {
8058 Op.addRegOperands(Inst, 1);
8059 } else if (Op.isDPPCtrl()) {
8060 Op.addImmOperands(Inst, 1);
8061 } else if (Op.isImm()) {
8062 // Handle optional arguments
8063 OptionalIdx[Op.getImmTy()] = I;
8064 } else {
8065 llvm_unreachable("Invalid operand type");
8066 }
8067 }
8068 }
8069
8070 if (IsDPP8) {
8071 using namespace llvm::AMDGPU::DPP;
8072 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
8073 } else {
8074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
8075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
8076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
8077 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
8078 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
8079 }
8080 }
8081 }
8082
8083 //===----------------------------------------------------------------------===//
8084 // sdwa
8085 //===----------------------------------------------------------------------===//
8086
8087 OperandMatchResultTy
parseSDWASel(OperandVector & Operands,StringRef Prefix,AMDGPUOperand::ImmTy Type)8088 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
8089 AMDGPUOperand::ImmTy Type) {
8090 using namespace llvm::AMDGPU::SDWA;
8091
8092 SMLoc S = getLoc();
8093 StringRef Value;
8094 OperandMatchResultTy res;
8095
8096 SMLoc StringLoc;
8097 res = parseStringWithPrefix(Prefix, Value, StringLoc);
8098 if (res != MatchOperand_Success) {
8099 return res;
8100 }
8101
8102 int64_t Int;
8103 Int = StringSwitch<int64_t>(Value)
8104 .Case("BYTE_0", SdwaSel::BYTE_0)
8105 .Case("BYTE_1", SdwaSel::BYTE_1)
8106 .Case("BYTE_2", SdwaSel::BYTE_2)
8107 .Case("BYTE_3", SdwaSel::BYTE_3)
8108 .Case("WORD_0", SdwaSel::WORD_0)
8109 .Case("WORD_1", SdwaSel::WORD_1)
8110 .Case("DWORD", SdwaSel::DWORD)
8111 .Default(0xffffffff);
8112
8113 if (Int == 0xffffffff) {
8114 Error(StringLoc, "invalid " + Twine(Prefix) + " value");
8115 return MatchOperand_ParseFail;
8116 }
8117
8118 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
8119 return MatchOperand_Success;
8120 }

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}
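
// Sketch of how dst_unused combines with a narrow dst_sel (example syntax,
// not from a test): when only BYTE_0 of the destination is written,
//   v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE
// dst_unused decides the fate of the remaining bits: UNUSED_PAD zeroes
// them, UNUSED_SEXT sign-extends the written field, and UNUSED_PRESERVE
// keeps the destination's previous contents.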

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/true,
          /*SkipSrcVcc=*/true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, /*SkipDstVcc=*/false,
          /*SkipSrcVcc=*/true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, /*SkipDstVcc=*/isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
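
// A sketch of the tied-operand fixup above (example operands, not from a
// test). For an assembly line like
//   v_mac_f32_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
// the accumulator input src2 never appears in the source text; it is the
// same register as the destination v1, so the converter copies operand 0
// into the src2 slot to satisfy the tied-register constraint.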

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}
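
// These defaults back the optional matrix-instruction modifiers, all of
// which are 0 when omitted. An illustrative, hand-written use (operand and
// modifier values chosen arbitrarily; see the ISA docs for their exact
// semantics):
//   v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31] cbsz:1 abid:2 blgp:3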

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // are expected to produce the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
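
// For instance (a hypothetical example, not from a test), in
//   buffer_load_dword v0, off, s[8:11], s3 glc
// the trailing "glc" is parsed as an immediate AMDGPUOperand, while the
// generated matcher table may expect a plain token at that position; the
// hook above bridges that mismatch for the operand kinds it lists.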

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}
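
// Both spellings below are accepted, since the 16-bit immediate is optional
// (illustrative examples):
//   s_endpgm
//   s_endpgm 3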

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }