1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "AMDKernelCodeT.h"
10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
12 #include "SIDefines.h"
13 #include "SIInstrInfo.h"
14 #include "SIRegisterInfo.h"
15 #include "TargetInfo/AMDGPUTargetInfo.h"
16 #include "Utils/AMDGPUAsmUtils.h"
17 #include "Utils/AMDGPUBaseInfo.h"
18 #include "Utils/AMDKernelCodeTUtils.h"
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/StringSet.h"
22 #include "llvm/ADT/Twine.h"
23 #include "llvm/BinaryFormat/ELF.h"
24 #include "llvm/CodeGen/MachineValueType.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCExpr.h"
28 #include "llvm/MC/MCInst.h"
29 #include "llvm/MC/MCInstrDesc.h"
30 #include "llvm/MC/MCParser/MCAsmLexer.h"
31 #include "llvm/MC/MCParser/MCAsmParser.h"
32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/AMDGPUMetadata.h"
37 #include "llvm/Support/AMDHSAKernelDescriptor.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/MathExtras.h"
40 #include "llvm/TargetParser/TargetParser.h"
41 #include <optional>
42
43 using namespace llvm;
44 using namespace llvm::AMDGPU;
45 using namespace llvm::amdhsa;
46
47 namespace {
48
49 class AMDGPUAsmParser;
50
51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
52
53 //===----------------------------------------------------------------------===//
54 // Operand
55 //===----------------------------------------------------------------------===//
56
57 class AMDGPUOperand : public MCParsedAsmOperand {
58 enum KindTy {
59 Token,
60 Immediate,
61 Register,
62 Expression
63 } Kind;
64
65 SMLoc StartLoc, EndLoc;
66 const AMDGPUAsmParser *AsmParser;
67
68 public:
AMDGPUOperand(KindTy Kind_,const AMDGPUAsmParser * AsmParser_)69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
70 : Kind(Kind_), AsmParser(AsmParser_) {}
71
72 using Ptr = std::unique_ptr<AMDGPUOperand>;
73
74 struct Modifiers {
75 bool Abs = false;
76 bool Neg = false;
77 bool Sext = false;
78 bool Lit = false;
79
hasFPModifiers__anon2d2848640111::AMDGPUOperand::Modifiers80 bool hasFPModifiers() const { return Abs || Neg; }
hasIntModifiers__anon2d2848640111::AMDGPUOperand::Modifiers81 bool hasIntModifiers() const { return Sext; }
hasModifiers__anon2d2848640111::AMDGPUOperand::Modifiers82 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
83
getFPModifiersOperand__anon2d2848640111::AMDGPUOperand::Modifiers84 int64_t getFPModifiersOperand() const {
85 int64_t Operand = 0;
86 Operand |= Abs ? SISrcMods::ABS : 0u;
87 Operand |= Neg ? SISrcMods::NEG : 0u;
88 return Operand;
89 }
90
getIntModifiersOperand__anon2d2848640111::AMDGPUOperand::Modifiers91 int64_t getIntModifiersOperand() const {
92 int64_t Operand = 0;
93 Operand |= Sext ? SISrcMods::SEXT : 0u;
94 return Operand;
95 }
96
getModifiersOperand__anon2d2848640111::AMDGPUOperand::Modifiers97 int64_t getModifiersOperand() const {
98 assert(!(hasFPModifiers() && hasIntModifiers())
99 && "fp and int modifiers should not be used simultaneously");
100 if (hasFPModifiers()) {
101 return getFPModifiersOperand();
102 } else if (hasIntModifiers()) {
103 return getIntModifiersOperand();
104 } else {
105 return 0;
106 }
107 }
108
109 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
110 };
111
112 enum ImmTy {
113 ImmTyNone,
114 ImmTyGDS,
115 ImmTyLDS,
116 ImmTyOffen,
117 ImmTyIdxen,
118 ImmTyAddr64,
119 ImmTyOffset,
120 ImmTyInstOffset,
121 ImmTyOffset0,
122 ImmTyOffset1,
123 ImmTySMEMOffsetMod,
124 ImmTyCPol,
125 ImmTyTFE,
126 ImmTyD16,
127 ImmTyClampSI,
128 ImmTyOModSI,
129 ImmTySDWADstSel,
130 ImmTySDWASrc0Sel,
131 ImmTySDWASrc1Sel,
132 ImmTySDWADstUnused,
133 ImmTyDMask,
134 ImmTyDim,
135 ImmTyUNorm,
136 ImmTyDA,
137 ImmTyR128A16,
138 ImmTyA16,
139 ImmTyLWE,
140 ImmTyExpTgt,
141 ImmTyExpCompr,
142 ImmTyExpVM,
143 ImmTyFORMAT,
144 ImmTyHwreg,
145 ImmTyOff,
146 ImmTySendMsg,
147 ImmTyInterpSlot,
148 ImmTyInterpAttr,
149 ImmTyInterpAttrChan,
150 ImmTyOpSel,
151 ImmTyOpSelHi,
152 ImmTyNegLo,
153 ImmTyNegHi,
154 ImmTyIndexKey8bit,
155 ImmTyIndexKey16bit,
156 ImmTyDPP8,
157 ImmTyDppCtrl,
158 ImmTyDppRowMask,
159 ImmTyDppBankMask,
160 ImmTyDppBoundCtrl,
161 ImmTyDppFI,
162 ImmTySwizzle,
163 ImmTyGprIdxMode,
164 ImmTyHigh,
165 ImmTyBLGP,
166 ImmTyCBSZ,
167 ImmTyABID,
168 ImmTyEndpgm,
169 ImmTyWaitVDST,
170 ImmTyWaitEXP,
171 ImmTyWaitVAVDst,
172 ImmTyWaitVMVSrc,
173 };
174
175 // Immediate operand kind.
176 // It helps to identify the location of an offending operand after an error.
177 // Note that regular literals and mandatory literals (KImm) must be handled
178 // differently. When looking for an offending operand, we should usually
179 // ignore mandatory literals because they are part of the instruction and
180 // cannot be changed. Report location of mandatory operands only for VOPD,
181 // when both OpX and OpY have a KImm and there are no other literals.
182 enum ImmKindTy {
183 ImmKindTyNone,
184 ImmKindTyLiteral,
185 ImmKindTyMandatoryLiteral,
186 ImmKindTyConst,
187 };
188
189 private:
190 struct TokOp {
191 const char *Data;
192 unsigned Length;
193 };
194
195 struct ImmOp {
196 int64_t Val;
197 ImmTy Type;
198 bool IsFPImm;
199 mutable ImmKindTy Kind;
200 Modifiers Mods;
201 };
202
203 struct RegOp {
204 unsigned RegNo;
205 Modifiers Mods;
206 };
207
208 union {
209 TokOp Tok;
210 ImmOp Imm;
211 RegOp Reg;
212 const MCExpr *Expr;
213 };
214
215 public:
isToken() const216 bool isToken() const override { return Kind == Token; }
217
isSymbolRefExpr() const218 bool isSymbolRefExpr() const {
219 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
220 }
221
isImm() const222 bool isImm() const override {
223 return Kind == Immediate;
224 }
225
setImmKindNone() const226 void setImmKindNone() const {
227 assert(isImm());
228 Imm.Kind = ImmKindTyNone;
229 }
230
setImmKindLiteral() const231 void setImmKindLiteral() const {
232 assert(isImm());
233 Imm.Kind = ImmKindTyLiteral;
234 }
235
setImmKindMandatoryLiteral() const236 void setImmKindMandatoryLiteral() const {
237 assert(isImm());
238 Imm.Kind = ImmKindTyMandatoryLiteral;
239 }
240
setImmKindConst() const241 void setImmKindConst() const {
242 assert(isImm());
243 Imm.Kind = ImmKindTyConst;
244 }
245
IsImmKindLiteral() const246 bool IsImmKindLiteral() const {
247 return isImm() && Imm.Kind == ImmKindTyLiteral;
248 }
249
IsImmKindMandatoryLiteral() const250 bool IsImmKindMandatoryLiteral() const {
251 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral;
252 }
253
isImmKindConst() const254 bool isImmKindConst() const {
255 return isImm() && Imm.Kind == ImmKindTyConst;
256 }
257
258 bool isInlinableImm(MVT type) const;
259 bool isLiteralImm(MVT type) const;
260
isRegKind() const261 bool isRegKind() const {
262 return Kind == Register;
263 }
264
isReg() const265 bool isReg() const override {
266 return isRegKind() && !hasModifiers();
267 }
268
isRegOrInline(unsigned RCID,MVT type) const269 bool isRegOrInline(unsigned RCID, MVT type) const {
270 return isRegClass(RCID) || isInlinableImm(type);
271 }
272
isRegOrImmWithInputMods(unsigned RCID,MVT type) const273 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
274 return isRegOrInline(RCID, type) || isLiteralImm(type);
275 }
276
isRegOrImmWithInt16InputMods() const277 bool isRegOrImmWithInt16InputMods() const {
278 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
279 }
280
isRegOrImmWithIntT16InputMods() const281 bool isRegOrImmWithIntT16InputMods() const {
282 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16);
283 }
284
isRegOrImmWithInt32InputMods() const285 bool isRegOrImmWithInt32InputMods() const {
286 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
287 }
288
isRegOrInlineImmWithInt16InputMods() const289 bool isRegOrInlineImmWithInt16InputMods() const {
290 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
291 }
292
isRegOrInlineImmWithInt32InputMods() const293 bool isRegOrInlineImmWithInt32InputMods() const {
294 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
295 }
296
isRegOrImmWithInt64InputMods() const297 bool isRegOrImmWithInt64InputMods() const {
298 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
299 }
300
isRegOrImmWithFP16InputMods() const301 bool isRegOrImmWithFP16InputMods() const {
302 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
303 }
304
isRegOrImmWithFPT16InputMods() const305 bool isRegOrImmWithFPT16InputMods() const {
306 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16);
307 }
308
isRegOrImmWithFP32InputMods() const309 bool isRegOrImmWithFP32InputMods() const {
310 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
311 }
312
isRegOrImmWithFP64InputMods() const313 bool isRegOrImmWithFP64InputMods() const {
314 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
315 }
316
isRegOrInlineImmWithFP16InputMods() const317 bool isRegOrInlineImmWithFP16InputMods() const {
318 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
319 }
320
isRegOrInlineImmWithFP32InputMods() const321 bool isRegOrInlineImmWithFP32InputMods() const {
322 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
323 }
324
325
isVReg() const326 bool isVReg() const {
327 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
328 isRegClass(AMDGPU::VReg_64RegClassID) ||
329 isRegClass(AMDGPU::VReg_96RegClassID) ||
330 isRegClass(AMDGPU::VReg_128RegClassID) ||
331 isRegClass(AMDGPU::VReg_160RegClassID) ||
332 isRegClass(AMDGPU::VReg_192RegClassID) ||
333 isRegClass(AMDGPU::VReg_256RegClassID) ||
334 isRegClass(AMDGPU::VReg_512RegClassID) ||
335 isRegClass(AMDGPU::VReg_1024RegClassID);
336 }
337
isVReg32() const338 bool isVReg32() const {
339 return isRegClass(AMDGPU::VGPR_32RegClassID);
340 }
341
isVReg32OrOff() const342 bool isVReg32OrOff() const {
343 return isOff() || isVReg32();
344 }
345
isNull() const346 bool isNull() const {
347 return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
348 }
349
350 bool isVRegWithInputMods() const;
351 template <bool IsFake16> bool isT16VRegWithInputMods() const;
352
353 bool isSDWAOperand(MVT type) const;
354 bool isSDWAFP16Operand() const;
355 bool isSDWAFP32Operand() const;
356 bool isSDWAInt16Operand() const;
357 bool isSDWAInt32Operand() const;
358
isImmTy(ImmTy ImmT) const359 bool isImmTy(ImmTy ImmT) const {
360 return isImm() && Imm.Type == ImmT;
361 }
362
isImmTy() const363 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }
364
isImmLiteral() const365 bool isImmLiteral() const { return isImmTy(ImmTyNone); }
366
isImmModifier() const367 bool isImmModifier() const {
368 return isImm() && Imm.Type != ImmTyNone;
369 }
370
isOModSI() const371 bool isOModSI() const { return isImmTy(ImmTyOModSI); }
isDMask() const372 bool isDMask() const { return isImmTy(ImmTyDMask); }
isDim() const373 bool isDim() const { return isImmTy(ImmTyDim); }
isR128A16() const374 bool isR128A16() const { return isImmTy(ImmTyR128A16); }
isOff() const375 bool isOff() const { return isImmTy(ImmTyOff); }
isExpTgt() const376 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
isOffen() const377 bool isOffen() const { return isImmTy(ImmTyOffen); }
isIdxen() const378 bool isIdxen() const { return isImmTy(ImmTyIdxen); }
isAddr64() const379 bool isAddr64() const { return isImmTy(ImmTyAddr64); }
isOffset() const380 bool isOffset() const { return isImmTy(ImmTyOffset); }
isOffset0() const381 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
isOffset1() const382 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
isSMEMOffsetMod() const383 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
isFlatOffset() const384 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
isGDS() const385 bool isGDS() const { return isImmTy(ImmTyGDS); }
isLDS() const386 bool isLDS() const { return isImmTy(ImmTyLDS); }
isCPol() const387 bool isCPol() const { return isImmTy(ImmTyCPol); }
isIndexKey8bit() const388 bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
isIndexKey16bit() const389 bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
isTFE() const390 bool isTFE() const { return isImmTy(ImmTyTFE); }
isFORMAT() const391 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
isDppBankMask() const392 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
isDppRowMask() const393 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
isDppBoundCtrl() const394 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
isDppFI() const395 bool isDppFI() const { return isImmTy(ImmTyDppFI); }
isSDWADstSel() const396 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
isSDWASrc0Sel() const397 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
isSDWASrc1Sel() const398 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
isSDWADstUnused() const399 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
isInterpSlot() const400 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
isInterpAttr() const401 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
isInterpAttrChan() const402 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
isOpSel() const403 bool isOpSel() const { return isImmTy(ImmTyOpSel); }
isOpSelHi() const404 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
isNegLo() const405 bool isNegLo() const { return isImmTy(ImmTyNegLo); }
isNegHi() const406 bool isNegHi() const { return isImmTy(ImmTyNegHi); }
407
isRegOrImm() const408 bool isRegOrImm() const {
409 return isReg() || isImm();
410 }
411
412 bool isRegClass(unsigned RCID) const;
413
414 bool isInlineValue() const;
415
isRegOrInlineNoMods(unsigned RCID,MVT type) const416 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
417 return isRegOrInline(RCID, type) && !hasModifiers();
418 }
419
isSCSrcB16() const420 bool isSCSrcB16() const {
421 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
422 }
423
isSCSrcV2B16() const424 bool isSCSrcV2B16() const {
425 return isSCSrcB16();
426 }
427
isSCSrcB32() const428 bool isSCSrcB32() const {
429 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
430 }
431
isSCSrcB64() const432 bool isSCSrcB64() const {
433 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
434 }
435
436 bool isBoolReg() const;
437
isSCSrcF16() const438 bool isSCSrcF16() const {
439 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
440 }
441
isSCSrcV2F16() const442 bool isSCSrcV2F16() const {
443 return isSCSrcF16();
444 }
445
isSCSrcF32() const446 bool isSCSrcF32() const {
447 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
448 }
449
isSCSrcF64() const450 bool isSCSrcF64() const {
451 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
452 }
453
isSSrcB32() const454 bool isSSrcB32() const {
455 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
456 }
457
isSSrcB16() const458 bool isSSrcB16() const {
459 return isSCSrcB16() || isLiteralImm(MVT::i16);
460 }
461
isSSrcV2B16() const462 bool isSSrcV2B16() const {
463 llvm_unreachable("cannot happen");
464 return isSSrcB16();
465 }
466
isSSrcB64() const467 bool isSSrcB64() const {
468 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
469 // See isVSrc64().
470 return isSCSrcB64() || isLiteralImm(MVT::i64);
471 }
472
isSSrcF32() const473 bool isSSrcF32() const {
474 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
475 }
476
isSSrcF64() const477 bool isSSrcF64() const {
478 return isSCSrcB64() || isLiteralImm(MVT::f64);
479 }
480
isSSrcF16() const481 bool isSSrcF16() const {
482 return isSCSrcB16() || isLiteralImm(MVT::f16);
483 }
484
isSSrcV2F16() const485 bool isSSrcV2F16() const {
486 llvm_unreachable("cannot happen");
487 return isSSrcF16();
488 }
489
isSSrcV2FP32() const490 bool isSSrcV2FP32() const {
491 llvm_unreachable("cannot happen");
492 return isSSrcF32();
493 }
494
isSCSrcV2FP32() const495 bool isSCSrcV2FP32() const {
496 llvm_unreachable("cannot happen");
497 return isSCSrcF32();
498 }
499
isSSrcV2INT32() const500 bool isSSrcV2INT32() const {
501 llvm_unreachable("cannot happen");
502 return isSSrcB32();
503 }
504
isSCSrcV2INT32() const505 bool isSCSrcV2INT32() const {
506 llvm_unreachable("cannot happen");
507 return isSCSrcB32();
508 }
509
isSSrcOrLdsB32() const510 bool isSSrcOrLdsB32() const {
511 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
512 isLiteralImm(MVT::i32) || isExpr();
513 }
514
isVCSrcB32() const515 bool isVCSrcB32() const {
516 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
517 }
518
isVCSrcB64() const519 bool isVCSrcB64() const {
520 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
521 }
522
isVCSrcTB16() const523 bool isVCSrcTB16() const {
524 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
525 }
526
isVCSrcTB16_Lo128() const527 bool isVCSrcTB16_Lo128() const {
528 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
529 }
530
isVCSrcFake16B16_Lo128() const531 bool isVCSrcFake16B16_Lo128() const {
532 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
533 }
534
isVCSrcB16() const535 bool isVCSrcB16() const {
536 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
537 }
538
isVCSrcV2B16() const539 bool isVCSrcV2B16() const {
540 return isVCSrcB16();
541 }
542
isVCSrcF32() const543 bool isVCSrcF32() const {
544 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
545 }
546
isVCSrcF64() const547 bool isVCSrcF64() const {
548 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
549 }
550
isVCSrcTF16() const551 bool isVCSrcTF16() const {
552 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
553 }
554
isVCSrcTF16_Lo128() const555 bool isVCSrcTF16_Lo128() const {
556 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
557 }
558
isVCSrcFake16F16_Lo128() const559 bool isVCSrcFake16F16_Lo128() const {
560 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
561 }
562
isVCSrcF16() const563 bool isVCSrcF16() const {
564 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
565 }
566
isVCSrcV2F16() const567 bool isVCSrcV2F16() const {
568 return isVCSrcF16();
569 }
570
isVSrcB32() const571 bool isVSrcB32() const {
572 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
573 }
574
isVSrcB64() const575 bool isVSrcB64() const {
576 return isVCSrcF64() || isLiteralImm(MVT::i64);
577 }
578
isVSrcTB16() const579 bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); }
580
isVSrcTB16_Lo128() const581 bool isVSrcTB16_Lo128() const {
582 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
583 }
584
isVSrcFake16B16_Lo128() const585 bool isVSrcFake16B16_Lo128() const {
586 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
587 }
588
isVSrcB16() const589 bool isVSrcB16() const {
590 return isVCSrcB16() || isLiteralImm(MVT::i16);
591 }
592
isVSrcV2B16() const593 bool isVSrcV2B16() const {
594 return isVSrcB16() || isLiteralImm(MVT::v2i16);
595 }
596
isVCSrcV2FP32() const597 bool isVCSrcV2FP32() const {
598 return isVCSrcF64();
599 }
600
isVSrcV2FP32() const601 bool isVSrcV2FP32() const {
602 return isVSrcF64() || isLiteralImm(MVT::v2f32);
603 }
604
isVCSrcV2INT32() const605 bool isVCSrcV2INT32() const {
606 return isVCSrcB64();
607 }
608
isVSrcV2INT32() const609 bool isVSrcV2INT32() const {
610 return isVSrcB64() || isLiteralImm(MVT::v2i32);
611 }
612
isVSrcF32() const613 bool isVSrcF32() const {
614 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
615 }
616
isVSrcF64() const617 bool isVSrcF64() const {
618 return isVCSrcF64() || isLiteralImm(MVT::f64);
619 }
620
isVSrcTF16() const621 bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); }
622
isVSrcTF16_Lo128() const623 bool isVSrcTF16_Lo128() const {
624 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
625 }
626
isVSrcFake16F16_Lo128() const627 bool isVSrcFake16F16_Lo128() const {
628 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
629 }
630
isVSrcF16() const631 bool isVSrcF16() const {
632 return isVCSrcF16() || isLiteralImm(MVT::f16);
633 }
634
isVSrcV2F16() const635 bool isVSrcV2F16() const {
636 return isVSrcF16() || isLiteralImm(MVT::v2f16);
637 }
638
isVISrcB32() const639 bool isVISrcB32() const {
640 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
641 }
642
isVISrcB16() const643 bool isVISrcB16() const {
644 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
645 }
646
isVISrcV2B16() const647 bool isVISrcV2B16() const {
648 return isVISrcB16();
649 }
650
isVISrcF32() const651 bool isVISrcF32() const {
652 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
653 }
654
isVISrcF16() const655 bool isVISrcF16() const {
656 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
657 }
658
isVISrcV2F16() const659 bool isVISrcV2F16() const {
660 return isVISrcF16() || isVISrcB32();
661 }
662
isVISrc_64F16() const663 bool isVISrc_64F16() const {
664 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
665 }
666
isVISrc_64B32() const667 bool isVISrc_64B32() const {
668 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
669 }
670
isVISrc_64B64() const671 bool isVISrc_64B64() const {
672 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
673 }
674
isVISrc_64F64() const675 bool isVISrc_64F64() const {
676 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
677 }
678
isVISrc_64V2FP32() const679 bool isVISrc_64V2FP32() const {
680 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
681 }
682
isVISrc_64V2INT32() const683 bool isVISrc_64V2INT32() const {
684 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
685 }
686
isVISrc_256B32() const687 bool isVISrc_256B32() const {
688 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
689 }
690
isVISrc_256F32() const691 bool isVISrc_256F32() const {
692 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
693 }
694
isVISrc_256B64() const695 bool isVISrc_256B64() const {
696 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
697 }
698
isVISrc_256F64() const699 bool isVISrc_256F64() const {
700 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
701 }
702
isVISrc_128B16() const703 bool isVISrc_128B16() const {
704 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
705 }
706
isVISrc_128V2B16() const707 bool isVISrc_128V2B16() const {
708 return isVISrc_128B16();
709 }
710
isVISrc_128B32() const711 bool isVISrc_128B32() const {
712 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
713 }
714
isVISrc_128F32() const715 bool isVISrc_128F32() const {
716 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
717 }
718
isVISrc_256V2FP32() const719 bool isVISrc_256V2FP32() const {
720 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
721 }
722
isVISrc_256V2INT32() const723 bool isVISrc_256V2INT32() const {
724 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
725 }
726
isVISrc_512B32() const727 bool isVISrc_512B32() const {
728 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
729 }
730
isVISrc_512B16() const731 bool isVISrc_512B16() const {
732 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
733 }
734
isVISrc_512V2B16() const735 bool isVISrc_512V2B16() const {
736 return isVISrc_512B16();
737 }
738
isVISrc_512F32() const739 bool isVISrc_512F32() const {
740 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
741 }
742
isVISrc_512F16() const743 bool isVISrc_512F16() const {
744 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
745 }
746
isVISrc_512V2F16() const747 bool isVISrc_512V2F16() const {
748 return isVISrc_512F16() || isVISrc_512B32();
749 }
750
isVISrc_1024B32() const751 bool isVISrc_1024B32() const {
752 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
753 }
754
isVISrc_1024B16() const755 bool isVISrc_1024B16() const {
756 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
757 }
758
isVISrc_1024V2B16() const759 bool isVISrc_1024V2B16() const {
760 return isVISrc_1024B16();
761 }
762
isVISrc_1024F32() const763 bool isVISrc_1024F32() const {
764 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
765 }
766
isVISrc_1024F16() const767 bool isVISrc_1024F16() const {
768 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
769 }
770
isVISrc_1024V2F16() const771 bool isVISrc_1024V2F16() const {
772 return isVISrc_1024F16() || isVISrc_1024B32();
773 }
774
isAISrcB32() const775 bool isAISrcB32() const {
776 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
777 }
778
isAISrcB16() const779 bool isAISrcB16() const {
780 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
781 }
782
isAISrcV2B16() const783 bool isAISrcV2B16() const {
784 return isAISrcB16();
785 }
786
isAISrcF32() const787 bool isAISrcF32() const {
788 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
789 }
790
isAISrcF16() const791 bool isAISrcF16() const {
792 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
793 }
794
isAISrcV2F16() const795 bool isAISrcV2F16() const {
796 return isAISrcF16() || isAISrcB32();
797 }
798
isAISrc_64B64() const799 bool isAISrc_64B64() const {
800 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
801 }
802
isAISrc_64F64() const803 bool isAISrc_64F64() const {
804 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
805 }
806
isAISrc_128B32() const807 bool isAISrc_128B32() const {
808 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
809 }
810
isAISrc_128B16() const811 bool isAISrc_128B16() const {
812 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
813 }
814
isAISrc_128V2B16() const815 bool isAISrc_128V2B16() const {
816 return isAISrc_128B16();
817 }
818
isAISrc_128F32() const819 bool isAISrc_128F32() const {
820 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
821 }
822
isAISrc_128F16() const823 bool isAISrc_128F16() const {
824 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
825 }
826
isAISrc_128V2F16() const827 bool isAISrc_128V2F16() const {
828 return isAISrc_128F16() || isAISrc_128B32();
829 }
830
isVISrc_128F16() const831 bool isVISrc_128F16() const {
832 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
833 }
834
isVISrc_128V2F16() const835 bool isVISrc_128V2F16() const {
836 return isVISrc_128F16() || isVISrc_128B32();
837 }
838
isAISrc_256B64() const839 bool isAISrc_256B64() const {
840 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
841 }
842
isAISrc_256F64() const843 bool isAISrc_256F64() const {
844 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
845 }
846
isAISrc_512B32() const847 bool isAISrc_512B32() const {
848 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
849 }
850
isAISrc_512B16() const851 bool isAISrc_512B16() const {
852 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
853 }
854
isAISrc_512V2B16() const855 bool isAISrc_512V2B16() const {
856 return isAISrc_512B16();
857 }
858
isAISrc_512F32() const859 bool isAISrc_512F32() const {
860 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
861 }
862
isAISrc_512F16() const863 bool isAISrc_512F16() const {
864 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
865 }
866
isAISrc_512V2F16() const867 bool isAISrc_512V2F16() const {
868 return isAISrc_512F16() || isAISrc_512B32();
869 }
870
isAISrc_1024B32() const871 bool isAISrc_1024B32() const {
872 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
873 }
874
isAISrc_1024B16() const875 bool isAISrc_1024B16() const {
876 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
877 }
878
isAISrc_1024V2B16() const879 bool isAISrc_1024V2B16() const {
880 return isAISrc_1024B16();
881 }
882
isAISrc_1024F32() const883 bool isAISrc_1024F32() const {
884 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
885 }
886
isAISrc_1024F16() const887 bool isAISrc_1024F16() const {
888 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
889 }
890
isAISrc_1024V2F16() const891 bool isAISrc_1024V2F16() const {
892 return isAISrc_1024F16() || isAISrc_1024B32();
893 }
894
isKImmFP32() const895 bool isKImmFP32() const {
896 return isLiteralImm(MVT::f32);
897 }
898
isKImmFP16() const899 bool isKImmFP16() const {
900 return isLiteralImm(MVT::f16);
901 }
902
isMem() const903 bool isMem() const override {
904 return false;
905 }
906
isExpr() const907 bool isExpr() const {
908 return Kind == Expression;
909 }
910
isSOPPBrTarget() const911 bool isSOPPBrTarget() const { return isExpr() || isImm(); }
912
913 bool isSWaitCnt() const;
914 bool isDepCtr() const;
915 bool isSDelayALU() const;
916 bool isHwreg() const;
917 bool isSendMsg() const;
918 bool isSplitBarrier() const;
919 bool isSwizzle() const;
920 bool isSMRDOffset8() const;
921 bool isSMEMOffset() const;
922 bool isSMRDLiteralOffset() const;
923 bool isDPP8() const;
924 bool isDPPCtrl() const;
925 bool isBLGP() const;
926 bool isCBSZ() const;
927 bool isABID() const;
928 bool isGPRIdxMode() const;
929 bool isS16Imm() const;
930 bool isU16Imm() const;
931 bool isEndpgm() const;
932 bool isWaitVDST() const;
933 bool isWaitEXP() const;
934 bool isWaitVAVDst() const;
935 bool isWaitVMVSrc() const;
936
getPredicate(std::function<bool (const AMDGPUOperand & Op)> P) const937 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
938 return std::bind(P, *this);
939 }
940
getToken() const941 StringRef getToken() const {
942 assert(isToken());
943 return StringRef(Tok.Data, Tok.Length);
944 }
945
getImm() const946 int64_t getImm() const {
947 assert(isImm());
948 return Imm.Val;
949 }
950
setImm(int64_t Val)951 void setImm(int64_t Val) {
952 assert(isImm());
953 Imm.Val = Val;
954 }
955
getImmTy() const956 ImmTy getImmTy() const {
957 assert(isImm());
958 return Imm.Type;
959 }
960
getReg() const961 unsigned getReg() const override {
962 assert(isRegKind());
963 return Reg.RegNo;
964 }
965
getStartLoc() const966 SMLoc getStartLoc() const override {
967 return StartLoc;
968 }
969
getEndLoc() const970 SMLoc getEndLoc() const override {
971 return EndLoc;
972 }
973
getLocRange() const974 SMRange getLocRange() const {
975 return SMRange(StartLoc, EndLoc);
976 }
977
  // Returns the input modifiers (abs/neg/sext). Only plain registers and
  // untyped (ImmTyNone) immediates may carry modifiers (asserted).
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  // Stores modifiers on whichever alternative (register or immediate) this
  // operand currently holds.
  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  // Applies FP input modifiers to a raw encoded value; defined out of line.
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  // MCInst-building helpers; defined out of line.
  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;
1010
addRegOrImmOperands(MCInst & Inst,unsigned N) const1011 void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
1012 if (isRegKind())
1013 addRegOperands(Inst, N);
1014 else
1015 addImmOperands(Inst, N);
1016 }
1017
  // Emits the modifier bitmask as its own immediate operand first, then the
  // source operand. For immediates the modifiers are already encoded in that
  // bitmask, so the raw value is appended without re-applying them
  // (ApplyModifiers = false).
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  // FP-modifier flavor: integer modifiers (sext) must not be present.
  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  // Int-modifier flavor: FP modifiers (abs/neg) must not be present.
  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  // Register-only variant: modifier bitmask first, then the register.
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
1054
  // Debug helper: prints a human-readable name for an ImmTy value. The switch
  // covers every enumerator, so there is no default case (the compiler will
  // warn if a new ImmTy is added without a corresponding entry here).
  // NOTE(review): ImmTyIndexKey8bit and ImmTyIndexKey16bit intentionally (?)
  // print the same "index_key" string — confirm if they ever need to differ.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    }
    // clang-format on
  }
1121
  // MCParsedAsmOperand debug dump: one-line summary of the operand's kind,
  // value, ImmTy (if any), and modifiers.
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }
1142
  // Factory: creates an immediate operand with value \p Val at \p Loc.
  // Imm.Kind starts as ImmKindTyNone; both Start and End locations are set to
  // the single \p Loc supplied.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }
1157
CreateToken(const AMDGPUAsmParser * AsmParser,StringRef Str,SMLoc Loc,bool HasExplicitEncodingSize=true)1158 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1159 StringRef Str, SMLoc Loc,
1160 bool HasExplicitEncodingSize = true) {
1161 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1162 Res->Tok.Data = Str.data();
1163 Res->Tok.Length = Str.size();
1164 Res->StartLoc = Loc;
1165 Res->EndLoc = Loc;
1166 return Res;
1167 }
1168
  // Factory: creates a register operand spanning [S, E) in the source.
  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  // Factory: wraps an MCExpr (e.g. a symbol reference) as an operand. Both
  // Start and End locations are set to the single \p S supplied.
  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
1188 };
1189
// Debug-prints an operand's modifier set.
// NOTE(review): the spacing is inconsistent ("neg: " has a space after the
// colon, "abs:"/"sext:" do not); left as-is since this is runtime output that
// existing tooling or tests may match verbatim.
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}
1194
1195 //===----------------------------------------------------------------------===//
1196 // AsmParser
1197 //===----------------------------------------------------------------------===//
1198
1199 // Holds info related to the current kernel, e.g. count of SGPRs used.
1200 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1201 // .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  // One past the highest register index referenced so far in the current
  // kernel, per register file; -1 means "nothing recorded yet".
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  // Records a use of SGPR #i and publishes the running maximum through the
  // .kernel.sgpr_count assembler symbol.
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Records a use of VGPR #i. The published .kernel.vgpr_count also folds in
  // the AGPR count via getTotalNumVGPRs (AGPRs share the VGPR budget on
  // gfx90a-style targets).
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  // Records a use of AGPR #i. No-op on subtargets without MAI instructions
  // (such a use will be rejected later during instruction matching).
  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Resets the per-kernel counters and eagerly creates the count symbols with
  // value 0 (each usesXgprAt(-1) call takes the `i >= Min` branch).
  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  // Records a register use. RegWidth appears to be in bits: the highest
  // touched dword index is DwordRegIndex + ceil(RegWidth/32) - 1.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      // TTMP/special registers do not affect the published counts.
      break;
    }
  }
};
1286
1287 class AMDGPUAsmParser : public MCTargetAsmParser {
1288 MCAsmParser &Parser;
1289
1290 unsigned ForcedEncodingSize = 0;
1291 bool ForcedDPP = false;
1292 bool ForcedSDWA = false;
1293 KernelScopeInfo KernelScope;
1294
1295 /// @name Auto-generated Match Functions
1296 /// {
1297
1298 #define GET_ASSEMBLER_HEADER
1299 #include "AMDGPUGenAsmMatcher.inc"
1300
1301 /// }
1302
1303 private:
1304 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1305 bool OutOfRangeError(SMRange Range);
1306 /// Calculate VGPR/SGPR blocks required for given target, reserved
1307 /// registers, and user-specified NextFreeXGPR values.
1308 ///
1309 /// \param Features [in] Target features, used for bug corrections.
1310 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1311 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1312 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1313 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1314 /// descriptor field, if valid.
1315 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1316 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1317 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1318 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1319 /// \param VGPRBlocks [out] Result VGPR block count.
1320 /// \param SGPRBlocks [out] Result SGPR block count.
1321 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
1322 bool FlatScrUsed, bool XNACKUsed,
1323 std::optional<bool> EnableWavefrontSize32,
1324 unsigned NextFreeVGPR, SMRange VGPRRange,
1325 unsigned NextFreeSGPR, SMRange SGPRRange,
1326 unsigned &VGPRBlocks, unsigned &SGPRBlocks);
1327 bool ParseDirectiveAMDGCNTarget();
1328 bool ParseDirectiveAMDHSACodeObjectVersion();
1329 bool ParseDirectiveAMDHSAKernel();
1330 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
1331 bool ParseDirectiveAMDKernelCodeT();
1332 // TODO: Possibly make subtargetHasRegister const.
1333 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
1334 bool ParseDirectiveAMDGPUHsaKernel();
1335
1336 bool ParseDirectiveISAVersion();
1337 bool ParseDirectiveHSAMetadata();
1338 bool ParseDirectivePALMetadataBegin();
1339 bool ParseDirectivePALMetadata();
1340 bool ParseDirectiveAMDGPULDS();
1341
1342 /// Common code to parse out a block of text (typically YAML) between start and
1343 /// end directives.
1344 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1345 const char *AssemblerDirectiveEnd,
1346 std::string &CollectString);
1347
1348 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
1349 RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
1350 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1351 unsigned &RegNum, unsigned &RegWidth,
1352 bool RestoreOnFailure = false);
1353 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
1354 unsigned &RegNum, unsigned &RegWidth,
1355 SmallVectorImpl<AsmToken> &Tokens);
1356 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1357 unsigned &RegWidth,
1358 SmallVectorImpl<AsmToken> &Tokens);
1359 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1360 unsigned &RegWidth,
1361 SmallVectorImpl<AsmToken> &Tokens);
1362 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1363 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
1364 bool ParseRegRange(unsigned& Num, unsigned& Width);
1365 unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
1366 unsigned RegWidth, SMLoc Loc);
1367
1368 bool isRegister();
1369 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1370 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1371 void initializeGprCountSymbol(RegisterKind RegKind);
1372 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1373 unsigned RegWidth);
1374 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1375 bool IsAtomic);
1376
1377 public:
1378 enum AMDGPUMatchResultTy {
1379 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
1380 };
1381 enum OperandMode {
1382 OperandMode_Default,
1383 OperandMode_NSA,
1384 };
1385
1386 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1387
AMDGPUAsmParser(const MCSubtargetInfo & STI,MCAsmParser & _Parser,const MCInstrInfo & MII,const MCTargetOptions & Options)1388 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1389 const MCInstrInfo &MII,
1390 const MCTargetOptions &Options)
1391 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
1392 MCAsmParserExtension::Initialize(Parser);
1393
1394 if (getFeatureBits().none()) {
1395 // Set default features.
1396 copySTI().ToggleFeature("southern-islands");
1397 }
1398
1399 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1400
1401 {
1402 // TODO: make those pre-defined variables read-only.
1403 // Currently there is none suitable machinery in the core llvm-mc for this.
1404 // MCSymbol::isRedefinable is intended for another purpose, and
1405 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
1406 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1407 MCContext &Ctx = getContext();
1408 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1409 MCSymbol *Sym =
1410 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
1411 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1412 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
1413 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1414 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
1415 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1416 } else {
1417 MCSymbol *Sym =
1418 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
1419 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
1420 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
1421 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
1422 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
1423 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
1424 }
1425 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1426 initializeGprCountSymbol(IS_VGPR);
1427 initializeGprCountSymbol(IS_SGPR);
1428 } else
1429 KernelScope.initialize(getContext());
1430 }
1431 }
1432
  // Subtarget capability queries, delegated to AMDGPUBaseInfo helpers.
  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasA16() const { return AMDGPU::hasA16(getSTI()); }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1446
  // Generation/encoding predicates, delegated to AMDGPUBaseInfo helpers.
  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }

  bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }

  bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }
1499
  // Feature-bit queries against the current subtarget.
  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  // s102/s103 are not addressable on VI/GFX9 generations.
  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  bool hasPartialNSAEncoding() const {
    return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
  }

  unsigned getNSAMaxSize(bool HasSampler = false) const {
    return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
  }

  unsigned getMaxNumUserSGPRs() const {
    return AMDGPU::getMaxNumUserSGPRs(getSTI());
  }

  bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1535
  // Returns the AMDGPU target streamer attached to the current MCStreamer.
  // The static_cast is presumed safe because this parser is only created for
  // AMDGPU targets, whose streamers install an AMDGPUTargetStreamer.
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }
1554
  // "Forced" state recorded while parsing the mnemonic (presumably from
  // explicit encoding suffixes — confirm in parseMnemonicSuffix) and
  // consulted when selecting match variants.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  // A forced 64-bit encoding selects the VOP3 form.
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
1563 ArrayRef<unsigned> getMatchedVariants() const;
1564 StringRef getMatchedVariantName() const;
1565
1566 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1567 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1568 bool RestoreOnFailure);
1569 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1570 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1571 SMLoc &EndLoc) override;
1572 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1573 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1574 unsigned Kind) override;
1575 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1576 OperandVector &Operands, MCStreamer &Out,
1577 uint64_t &ErrorInfo,
1578 bool MatchingInlineAsm) override;
1579 bool ParseDirective(AsmToken DirectiveID) override;
1580 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1581 OperandMode Mode = OperandMode_Default);
1582 StringRef parseMnemonicSuffix(StringRef Name);
1583 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1584 SMLoc NameLoc, OperandVector &Operands) override;
1585 //bool ProcessInstruction(MCInst &Inst);
1586
1587 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1588
1589 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1590
1591 ParseStatus
1592 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1593 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1594 std::function<bool(int64_t &)> ConvertResult = nullptr);
1595
1596 ParseStatus parseOperandArrayWithPrefix(
1597 const char *Prefix, OperandVector &Operands,
1598 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1599 bool (*ConvertResult)(int64_t &) = nullptr);
1600
1601 ParseStatus
1602 parseNamedBit(StringRef Name, OperandVector &Operands,
1603 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
1604 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1605 ParseStatus parseCPol(OperandVector &Operands);
1606 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1607 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1608 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1609 SMLoc &StringLoc);
1610
1611 bool isModifier();
1612 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1613 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1614 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1615 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1616 bool parseSP3NegModifier();
1617 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1618 bool HasLit = false);
1619 ParseStatus parseReg(OperandVector &Operands);
1620 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1621 bool HasLit = false);
1622 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1623 bool AllowImm = true);
1624 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1625 bool AllowImm = true);
1626 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1627 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1628 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1629 ParseStatus tryParseIndexKey(OperandVector &Operands,
1630 AMDGPUOperand::ImmTy ImmTy);
1631 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1632 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1633
1634 ParseStatus parseDfmtNfmt(int64_t &Format);
1635 ParseStatus parseUfmt(int64_t &Format);
1636 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1637 int64_t &Format);
1638 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1639 int64_t &Format);
1640 ParseStatus parseFORMAT(OperandVector &Operands);
1641 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1642 ParseStatus parseNumericFormat(int64_t &Format);
1643 ParseStatus parseFlatOffset(OperandVector &Operands);
1644 ParseStatus parseR128A16(OperandVector &Operands);
1645 ParseStatus parseBLGP(OperandVector &Operands);
1646 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1647 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1648
1649 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1650
1651 bool parseCnt(int64_t &IntVal);
1652 ParseStatus parseSWaitCnt(OperandVector &Operands);
1653
1654 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1655 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1656 ParseStatus parseDepCtr(OperandVector &Operands);
1657
1658 bool parseDelay(int64_t &Delay);
1659 ParseStatus parseSDelayALU(OperandVector &Operands);
1660
1661 ParseStatus parseHwreg(OperandVector &Operands);
1662
1663 private:
  // One parsed field of a composite operand (hwreg/sendmsg): its value,
  // source location, whether it was spelled symbolically, and whether it was
  // present at all.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };
1672
1673 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1674 bool validateSendMsg(const OperandInfoTy &Msg,
1675 const OperandInfoTy &Op,
1676 const OperandInfoTy &Stream);
1677
1678 bool parseHwregBody(OperandInfoTy &HwReg,
1679 OperandInfoTy &Offset,
1680 OperandInfoTy &Width);
1681 bool validateHwreg(const OperandInfoTy &HwReg,
1682 const OperandInfoTy &Offset,
1683 const OperandInfoTy &Width);
1684
1685 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1686 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1687 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1688
1689 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1690 const OperandVector &Operands) const;
1691 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
1692 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
1693 SMLoc getLitLoc(const OperandVector &Operands,
1694 bool SearchMandatoryLiterals = false) const;
1695 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const;
1696 SMLoc getConstLoc(const OperandVector &Operands) const;
1697 SMLoc getInstLoc(const OperandVector &Operands) const;
1698
1699 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
1700 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1701 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1702 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1703 bool validateSOPLiteral(const MCInst &Inst) const;
1704 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1705 bool validateVOPDRegBankConstraints(const MCInst &Inst,
1706 const OperandVector &Operands);
1707 bool validateIntClampSupported(const MCInst &Inst);
1708 bool validateMIMGAtomicDMask(const MCInst &Inst);
1709 bool validateMIMGGatherDMask(const MCInst &Inst);
1710 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1711 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
1712 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
1713 bool validateMIMGD16(const MCInst &Inst);
1714 bool validateMIMGMSAA(const MCInst &Inst);
1715 bool validateOpSel(const MCInst &Inst);
1716 bool validateNeg(const MCInst &Inst, int OpName);
1717 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1718 bool validateVccOperand(unsigned Reg) const;
1719 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1720 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1721 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1722 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1723 bool validateAGPRLdSt(const MCInst &Inst) const;
1724 bool validateVGPRAlign(const MCInst &Inst) const;
1725 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1726 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1727 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1728 bool validateDivScale(const MCInst &Inst);
1729 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1730 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1731 const SMLoc &IDLoc);
1732 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1733 const unsigned CPol);
1734 bool validateExeczVcczOperands(const OperandVector &Operands);
1735 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1736 std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
1737 unsigned getConstantBusLimit(unsigned Opcode) const;
1738 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1739 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1740 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1741
1742 bool isSupportedMnemo(StringRef Mnemo,
1743 const FeatureBitset &FBS);
1744 bool isSupportedMnemo(StringRef Mnemo,
1745 const FeatureBitset &FBS,
1746 ArrayRef<unsigned> Variants);
1747 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
1748
1749 bool isId(const StringRef Id) const;
1750 bool isId(const AsmToken &Token, const StringRef Id) const;
1751 bool isToken(const AsmToken::TokenKind Kind) const;
1752 StringRef getId() const;
1753 bool trySkipId(const StringRef Id);
1754 bool trySkipId(const StringRef Pref, const StringRef Id);
1755 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1756 bool trySkipToken(const AsmToken::TokenKind Kind);
1757 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1758 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1759 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1760
1761 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1762 AsmToken::TokenKind getTokenKind() const;
1763 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1764 bool parseExpr(OperandVector &Operands);
1765 StringRef getTokenStr() const;
1766 AsmToken peekToken(bool ShouldSkipSpace = true);
1767 AsmToken getToken() const;
1768 SMLoc getLoc() const;
1769 void lex();
1770
1771 public:
1772 void onBeginOfFile() override;
1773
1774 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1775
1776 ParseStatus parseExpTgt(OperandVector &Operands);
1777 ParseStatus parseSendMsg(OperandVector &Operands);
1778 ParseStatus parseInterpSlot(OperandVector &Operands);
1779 ParseStatus parseInterpAttr(OperandVector &Operands);
1780 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1781 ParseStatus parseBoolReg(OperandVector &Operands);
1782
1783 bool parseSwizzleOperand(int64_t &Op,
1784 const unsigned MinVal,
1785 const unsigned MaxVal,
1786 const StringRef ErrMsg,
1787 SMLoc &Loc);
1788 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1789 const unsigned MinVal,
1790 const unsigned MaxVal,
1791 const StringRef ErrMsg);
1792 ParseStatus parseSwizzle(OperandVector &Operands);
1793 bool parseSwizzleOffset(int64_t &Imm);
1794 bool parseSwizzleMacro(int64_t &Imm);
1795 bool parseSwizzleQuadPerm(int64_t &Imm);
1796 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1797 bool parseSwizzleBroadcast(int64_t &Imm);
1798 bool parseSwizzleSwap(int64_t &Imm);
1799 bool parseSwizzleReverse(int64_t &Imm);
1800
1801 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1802 int64_t parseGPRIdxMacro();
1803
cvtMubuf(MCInst & Inst,const OperandVector & Operands)1804 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
cvtMubufAtomic(MCInst & Inst,const OperandVector & Operands)1805 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1806
1807 ParseStatus parseOModSI(OperandVector &Operands);
1808
1809 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1810 OptionalImmIndexMap &OptionalIdx);
1811 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1812 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1813 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1814 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1815
1816 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1817 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1818 OptionalImmIndexMap &OptionalIdx);
1819 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1820 OptionalImmIndexMap &OptionalIdx);
1821
1822 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1823 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1824
1825 bool parseDimId(unsigned &Encoding);
1826 ParseStatus parseDim(OperandVector &Operands);
1827 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1828 ParseStatus parseDPP8(OperandVector &Operands);
1829 ParseStatus parseDPPCtrl(OperandVector &Operands);
1830 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1831 int64_t parseDPPCtrlSel(StringRef Ctrl);
1832 int64_t parseDPPCtrlPerm();
1833 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
cvtDPP8(MCInst & Inst,const OperandVector & Operands)1834 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
1835 cvtDPP(Inst, Operands, true);
1836 }
1837 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1838 bool IsDPP8 = false);
cvtVOP3DPP8(MCInst & Inst,const OperandVector & Operands)1839 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
1840 cvtVOP3DPP(Inst, Operands, true);
1841 }
1842
1843 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
1844 AMDGPUOperand::ImmTy Type);
1845 ParseStatus parseSDWADstUnused(OperandVector &Operands);
1846 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
1847 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
1848 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
1849 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
1850 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
1851 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
1852 uint64_t BasicInstType,
1853 bool SkipDstVcc = false,
1854 bool SkipSrcVcc = false);
1855
1856 ParseStatus parseEndpgm(OperandVector &Operands);
1857
1858 ParseStatus parseVOPD(OperandVector &Operands);
1859 };
1860
1861 } // end anonymous namespace
1862
1863 // May be called with integer type with equivalent bitwidth.
getFltSemantics(unsigned Size)1864 static const fltSemantics *getFltSemantics(unsigned Size) {
1865 switch (Size) {
1866 case 4:
1867 return &APFloat::IEEEsingle();
1868 case 8:
1869 return &APFloat::IEEEdouble();
1870 case 2:
1871 return &APFloat::IEEEhalf();
1872 default:
1873 llvm_unreachable("unsupported fp type");
1874 }
1875 }
1876
getFltSemantics(MVT VT)1877 static const fltSemantics *getFltSemantics(MVT VT) {
1878 return getFltSemantics(VT.getSizeInBits() / 8);
1879 }
1880
// Map an operand-type enum value to the IEEE semantics of the literal it
// carries. Packed 16-bit integer operand types and KIMM32 are grouped with
// the 32-bit (single-precision) literals.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  // 32-bit literal operand types.
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_KIMM32:
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    return &APFloat::IEEEsingle();
  // 64-bit literal operand types.
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  // 16-bit literal operand types (including packed fp16 forms).
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_KIMM16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
1922
1923 //===----------------------------------------------------------------------===//
1924 // Operand
1925 //===----------------------------------------------------------------------===//
1926
canLosslesslyConvertToFPType(APFloat & FPLiteral,MVT VT)1927 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1928 bool Lost;
1929
1930 // Convert literal to single precision
1931 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1932 APFloat::rmNearestTiesToEven,
1933 &Lost);
1934 // We allow precision lost but not overflow or underflow
1935 if (Status != APFloat::opOK &&
1936 Lost &&
1937 ((Status & APFloat::opOverflow) != 0 ||
1938 (Status & APFloat::opUnderflow) != 0)) {
1939 return false;
1940 }
1941
1942 return true;
1943 }
1944
isSafeTruncation(int64_t Val,unsigned Size)1945 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1946 return isUIntN(Size, Val) || isIntN(Size, Val);
1947 }
1948
isInlineableLiteralOp16(int64_t Val,MVT VT,bool HasInv2Pi)1949 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1950 if (VT.getScalarType() == MVT::i16) {
1951 // FP immediate values are broken.
1952 return isInlinableIntLiteral(Val);
1953 }
1954
1955 // f16/v2f16 operands work correctly for all values.
1956 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1957 }
1958
// Return true if this immediate can be encoded as an inline constant for an
// operand of machine value type |type|.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: round the 64-bit literal to the operand's format and
    // reject it if rounding overflows/underflows.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // The literal must fit the operand width as signed or unsigned.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
2021
// Return true if this immediate can be encoded as a (non-inline) literal
// constant for an operand of machine value type |type|.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  //
  // For i16x2 operands, we assume that the specified literal is encoded as a
  // single-precision float. This is pretty odd, but it matches SP3 and what
  // happens in hardware.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
                     : (type == MVT::v2i16) ? MVT::f32
                     : (type == MVT::v2f32) ? MVT::f32
                     : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
2074
isRegClass(unsigned RCID) const2075 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2076 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2077 }
2078
isVRegWithInputMods() const2079 bool AMDGPUOperand::isVRegWithInputMods() const {
2080 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2081 // GFX90A allows DPP on 64-bit operands.
2082 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2083 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2084 }
2085
isT16VRegWithInputMods() const2086 template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2087 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2088 : AMDGPU::VGPR_16_Lo128RegClassID);
2089 }
2090
isSDWAOperand(MVT type) const2091 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2092 if (AsmParser->isVI())
2093 return isVReg32();
2094 else if (AsmParser->isGFX9Plus())
2095 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2096 else
2097 return false;
2098 }
2099
isSDWAFP16Operand() const2100 bool AMDGPUOperand::isSDWAFP16Operand() const {
2101 return isSDWAOperand(MVT::f16);
2102 }
2103
isSDWAFP32Operand() const2104 bool AMDGPUOperand::isSDWAFP32Operand() const {
2105 return isSDWAOperand(MVT::f32);
2106 }
2107
isSDWAInt16Operand() const2108 bool AMDGPUOperand::isSDWAInt16Operand() const {
2109 return isSDWAOperand(MVT::i16);
2110 }
2111
isSDWAInt32Operand() const2112 bool AMDGPUOperand::isSDWAInt32Operand() const {
2113 return isSDWAOperand(MVT::i32);
2114 }
2115
isBoolReg() const2116 bool AMDGPUOperand::isBoolReg() const {
2117 auto FB = AsmParser->getFeatureBits();
2118 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2119 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2120 }
2121
applyInputFPModifiers(uint64_t Val,unsigned Size) const2122 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2123 {
2124 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2125 assert(Size == 2 || Size == 4 || Size == 8);
2126
2127 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2128
2129 if (Imm.Mods.Abs) {
2130 Val &= ~FpSignMask;
2131 }
2132 if (Imm.Mods.Neg) {
2133 Val ^= FpSignMask;
2134 }
2135
2136 return Val;
2137 }
2138
addImmOperands(MCInst & Inst,unsigned N,bool ApplyModifiers) const2139 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2140 if (isExpr()) {
2141 Inst.addOperand(MCOperand::createExpr(Expr));
2142 return;
2143 }
2144
2145 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2146 Inst.getNumOperands())) {
2147 addLiteralImmOperand(Inst, Imm.Val,
2148 ApplyModifiers &
2149 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2150 } else {
2151 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2152 Inst.addOperand(MCOperand::createImm(Imm.Val));
2153 setImmKindNone();
2154 }
2155 }
2156
// Encode Val as an inline constant when possible, otherwise as a literal,
// and append it to Inst. The encoding depends on the operand type of the
// next operand slot and on whether the token was written as an FP or an
// integer literal.
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // FP tokens are still in double format here, so the sign bit to flip is
    // that of a double; otherwise use the operand's own size.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
          Val &= 0xffffffff00000000u;
        }

        Inst.addOperand(MCOperand::createImm(Val));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32:
    case AMDGPU::OPERAND_KIMM32:
    case AMDGPU::OPERAND_KIMM16:
    case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) {
        setImmKindMandatoryLiteral();
      } else {
        setImmKindLiteral();
      }
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Not inlinable: truncate to the low 32 bits and emit as a literal.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // FP operands keep the high half (literal occupies the high 32 bits);
    // integer operands keep the low half.
    Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32
                                                    : Lo_32(Val);

    Inst.addOperand(MCOperand::createImm(Val));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Not inlinable: truncate to the low 16 bits and emit as a literal.
    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // Packed inline-only operand types: the predicates must already have
    // verified the value is an inlinable 16-bit constant.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  case AMDGPU::OPERAND_KIMM32:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
    setImmKindMandatoryLiteral();
    return;
  case AMDGPU::OPERAND_KIMM16:
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue()));
    setImmKindMandatoryLiteral();
    return;
  default:
    llvm_unreachable("invalid operand size");
  }
}
2346
addRegOperands(MCInst & Inst,unsigned N) const2347 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
2348 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
2349 }
2350
isInlineValue() const2351 bool AMDGPUOperand::isInlineValue() const {
2352 return isRegKind() && ::isInlineValue(getReg());
2353 }
2354
2355 //===----------------------------------------------------------------------===//
2356 // AsmParser
2357 //===----------------------------------------------------------------------===//
2358
// Map a register kind and total width in bits to the corresponding MC
// register class ID, or -1 if no class of that width exists for that kind.
// Note the supported widths differ per kind: TTMP has a sparse set, and
// SGPRs have no 1024-bit class.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32:
      return AMDGPU::VGPR_32RegClassID;
    case 64:
      return AMDGPU::VReg_64RegClassID;
    case 96:
      return AMDGPU::VReg_96RegClassID;
    case 128:
      return AMDGPU::VReg_128RegClassID;
    case 160:
      return AMDGPU::VReg_160RegClassID;
    case 192:
      return AMDGPU::VReg_192RegClassID;
    case 224:
      return AMDGPU::VReg_224RegClassID;
    case 256:
      return AMDGPU::VReg_256RegClassID;
    case 288:
      return AMDGPU::VReg_288RegClassID;
    case 320:
      return AMDGPU::VReg_320RegClassID;
    case 352:
      return AMDGPU::VReg_352RegClassID;
    case 384:
      return AMDGPU::VReg_384RegClassID;
    case 512:
      return AMDGPU::VReg_512RegClassID;
    case 1024:
      return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
    default: return -1;
    case 32:
      return AMDGPU::TTMP_32RegClassID;
    case 64:
      return AMDGPU::TTMP_64RegClassID;
    case 128:
      return AMDGPU::TTMP_128RegClassID;
    case 256:
      return AMDGPU::TTMP_256RegClassID;
    case 512:
      return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32:
      return AMDGPU::SGPR_32RegClassID;
    case 64:
      return AMDGPU::SGPR_64RegClassID;
    case 96:
      return AMDGPU::SGPR_96RegClassID;
    case 128:
      return AMDGPU::SGPR_128RegClassID;
    case 160:
      return AMDGPU::SGPR_160RegClassID;
    case 192:
      return AMDGPU::SGPR_192RegClassID;
    case 224:
      return AMDGPU::SGPR_224RegClassID;
    case 256:
      return AMDGPU::SGPR_256RegClassID;
    case 288:
      return AMDGPU::SGPR_288RegClassID;
    case 320:
      return AMDGPU::SGPR_320RegClassID;
    case 352:
      return AMDGPU::SGPR_352RegClassID;
    case 384:
      return AMDGPU::SGPR_384RegClassID;
    case 512:
      return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
    default: return -1;
    case 32:
      return AMDGPU::AGPR_32RegClassID;
    case 64:
      return AMDGPU::AReg_64RegClassID;
    case 96:
      return AMDGPU::AReg_96RegClassID;
    case 128:
      return AMDGPU::AReg_128RegClassID;
    case 160:
      return AMDGPU::AReg_160RegClassID;
    case 192:
      return AMDGPU::AReg_192RegClassID;
    case 224:
      return AMDGPU::AReg_224RegClassID;
    case 256:
      return AMDGPU::AReg_256RegClassID;
    case 288:
      return AMDGPU::AReg_288RegClassID;
    case 320:
      return AMDGPU::AReg_320RegClassID;
    case 352:
      return AMDGPU::AReg_352RegClassID;
    case 384:
      return AMDGPU::AReg_384RegClassID;
    case 512:
      return AMDGPU::AReg_512RegClassID;
    case 1024:
      return AMDGPU::AReg_1024RegClassID;
    }
  }
  // IS_UNKNOWN / IS_SPECIAL: no regular register class.
  return -1;
}
2471
// Map a special-register assembly name to its MC register, or NoRegister if
// the name is not a special register. Several registers accept both a bare
// name and an "src_"-prefixed alias.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}
2515
ParseRegister(MCRegister & RegNo,SMLoc & StartLoc,SMLoc & EndLoc,bool RestoreOnFailure)2516 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2517 SMLoc &EndLoc, bool RestoreOnFailure) {
2518 auto R = parseRegister();
2519 if (!R) return true;
2520 assert(R->isReg());
2521 RegNo = R->getReg();
2522 StartLoc = R->getStartLoc();
2523 EndLoc = R->getEndLoc();
2524 return false;
2525 }
2526
parseRegister(MCRegister & Reg,SMLoc & StartLoc,SMLoc & EndLoc)2527 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
2528 SMLoc &EndLoc) {
2529 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
2530 }
2531
tryParseRegister(MCRegister & Reg,SMLoc & StartLoc,SMLoc & EndLoc)2532 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2533 SMLoc &EndLoc) {
2534 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2535 bool PendingErrors = getParser().hasPendingError();
2536 getParser().clearPendingErrors();
2537 if (PendingErrors)
2538 return ParseStatus::Failure;
2539 if (Result)
2540 return ParseStatus::NoMatch;
2541 return ParseStatus::Success;
2542 }
2543
AddNextRegisterToList(unsigned & Reg,unsigned & RegWidth,RegisterKind RegKind,unsigned Reg1,SMLoc Loc)2544 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
2545 RegisterKind RegKind, unsigned Reg1,
2546 SMLoc Loc) {
2547 switch (RegKind) {
2548 case IS_SPECIAL:
2549 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2550 Reg = AMDGPU::EXEC;
2551 RegWidth = 64;
2552 return true;
2553 }
2554 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2555 Reg = AMDGPU::FLAT_SCR;
2556 RegWidth = 64;
2557 return true;
2558 }
2559 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2560 Reg = AMDGPU::XNACK_MASK;
2561 RegWidth = 64;
2562 return true;
2563 }
2564 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2565 Reg = AMDGPU::VCC;
2566 RegWidth = 64;
2567 return true;
2568 }
2569 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2570 Reg = AMDGPU::TBA;
2571 RegWidth = 64;
2572 return true;
2573 }
2574 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2575 Reg = AMDGPU::TMA;
2576 RegWidth = 64;
2577 return true;
2578 }
2579 Error(Loc, "register does not fit in the list");
2580 return false;
2581 case IS_VGPR:
2582 case IS_SGPR:
2583 case IS_AGPR:
2584 case IS_TTMP:
2585 if (Reg1 != Reg + RegWidth / 32) {
2586 Error(Loc, "registers in a list must have consecutive indices");
2587 return false;
2588 }
2589 RegWidth += 32;
2590 return true;
2591 default:
2592 llvm_unreachable("unexpected register kind");
2593 }
2594 }
2595
// Association of a textual register-name prefix with its register kind.
struct RegInfo {
  StringLiteral Name; // Prefix as written in assembly (e.g. "v", "ttmp").
  RegisterKind Kind;  // Kind of register the prefix denotes.
};
2600
// Prefix table for regular (indexable) registers. Matched by prefix in
// declaration order (see getRegularRegInfo), so "acc" must come before "a".
static constexpr RegInfo RegularRegisters[] = {
  {{"v"}, IS_VGPR},
  {{"s"}, IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"}, IS_AGPR},
  {{"a"}, IS_AGPR},
};
2608
isRegularReg(RegisterKind Kind)2609 static bool isRegularReg(RegisterKind Kind) {
2610 return Kind == IS_VGPR ||
2611 Kind == IS_SGPR ||
2612 Kind == IS_TTMP ||
2613 Kind == IS_AGPR;
2614 }
2615
getRegularRegInfo(StringRef Str)2616 static const RegInfo* getRegularRegInfo(StringRef Str) {
2617 for (const RegInfo &Reg : RegularRegisters)
2618 if (Str.starts_with(Reg.Name))
2619 return &Reg;
2620 return nullptr;
2621 }
2622
getRegNum(StringRef Str,unsigned & Num)2623 static bool getRegNum(StringRef Str, unsigned& Num) {
2624 return !Str.getAsInteger(10, Num);
2625 }
2626
2627 bool
isRegister(const AsmToken & Token,const AsmToken & NextToken) const2628 AMDGPUAsmParser::isRegister(const AsmToken &Token,
2629 const AsmToken &NextToken) const {
2630
2631 // A list of consecutive registers: [s0,s1,s2,s3]
2632 if (Token.is(AsmToken::LBrac))
2633 return true;
2634
2635 if (!Token.is(AsmToken::Identifier))
2636 return false;
2637
2638 // A single register like s0 or a range of registers like s[0:1]
2639
2640 StringRef Str = Token.getString();
2641 const RegInfo *Reg = getRegularRegInfo(Str);
2642 if (Reg) {
2643 StringRef RegName = Reg->Name;
2644 StringRef RegSuffix = Str.substr(RegName.size());
2645 if (!RegSuffix.empty()) {
2646 RegSuffix.consume_back(".l");
2647 RegSuffix.consume_back(".h");
2648 unsigned Num;
2649 // A single register with an index: rXX
2650 if (getRegNum(RegSuffix, Num))
2651 return true;
2652 } else {
2653 // A range of registers: r[XX:YY].
2654 if (NextToken.is(AsmToken::LBrac))
2655 return true;
2656 }
2657 }
2658
2659 return getSpecialRegForName(Str) != AMDGPU::NoRegister;
2660 }
2661
2662 bool
isRegister()2663 AMDGPUAsmParser::isRegister()
2664 {
2665 return isRegister(getToken(), peekToken());
2666 }
2667
getRegularReg(RegisterKind RegKind,unsigned RegNum,unsigned SubReg,unsigned RegWidth,SMLoc Loc)2668 unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2669 unsigned SubReg, unsigned RegWidth,
2670 SMLoc Loc) {
2671 assert(isRegularReg(RegKind));
2672
2673 unsigned AlignSize = 1;
2674 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2675 // SGPR and TTMP registers must be aligned.
2676 // Max required alignment is 4 dwords.
2677 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2678 }
2679
2680 if (RegNum % AlignSize != 0) {
2681 Error(Loc, "invalid register alignment");
2682 return AMDGPU::NoRegister;
2683 }
2684
2685 unsigned RegIdx = RegNum / AlignSize;
2686 int RCID = getRegClass(RegKind, RegWidth);
2687 if (RCID == -1) {
2688 Error(Loc, "invalid or unsupported register size");
2689 return AMDGPU::NoRegister;
2690 }
2691
2692 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2693 const MCRegisterClass RC = TRI->getRegClass(RCID);
2694 if (RegIdx >= RC.getNumRegs()) {
2695 Error(Loc, "register index is out of range");
2696 return AMDGPU::NoRegister;
2697 }
2698
2699 unsigned Reg = RC.getRegister(RegIdx);
2700
2701 if (SubReg) {
2702 Reg = TRI->getSubReg(Reg, SubReg);
2703
2704 // Currently all regular registers have their .l and .h subregisters, so
2705 // we should never need to generate an error here.
2706 assert(Reg && "Invalid subregister!");
2707 }
2708
2709 return Reg;
2710 }
2711
ParseRegRange(unsigned & Num,unsigned & RegWidth)2712 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
2713 int64_t RegLo, RegHi;
2714 if (!skipToken(AsmToken::LBrac, "missing register index"))
2715 return false;
2716
2717 SMLoc FirstIdxLoc = getLoc();
2718 SMLoc SecondIdxLoc;
2719
2720 if (!parseExpr(RegLo))
2721 return false;
2722
2723 if (trySkipToken(AsmToken::Colon)) {
2724 SecondIdxLoc = getLoc();
2725 if (!parseExpr(RegHi))
2726 return false;
2727 } else {
2728 RegHi = RegLo;
2729 }
2730
2731 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2732 return false;
2733
2734 if (!isUInt<32>(RegLo)) {
2735 Error(FirstIdxLoc, "invalid register index");
2736 return false;
2737 }
2738
2739 if (!isUInt<32>(RegHi)) {
2740 Error(SecondIdxLoc, "invalid register index");
2741 return false;
2742 }
2743
2744 if (RegLo > RegHi) {
2745 Error(FirstIdxLoc, "first register index should not exceed second index");
2746 return false;
2747 }
2748
2749 Num = static_cast<unsigned>(RegLo);
2750 RegWidth = 32 * ((RegHi - RegLo) + 1);
2751 return true;
2752 }
2753
ParseSpecialReg(RegisterKind & RegKind,unsigned & RegNum,unsigned & RegWidth,SmallVectorImpl<AsmToken> & Tokens)2754 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
2755 unsigned &RegNum, unsigned &RegWidth,
2756 SmallVectorImpl<AsmToken> &Tokens) {
2757 assert(isToken(AsmToken::Identifier));
2758 unsigned Reg = getSpecialRegForName(getTokenStr());
2759 if (Reg) {
2760 RegNum = 0;
2761 RegWidth = 32;
2762 RegKind = IS_SPECIAL;
2763 Tokens.push_back(getToken());
2764 lex(); // skip register name
2765 }
2766 return Reg;
2767 }
2768
// Parses a regular (prefix + index) register operand: a single register such
// as "v0", a 16-bit half such as "v0.l" / "v0.h", or a range such as
// "v[0:3]". Consumed tokens are appended to Tokens so the caller can restore
// them. Returns the MC register number, or AMDGPU::NoRegister after emitting
// a diagnostic.
unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  // Everything after the prefix: an index ("0"), an indexed half ("0.l"),
  // or empty (range form follows in brackets).
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  unsigned SubReg = NoSubRegister;
  if (!RegSuffix.empty()) {
    // We don't know the opcode till we are done parsing, so we don't know if
    // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
    // .h to correctly specify 16 bit registers. We also can't determine class
    // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
    if (RegSuffix.consume_back(".l"))
      SubReg = AMDGPU::lo16;
    else if (RegSuffix.consume_back(".h"))
      SubReg = AMDGPU::hi16;

    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 32;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
}
2812
// Parses a bracketed list of consecutive 32-bit registers of the same kind,
// e.g. [s0,s1,s2,s3], and folds it into a single register tuple. Each element
// must be a single 32-bit register; consecutiveness is enforced by
// AddNextRegisterToList. Returns the tuple's MC register number, or
// AMDGPU::NoRegister after emitting a diagnostic.
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  // First element establishes the kind and starting register.
  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 32) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  // Remaining elements must match the first in kind and width, and extend
  // the running tuple (RegWidth grows by 32 per element).
  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 32) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  // For regular kinds, re-resolve the accumulated (start, width) pair into
  // the tuple register; special registers keep the value from above.
  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);

  return Reg;
}
2866
// Dispatches to the appropriate register parser (special register, regular
// register, or bracketed register list) based on the current token, then
// verifies the result is available on the current subtarget. Consumed tokens
// are appended to Tokens. Returns false after emitting a diagnostic.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    // Special names (vcc, exec, ...) are tried before regular prefixes.
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    // The sub-parsers report their own diagnostics.
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}
2898
ParseAMDGPURegister(RegisterKind & RegKind,unsigned & Reg,unsigned & RegNum,unsigned & RegWidth,bool RestoreOnFailure)2899 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
2900 unsigned &RegNum, unsigned &RegWidth,
2901 bool RestoreOnFailure /*=false*/) {
2902 Reg = AMDGPU::NoRegister;
2903
2904 SmallVector<AsmToken, 1> Tokens;
2905 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
2906 if (RestoreOnFailure) {
2907 while (!Tokens.empty()) {
2908 getLexer().UnLex(Tokens.pop_back_val());
2909 }
2910 }
2911 return true;
2912 }
2913 return false;
2914 }
2915
2916 std::optional<StringRef>
getGprCountSymbolName(RegisterKind RegKind)2917 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
2918 switch (RegKind) {
2919 case IS_VGPR:
2920 return StringRef(".amdgcn.next_free_vgpr");
2921 case IS_SGPR:
2922 return StringRef(".amdgcn.next_free_sgpr");
2923 default:
2924 return std::nullopt;
2925 }
2926 }
2927
initializeGprCountSymbol(RegisterKind RegKind)2928 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
2929 auto SymbolName = getGprCountSymbolName(RegKind);
2930 assert(SymbolName && "initializing invalid register kind");
2931 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
2932 Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
2933 }
2934
// Raises the .amdgcn.next_free_{v,s}gpr symbol to cover a register use at
// DwordRegIndex spanning RegWidth bits. Returns false (after emitting an
// error) if the symbol exists but is not an absolute variable.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  // Only VGPR/SGPR kinds are tracked; others are silently accepted.
  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword index touched by this register use.
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // The symbol records "next free", i.e. one past the highest used index.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
2963
// Parses a register operand and wraps it in an AMDGPUOperand, updating the
// per-kernel GPR usage bookkeeping on the way. Returns nullptr on failure
// (a diagnostic has already been emitted).
// NOTE(review): RestoreOnFailure is not referenced in this body — confirm
// whether token restoration is intentionally handled elsewhere.
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbi(getSTI())) {
    // HSA ABI tracks usage via the .amdgcn.next_free_{v,s}gpr symbols.
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
2982
parseImm(OperandVector & Operands,bool HasSP3AbsModifier,bool HasLit)2983 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
2984 bool HasSP3AbsModifier, bool HasLit) {
2985 // TODO: add syntactic sugar for 1/(2*PI)
2986
2987 if (isRegister())
2988 return ParseStatus::NoMatch;
2989 assert(!isModifier());
2990
2991 if (!HasLit) {
2992 HasLit = trySkipId("lit");
2993 if (HasLit) {
2994 if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
2995 return ParseStatus::Failure;
2996 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit);
2997 if (S.isSuccess() &&
2998 !skipToken(AsmToken::RParen, "expected closing parentheses"))
2999 return ParseStatus::Failure;
3000 return S;
3001 }
3002 }
3003
3004 const auto& Tok = getToken();
3005 const auto& NextTok = peekToken();
3006 bool IsReal = Tok.is(AsmToken::Real);
3007 SMLoc S = getLoc();
3008 bool Negate = false;
3009
3010 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
3011 lex();
3012 IsReal = true;
3013 Negate = true;
3014 }
3015
3016 AMDGPUOperand::Modifiers Mods;
3017 Mods.Lit = HasLit;
3018
3019 if (IsReal) {
3020 // Floating-point expressions are not supported.
3021 // Can only allow floating-point literals with an
3022 // optional sign.
3023
3024 StringRef Num = getTokenStr();
3025 lex();
3026
3027 APFloat RealVal(APFloat::IEEEdouble());
3028 auto roundMode = APFloat::rmNearestTiesToEven;
3029 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
3030 return ParseStatus::Failure;
3031 if (Negate)
3032 RealVal.changeSign();
3033
3034 Operands.push_back(
3035 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
3036 AMDGPUOperand::ImmTyNone, true));
3037 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3038 Op.setModifiers(Mods);
3039
3040 return ParseStatus::Success;
3041
3042 } else {
3043 int64_t IntVal;
3044 const MCExpr *Expr;
3045 SMLoc S = getLoc();
3046
3047 if (HasSP3AbsModifier) {
3048 // This is a workaround for handling expressions
3049 // as arguments of SP3 'abs' modifier, for example:
3050 // |1.0|
3051 // |-1|
3052 // |1+x|
3053 // This syntax is not compatible with syntax of standard
3054 // MC expressions (due to the trailing '|').
3055 SMLoc EndLoc;
3056 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
3057 return ParseStatus::Failure;
3058 } else {
3059 if (Parser.parseExpression(Expr))
3060 return ParseStatus::Failure;
3061 }
3062
3063 if (Expr->evaluateAsAbsolute(IntVal)) {
3064 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
3065 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3066 Op.setModifiers(Mods);
3067 } else {
3068 if (HasLit)
3069 return ParseStatus::NoMatch;
3070 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
3071 }
3072
3073 return ParseStatus::Success;
3074 }
3075
3076 return ParseStatus::NoMatch;
3077 }
3078
parseReg(OperandVector & Operands)3079 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3080 if (!isRegister())
3081 return ParseStatus::NoMatch;
3082
3083 if (auto R = parseRegister()) {
3084 assert(R->isReg());
3085 Operands.push_back(std::move(R));
3086 return ParseStatus::Success;
3087 }
3088 return ParseStatus::Failure;
3089 }
3090
parseRegOrImm(OperandVector & Operands,bool HasSP3AbsMod,bool HasLit)3091 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3092 bool HasSP3AbsMod, bool HasLit) {
3093 ParseStatus Res = parseReg(Operands);
3094 if (!Res.isNoMatch())
3095 return Res;
3096 if (isModifier())
3097 return ParseStatus::NoMatch;
3098 return parseImm(Operands, HasSP3AbsMod, HasLit);
3099 }
3100
3101 bool
isNamedOperandModifier(const AsmToken & Token,const AsmToken & NextToken) const3102 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3103 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3104 const auto &str = Token.getString();
3105 return str == "abs" || str == "neg" || str == "sext";
3106 }
3107 return false;
3108 }
3109
3110 bool
isOpcodeModifierWithVal(const AsmToken & Token,const AsmToken & NextToken) const3111 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3112 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3113 }
3114
3115 bool
isOperandModifier(const AsmToken & Token,const AsmToken & NextToken) const3116 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3117 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3118 }
3119
3120 bool
isRegOrOperandModifier(const AsmToken & Token,const AsmToken & NextToken) const3121 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3122 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3123 }
3124
3125 // Check if this is an operand modifier or an opcode modifier
3126 // which may look like an expression but it is not. We should
3127 // avoid parsing these modifiers as expressions. Currently
3128 // recognized sequences are:
3129 // |...|
3130 // abs(...)
3131 // neg(...)
3132 // sext(...)
3133 // -reg
3134 // -|...|
3135 // -abs(...)
3136 // name:...
3137 //
3138 bool
isModifier()3139 AMDGPUAsmParser::isModifier() {
3140
3141 AsmToken Tok = getToken();
3142 AsmToken NextToken[2];
3143 peekTokens(NextToken);
3144
3145 return isOperandModifier(Tok, NextToken[0]) ||
3146 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3147 isOpcodeModifierWithVal(Tok, NextToken[0]);
3148 }
3149
3150 // Check if the current token is an SP3 'neg' modifier.
3151 // Currently this modifier is allowed in the following context:
3152 //
3153 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3154 // 2. Before an 'abs' modifier: -abs(...)
3155 // 3. Before an SP3 'abs' modifier: -|...|
3156 //
3157 // In all other cases "-" is handled as a part
3158 // of an expression that follows the sign.
3159 //
3160 // Note: When "-" is followed by an integer literal,
3161 // this is interpreted as integer negation rather
3162 // than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of floating-point
3164 // NEG modifier would have resulted in different meaning
3165 // of integer literals used with VOP1/2/C and VOP3,
3166 // for example:
3167 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3168 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3169 // Negative fp literals with preceding "-" are
3170 // handled likewise for uniformity
3171 //
3172 bool
parseSP3NegModifier()3173 AMDGPUAsmParser::parseSP3NegModifier() {
3174
3175 AsmToken NextToken[2];
3176 peekTokens(NextToken);
3177
3178 if (isToken(AsmToken::Minus) &&
3179 (isRegister(NextToken[0], NextToken[1]) ||
3180 NextToken[0].is(AsmToken::Pipe) ||
3181 isId(NextToken[0], "abs"))) {
3182 lex();
3183 return true;
3184 }
3185
3186 return false;
3187 }
3188
// Parses a register or immediate together with its floating-point input
// modifiers in both named (neg(...), abs(...), lit(...)) and SP3 (-x, |x|)
// syntaxes. The parse order of the modifier prefixes is significant.
ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  bool Lit;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  // Mixing the SP3 and named spellings of the same modifier is an error.
  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg)
    return Error(Loc, "expected register or immediate");
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return ParseStatus::Failure;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return ParseStatus::Failure;

  Lit = trySkipId("lit");
  if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit"))
    return ParseStatus::Failure;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs)
    return Error(Loc, "expected register or immediate");

  ParseStatus Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs, Lit);
  } else {
    Res = parseReg(Operands);
  }
  if (!Res.isSuccess())
    // Any modifier prefix makes a NoMatch fatal: tokens were consumed.
    return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res;

  // Diagnose, but keep parsing the closing delimiters below.
  if (Lit && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  // Consume the closing delimiter of every modifier that was opened.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return ParseStatus::Failure;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  Mods.Lit = Lit;

  if (Mods.hasFPModifiers() || Lit) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  return ParseStatus::Success;
}
3257
3258 ParseStatus
parseRegOrImmWithIntInputMods(OperandVector & Operands,bool AllowImm)3259 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3260 bool AllowImm) {
3261 bool Sext = trySkipId("sext");
3262 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3263 return ParseStatus::Failure;
3264
3265 ParseStatus Res;
3266 if (AllowImm) {
3267 Res = parseRegOrImm(Operands);
3268 } else {
3269 Res = parseReg(Operands);
3270 }
3271 if (!Res.isSuccess())
3272 return Sext ? ParseStatus::Failure : Res;
3273
3274 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3275 return ParseStatus::Failure;
3276
3277 AMDGPUOperand::Modifiers Mods;
3278 Mods.Sext = Sext;
3279
3280 if (Mods.hasIntModifiers()) {
3281 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3282 if (Op.isExpr())
3283 return Error(Op.getStartLoc(), "expected an absolute expression");
3284 Op.setModifiers(Mods);
3285 }
3286
3287 return ParseStatus::Success;
3288 }
3289
// Register-only variant: FP input modifiers allowed, immediates rejected.
ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
3293
// Register-only variant: integer input modifiers allowed, immediates rejected.
ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
3297
parseVReg32OrOff(OperandVector & Operands)3298 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3299 auto Loc = getLoc();
3300 if (trySkipId("off")) {
3301 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3302 AMDGPUOperand::ImmTyOff, false));
3303 return ParseStatus::Success;
3304 }
3305
3306 if (!isRegister())
3307 return ParseStatus::NoMatch;
3308
3309 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3310 if (Reg) {
3311 Operands.push_back(std::move(Reg));
3312 return ParseStatus::Success;
3313 }
3314
3315 return ParseStatus::Failure;
3316 }
3317
// Post-match filter: rejects candidate instructions whose encoding does not
// agree with a forced encoding suffix (_e32/_e64/_sdwa/_dpp), and enforces
// SDWA dst_sel restrictions for v_mac.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  // Reject matches that conflict with the user-forced encoding.
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // Some VOP3 forms should only be selected when explicitly requested.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}
3345
getAllVariants()3346 static ArrayRef<unsigned> getAllVariants() {
3347 static const unsigned Variants[] = {
3348 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3349 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3350 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3351 };
3352
3353 return ArrayRef(Variants);
3354 }
3355
// What asm variants we should check
// Returns the subset of encoding variants implied by a forced encoding
// suffix; all variants when nothing is forced. The arrays are static so the
// returned ArrayRefs remain valid after return.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  // The combined DPP+VOP3 case must be checked before the individual ones.
  if (isForcedDPP() && isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
    return ArrayRef(Variants);
  }
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return ArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return ArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return ArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return ArrayRef(Variants);
  }

  return getAllVariants();
}
3385
getMatchedVariantName() const3386 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3387 if (isForcedDPP() && isForcedVOP3())
3388 return "e64_dpp";
3389
3390 if (getForcedEncodingSize() == 32)
3391 return "e32";
3392
3393 if (isForcedVOP3())
3394 return "e64";
3395
3396 if (isForcedSDWA())
3397 return "sdwa";
3398
3399 if (isForcedDPP())
3400 return "dpp";
3401
3402 return "";
3403 }
3404
findImplicitSGPRReadInVOP(const MCInst & Inst) const3405 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3406 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3407 for (MCPhysReg Reg : Desc.implicit_uses()) {
3408 switch (Reg) {
3409 case AMDGPU::FLAT_SCR:
3410 case AMDGPU::VCC:
3411 case AMDGPU::VCC_LO:
3412 case AMDGPU::VCC_HI:
3413 case AMDGPU::M0:
3414 return Reg;
3415 default:
3416 break;
3417 }
3418 }
3419 return AMDGPU::NoRegister;
3420 }
3421
3422 // NB: This code is correct only when used to check constant
3423 // bus limitations because GFX7 support no f16 inline constants.
3424 // Note that there are no cases when a GFX7 opcode violates
3425 // constant bus limitations due to the use of an f16 constant.
// Returns true if operand OpIdx of Inst holds an immediate that the hardware
// can encode inline (i.e. it does not occupy a literal/constant-bus slot).
// Dispatches on the expected operand size; 16-bit operands additionally
// depend on the precise operand type (int16 vs packed v2i16/v2f16 vs fp16).
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // KImm operands are always literal; non-src operands are never inline.
  if (!AMDGPU::isSISrcOperand(Desc, OpIdx) ||
      AMDGPU::isKImmOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableLiteralV2I16(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV2F16(Val);

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
3468
// Returns how many distinct scalar (constant-bus) values Opcode may read:
// 1 on pre-GFX10 targets, and on GFX10+ either 2 or 1 for the listed
// 64-bit shift instructions.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
3496
// Maximum number of src-operand indices getSrcOperandIndices can return
// (VOPD: 4 sources plus the immDeferred and imm slots).
constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;
using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>;
3499
3500 // Get regular operand indices in the same order as specified
3501 // in the instruction (but append mandatory literals to the end).
getSrcOperandIndices(unsigned Opcode,bool AddMandatoryLiterals=false)3502 static OperandIndices getSrcOperandIndices(unsigned Opcode,
3503 bool AddMandatoryLiterals = false) {
3504
3505 int16_t ImmIdx =
3506 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;
3507
3508 if (isVOPD(Opcode)) {
3509 int16_t ImmDeferredIdx =
3510 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred)
3511 : -1;
3512
3513 return {getNamedOperandIdx(Opcode, OpName::src0X),
3514 getNamedOperandIdx(Opcode, OpName::vsrc1X),
3515 getNamedOperandIdx(Opcode, OpName::src0Y),
3516 getNamedOperandIdx(Opcode, OpName::vsrc1Y),
3517 ImmDeferredIdx,
3518 ImmIdx};
3519 }
3520
3521 return {getNamedOperandIdx(Opcode, OpName::src0),
3522 getNamedOperandIdx(Opcode, OpName::src1),
3523 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
3524 }
3525
usesConstantBus(const MCInst & Inst,unsigned OpIdx)3526 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3527 const MCOperand &MO = Inst.getOperand(OpIdx);
3528 if (MO.isImm()) {
3529 return !isInlineConstant(Inst, OpIdx);
3530 } else if (MO.isReg()) {
3531 auto Reg = MO.getReg();
3532 if (!Reg) {
3533 return false;
3534 }
3535 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3536 auto PReg = mc2PseudoReg(Reg);
3537 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3538 } else {
3539 return true;
3540 }
3541 }
3542
// Verifies that a VALU (or VOPD) instruction does not read more scalar
// values over the constant bus than getConstantBusLimit allows. Distinct
// SGPRs and literals each count; a literal reused at the same size counts
// once. Emits a diagnostic and returns false on violation.
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  // Only VALU encodings and VOPD are subject to the constant-bus limit.
  if (!(Desc.TSFlags &
        (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
         SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) &&
      !isVOPD(Opcode))
    return true;

  // Check special imm operands (used by madmk, etc)
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
    ++NumLiterals;
    LiteralSize = 4;
  }

  // Implicit SGPR reads (e.g. VCC) also occupy the bus.
  SmallDenseSet<unsigned> SGPRsUsed;
  unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed != AMDGPU::NoRegister) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  OperandIndices OpIndices = getSrcOperandIndices(Opcode);

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue; // operand not present on this opcode

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (usesConstantBus(Inst, OpIdx)) {
      if (MO.isReg()) {
        LastSGPR = mc2PseudoReg(MO.getReg());
        // Pairs of registers with a partial intersections like these
        //   s0, s[0:1]
        //   flat_scratch_lo, flat_scratch
        //   flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      } else { // Expression or a literal

        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
          continue; // special operand like VINTERP attr_chan

        // An instruction may use only one literal.
        // This has been validated on the previous step.
        // See validateVOPLiteral.
        // This literal may be used as more than one operand.
        // If all these operands are of the same size,
        // this literal counts as one scalar value.
        // Otherwise it counts as 2 scalar values.
        // See "GFX10 Shader Programming", section 3.6.2.3.

        unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
        if (Size < 4)
          Size = 4;

        if (NumLiterals == 0) {
          NumLiterals = 1;
          LiteralSize = Size;
        } else if (LiteralSize != Size) {
          NumLiterals = 2;
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // Point the diagnostic at whichever offending operand appears last.
  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}
3628
// Check VOPD (dual-issue) register-bank constraints: the X and Y components
// must not use conflicting VGPR banks for corresponding dst/src operands.
// Emits a diagnostic and returns false on violation.
bool AMDGPUAsmParser::validateVOPDRegBankConstraints(
    const MCInst &Inst, const OperandVector &Operands) {

  const unsigned Opcode = Inst.getOpcode();
  if (!isVOPD(Opcode))
    return true;

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  // Map a component operand to its VGPR; SGPRs (and non-registers) are
  // reported as NoRegister so they are exempt from the bank check.
  auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
    const MCOperand &Opr = Inst.getOperand(OperandIdx);
    return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
               ? Opr.getReg()
               : MCRegister::NoRegister;
  };

  // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.
  bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12;

  const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
  auto InvalidCompOprIdx =
      InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc);
  if (!InvalidCompOprIdx)
    return true;

  // Report the error at the later of the two parsed operands that form the
  // offending component pair.
  auto CompOprIdx = *InvalidCompOprIdx;
  auto ParsedIdx =
      std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
               InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
  assert(ParsedIdx > 0 && ParsedIdx < Operands.size());

  auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
  if (CompOprIdx == VOPD::Component::DST) {
    Error(Loc, "one dst register must be even and the other odd");
  } else {
    auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
    Error(Loc, Twine("src") + Twine(CompSrcIdx) +
                   " operands must use different VGPR banks");
  }

  return false;
}
3671
validateIntClampSupported(const MCInst & Inst)3672 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3673
3674 const unsigned Opc = Inst.getOpcode();
3675 const MCInstrDesc &Desc = MII.get(Opc);
3676
3677 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3678 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3679 assert(ClampIdx != -1);
3680 return Inst.getOperand(ClampIdx).getImm() == 0;
3681 }
3682
3683 return true;
3684 }
3685
// TSFlags mask matching every image-style encoding: classic MIMG plus the
// split VIMAGE/VSAMPLE forms.
constexpr uint64_t MIMGFlags =
    SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE;
3688
// Check that the vdata register width of an image instruction matches the
// number of components implied by dmask, adjusted for tfe and packed d16.
// Emits a diagnostic at \p IDLoc and returns false on mismatch.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  assert(VDataIdx != -1);

  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
    return true;

  unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  // A dmask of 0 still returns one component.
  if (DMask == 0)
    DMask = 1;

  bool IsPackedD16 = false;
  // Gather4 always returns 4 components regardless of dmask.
  unsigned DataSize =
      (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
    // With packed d16, two components share one 32-bit register.
    if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  if ((VDataSize / 4) == DataSize + TFESize)
    return true;

  // List only the modifiers that exist on this subtarget in the message.
  StringRef Modifiers;
  if (isGFX90A())
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  else
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  return false;
}
3735
// Check that the number of address registers of a GFX10+ image instruction
// matches what the dim and a16 modifiers require. Handles both NSA
// (one operand per address register) and non-NSA (single tuple) encodings.
// Emits a diagnostic at \p IDLoc and returns false on mismatch.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
                                           const SMLoc &IDLoc) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  // MIMG uses 'srsrc'; VIMAGE/VSAMPLE use 'rsrc'.
  int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
                                                     : AMDGPU::OpName::rsrc;
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
  // BVH (ray intersect) ops only require the a16 flag to match the opcode.
  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
      return true;
    Error(IDLoc, "image address size does not match a16");
    return false;
  }

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // In NSA form each address register is a separate operand between vaddr0
  // and the resource, so the operand-index distance gives the count.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;

  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

  if (IsNSA) {
    if (hasPartialNSAEncoding() &&
        ExpectedAddrSize >
            getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
      // Partial NSA: the trailing addresses are packed into the final
      // vaddr operand, which may be a multi-register tuple.
      int VAddrLastIdx = SrsrcIdx - 1;
      unsigned VAddrLastSize =
          AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;

      ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
    }
  } else {
    // Non-NSA tuples are rounded up to the next power-of-two-ish size.
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;

    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
    // This provides backward compatibility for assembly created
    // before 160b/192b/224b types were directly supported.
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  }

  if (ActualAddrSize == ExpectedAddrSize)
    return true;

  Error(IDLoc, "image address size does not match dim and a16");
  return false;
}
3804
validateMIMGAtomicDMask(const MCInst & Inst)3805 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3806
3807 const unsigned Opc = Inst.getOpcode();
3808 const MCInstrDesc &Desc = MII.get(Opc);
3809
3810 if ((Desc.TSFlags & MIMGFlags) == 0)
3811 return true;
3812 if (!Desc.mayLoad() || !Desc.mayStore())
3813 return true; // Not atomic
3814
3815 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3816 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3817
3818 // This is an incomplete check because image_atomic_cmpswap
3819 // may only use 0x3 and 0xf while other atomic operations
3820 // may use 0x1 and 0x3. However these limitations are
3821 // verified when we check that dmask matches dst size.
3822 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3823 }
3824
validateMIMGGatherDMask(const MCInst & Inst)3825 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3826
3827 const unsigned Opc = Inst.getOpcode();
3828 const MCInstrDesc &Desc = MII.get(Opc);
3829
3830 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3831 return true;
3832
3833 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3834 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3835
3836 // GATHER4 instructions use dmask in a different fashion compared to
3837 // other MIMG instructions. The only useful DMASK values are
3838 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3839 // (red,red,red,red) etc.) The ISA document doesn't mention
3840 // this.
3841 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3842 }
3843
validateMIMGMSAA(const MCInst & Inst)3844 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
3845 const unsigned Opc = Inst.getOpcode();
3846 const MCInstrDesc &Desc = MII.get(Opc);
3847
3848 if ((Desc.TSFlags & MIMGFlags) == 0)
3849 return true;
3850
3851 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3852 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3853 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3854
3855 if (!BaseOpcode->MSAA)
3856 return true;
3857
3858 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3859 assert(DimIdx != -1);
3860
3861 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3862 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3863
3864 return DimInfo->MSAA;
3865 }
3866
IsMovrelsSDWAOpcode(const unsigned Opcode)3867 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3868 {
3869 switch (Opcode) {
3870 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3871 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3872 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3873 return true;
3874 default:
3875 return false;
3876 }
3877 }
3878
3879 // movrels* opcodes should only allow VGPRS as src0.
3880 // This is specified in .td description for vop1/vop3,
3881 // but sdwa is handled differently. See isSDWAOperand.
// Reject non-VGPR src0 on the SDWA movrels opcodes. Emits a diagnostic and
// returns false when src0 is an SGPR or a constant/expression.
bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
                                      const OperandVector &Operands) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  assert(Src0Idx != -1);

  SMLoc ErrLoc;
  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
  if (Src0.isReg()) {
    auto Reg = mc2PseudoReg(Src0.getReg());
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    // Any non-SGPR register operand is acceptable here.
    if (!isSGPR(Reg, TRI))
      return true;
    ErrLoc = getRegLoc(Reg, Operands);
  } else {
    ErrLoc = getConstLoc(Operands);
  }

  Error(ErrLoc, "source operand must be a VGPR");
  return false;
}
3909
validateMAIAccWrite(const MCInst & Inst,const OperandVector & Operands)3910 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3911 const OperandVector &Operands) {
3912
3913 const unsigned Opc = Inst.getOpcode();
3914
3915 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3916 return true;
3917
3918 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3919 assert(Src0Idx != -1);
3920
3921 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3922 if (!Src0.isReg())
3923 return true;
3924
3925 auto Reg = mc2PseudoReg(Src0.getReg());
3926 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3927 if (!isGFX90A() && isSGPR(Reg, TRI)) {
3928 Error(getRegLoc(Reg, Operands),
3929 "source operand must be either a VGPR or an inline constant");
3930 return false;
3931 }
3932
3933 return true;
3934 }
3935
validateMAISrc2(const MCInst & Inst,const OperandVector & Operands)3936 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
3937 const OperandVector &Operands) {
3938 unsigned Opcode = Inst.getOpcode();
3939 const MCInstrDesc &Desc = MII.get(Opcode);
3940
3941 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
3942 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
3943 return true;
3944
3945 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
3946 if (Src2Idx == -1)
3947 return true;
3948
3949 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
3950 Error(getConstLoc(Operands),
3951 "inline constants are not allowed for this operand");
3952 return false;
3953 }
3954
3955 return true;
3956 }
3957
// For MAI instructions with a destination wider than 128 bits, src2 must be
// either identical to dst or completely disjoint from it; a partial overlap
// is rejected with a diagnostic.
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
    return true;

  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  if (Src2Idx == -1)
    return true;

  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
  if (!Src2.isReg())
    return true;

  MCRegister Src2Reg = Src2.getReg();
  MCRegister DstReg = Inst.getOperand(0).getReg();
  // Accumulating in place (src2 == dst) is fine.
  if (Src2Reg == DstReg)
    return true;

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  // Destinations of 128 bits or less are exempt from the overlap check.
  if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
          "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}
3991
validateDivScale(const MCInst & Inst)3992 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3993 switch (Inst.getOpcode()) {
3994 default:
3995 return true;
3996 case V_DIV_SCALE_F32_gfx6_gfx7:
3997 case V_DIV_SCALE_F32_vi:
3998 case V_DIV_SCALE_F32_gfx10:
3999 case V_DIV_SCALE_F64_gfx6_gfx7:
4000 case V_DIV_SCALE_F64_vi:
4001 case V_DIV_SCALE_F64_gfx10:
4002 break;
4003 }
4004
4005 // TODO: Check that src0 = src1 or src2.
4006
4007 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4008 AMDGPU::OpName::src2_modifiers,
4009 AMDGPU::OpName::src2_modifiers}) {
4010 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4011 .getImm() &
4012 SISrcMods::ABS) {
4013 return false;
4014 }
4015 }
4016
4017 return true;
4018 }
4019
validateMIMGD16(const MCInst & Inst)4020 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4021
4022 const unsigned Opc = Inst.getOpcode();
4023 const MCInstrDesc &Desc = MII.get(Opc);
4024
4025 if ((Desc.TSFlags & MIMGFlags) == 0)
4026 return true;
4027
4028 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4029 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4030 if (isCI() || isSI())
4031 return false;
4032 }
4033
4034 return true;
4035 }
4036
// Return true for "rev" opcodes — the sub/shift variants whose source
// operands are reversed relative to the base instruction (subrev, lshlrev,
// lshrrev, ashrrev and their packed/carry forms across all encodings).
// Used by validateLdsDirect to forbid lds_direct with these instructions.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
4165
// Check uses of the lds_direct pseudo register. Returns an error message
// describing the violation, or std::nullopt when the instruction is valid.
std::optional<StringRef>
AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {

  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
  if ((Desc.TSFlags & Enc) == 0)
    return std::nullopt;

  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    // Source operands are contiguous; the first missing one ends the scan.
    if (SrcIdx == -1)
      break;
    const auto &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {

      if (isGFX90A() || isGFX11Plus())
        return StringRef("lds_direct is not supported on this GPU");

      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
        return StringRef("lds_direct cannot be used with this instruction");

      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return std::nullopt;
}
4199
getFlatOffsetLoc(const OperandVector & Operands) const4200 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4201 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4202 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4203 if (Op.isFlatOffset())
4204 return Op.getStartLoc();
4205 }
4206 return getLoc();
4207 }
4208
// Validate the named 'offset' operand: dispatch FLAT and SMEM instructions
// to their dedicated checks, otherwise range-check the immediate
// (24-bit signed for GFX12+ buffer ops, 16-bit unsigned elsewhere).
bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
                                     const OperandVector &Operands) {
  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true;

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT))
    return validateFlatOffset(Inst, Operands);

  if ((TSFlags & SIInstrFlags::SMRD))
    return validateSMEMOffset(Inst, Operands);

  const auto &Op = Inst.getOperand(OpNum);
  if (isGFX12Plus() &&
      (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
    // GFX12+ buffer instructions take a 24-bit signed offset.
    const unsigned OffsetSize = 24;
    if (!isIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
      return false;
    }
  } else {
    // All other cases take a 16-bit unsigned offset.
    const unsigned OffsetSize = 16;
    if (!isUIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      return false;
    }
  }
  return true;
}
4242
// Validate the offset of a FLAT-family instruction: offsets must be zero on
// subtargets without flat offsets, and otherwise fit the subtarget's
// encodable bit width (signed only for global/scratch or GFX12+).
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  assert(OpNum != -1);

  const auto &Op = Inst.getOperand(OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // For pre-GFX12 FLAT instructions the offset must be positive;
  // MSB is ignored and forced to zero.
  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
  bool AllowNegative =
      (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) ||
      isGFX12Plus();
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    Error(getFlatOffsetLoc(Operands),
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    return false;
  }

  return true;
}
4276
getSMEMOffsetLoc(const OperandVector & Operands) const4277 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4278 // Start with second operand because SMEM Offset cannot be dst or src0.
4279 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4280 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4281 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4282 return Op.getStartLoc();
4283 }
4284 return getLoc();
4285 }
4286
// Validate the immediate offset of an SMEM/SMRD instruction against the
// subtarget's legal encodable range. Emits a diagnostic naming the expected
// range on failure.
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // SI/CI SMRD offsets are handled elsewhere.
  if (isCI() || isSI())
    return true;

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::SMRD) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true;

  const auto &Op = Inst.getOperand(OpNum);
  // Non-immediate (symbolic) offsets are resolved later.
  if (!Op.isImm())
    return true;

  uint64_t Offset = Op.getImm();
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
    return true;

  Error(getSMEMOffsetLoc(Operands),
        isGFX12Plus()          ? "expected a 24-bit signed offset"
        : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset"
                               : "expected a 21-bit signed offset");

  return false;
}
4318
// SOP2/SOPC instructions may use at most one unique 32-bit literal (or
// expression) across src0/src1. Returns false when more than one distinct
// literal value would be required.
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
        uint32_t Value = static_cast<uint32_t>(MO.getImm());
        // Identical literal values share a single literal slot.
        if (NumLiterals == 0 || LiteralValue != Value) {
          LiteralValue = Value;
          ++NumLiterals;
        }
      } else if (MO.isExpr()) {
        ++NumExprs;
      }
    }
  }

  return NumLiterals + NumExprs <= 1;
}
4354
// Validate op_sel/op_sel_hi against per-opcode restrictions. Returns false
// when a disallowed bit is set.
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  // permlane16 variants only allow op_sel on the low two bits.
  if (isPermlane16(Opc)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

    if (OpSel & ~3)
      return false;
  }

  uint64_t TSFlags = MII.get(Opc).TSFlags;

  // On GFX940, DOT instructions must keep the default op_sel (0) and
  // op_sel_hi (all ones, stored as -1).
  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      if (Inst.getOperand(OpSelIdx).getImm() != 0)
        return false;
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
        return false;
    }
  }

  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    if (OpSel & 3)
      return false;
  }

  return true;
}
4391
// Validate a neg_lo/neg_hi modifier (\p OpName selects which) against the
// per-source restrictions of DOT/WMMA/SWMMAC instructions. Returns false
// when a neg bit is set for a source that does not allow it.
bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, int OpName) {
  assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);

  const unsigned Opc = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opc).TSFlags;

  // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
  // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
  // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
  // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
  if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
      !(TSFlags & SIInstrFlags::IsSWMMAC))
    return true;

  int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
  if (NegIdx == -1)
    return true;

  unsigned Neg = Inst.getOperand(NegIdx).getImm();

  // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
  // on some src operands but not allowed on other.
  // It is convenient that such instructions don't have src_modifiers operand
  // for src operands that don't allow neg because they also don't allow opsel.

  int SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers};

  // Bit i of Neg corresponds to src<i>; a missing src<i>_modifiers operand
  // means neg is not allowed on that source.
  for (unsigned i = 0; i < 3; ++i) {
    if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
      if (Neg & (1 << i))
        return false;
    }
  }

  return true;
}
4430
// Validate DPP usage: restrict DP ALU dpp controls on subtargets that only
// support row_newbcast, and reject SGPR/immediate src1 on subtargets whose
// DPP encoding does not allow it. Emits a diagnostic and returns false on
// violation.
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  if (DppCtrlIdx >= 0) {
    unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();

    if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
        AMDGPU::isDPALU_DPP(MII.get(Opc))) {
      // DP ALU DPP is supported for row_newbcast only on GFX9*
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
      Error(S, "DP ALU dpp only supports row_newbcast");
      return false;
    }
  }

  // An instruction is DPP if it has either a dpp_ctrl or a dpp8 operand.
  int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;

  if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    if (Src1Idx >= 0) {
      const MCOperand &Src1 = Inst.getOperand(Src1Idx);
      const MCRegisterInfo *TRI = getContext().getRegisterInfo();
      if (Src1.isImm() ||
          (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
        AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
        Error(Op.getStartLoc(), "invalid operand for instruction");
        return false;
      }
    }
  }

  return true;
}
4466
4467 // Check if VCC register matches wavefront size
validateVccOperand(unsigned Reg) const4468 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
4469 auto FB = getFeatureBits();
4470 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
4471 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
4472 }
4473
4474 // One unique literal can be used. VOP3 literal is only allowed in GFX10+
// Check VOP3/VOP3P/VOPD literal usage: literals require the VOP3Literal
// feature (unless the opcode has a mandatory literal operand), and only one
// unique literal value is permitted per instruction. Emits a diagnostic and
// returns false on violation.
bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  // Opcodes like madmk carry a dedicated 'imm' literal operand.
  bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
      !HasMandatoryLiteral && !isVOPD(Opcode))
    return true;

  OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!isSISrcOperand(Desc, OpIdx))
      continue;

    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      uint64_t Value = static_cast<uint64_t>(MO.getImm());
      bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) &&
                    AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
      bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);

      // The literal must be encodable in 32 bits (directly, or as the high
      // half of an fp64 constant).
      if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) {
        Error(getLitLoc(Operands), "invalid operand for instruction");
        return false;
      }

      // fp64 literals are encoded via their high 32 bits; compare those so
      // identical constants are recognized as one literal.
      if (IsFP64 && IsValid32Op)
        Value = Hi_32(Value);

      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands, true), "only one unique literal operand is allowed");
    return false;
  }

  return true;
}
4539
4540 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
IsAGPROperand(const MCInst & Inst,uint16_t NameIdx,const MCRegisterInfo * MRI)4541 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
4542 const MCRegisterInfo *MRI) {
4543 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
4544 if (OpIdx < 0)
4545 return -1;
4546
4547 const MCOperand &Op = Inst.getOperand(OpIdx);
4548 if (!Op.isReg())
4549 return -1;
4550
4551 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4552 auto Reg = Sub ? Sub : Op.getReg();
4553 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4554 return AGPR32.contains(Reg) ? 1 : 0;
4555 }
4556
validateAGPRLdSt(const MCInst & Inst) const4557 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
4558 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4559 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
4560 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4561 SIInstrFlags::DS)) == 0)
4562 return true;
4563
4564 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
4565 : AMDGPU::OpName::vdata;
4566
4567 const MCRegisterInfo *MRI = getMRI();
4568 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
4569 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
4570
4571 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
4572 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
4573 if (Data2Areg >= 0 && Data2Areg != DataAreg)
4574 return false;
4575 }
4576
4577 auto FB = getFeatureBits();
4578 if (FB[AMDGPU::FeatureGFX90AInsts]) {
4579 if (DataAreg < 0 || DstAreg < 0)
4580 return true;
4581 return DstAreg == DataAreg;
4582 }
4583
4584 return DstAreg < 1 && DataAreg < 1;
4585 }
4586
validateVGPRAlign(const MCInst & Inst) const4587 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
4588 auto FB = getFeatureBits();
4589 if (!FB[AMDGPU::FeatureGFX90AInsts])
4590 return true;
4591
4592 const MCRegisterInfo *MRI = getMRI();
4593 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
4594 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
4595 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
4596 const MCOperand &Op = Inst.getOperand(I);
4597 if (!Op.isReg())
4598 continue;
4599
4600 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
4601 if (!Sub)
4602 continue;
4603
4604 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
4605 return false;
4606 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
4607 return false;
4608 }
4609
4610 return true;
4611 }
4612
getBLGPLoc(const OperandVector & Operands) const4613 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
4614 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4615 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4616 if (Op.isBLGP())
4617 return Op.getStartLoc();
4618 }
4619 return SMLoc();
4620 }
4621
validateBLGP(const MCInst & Inst,const OperandVector & Operands)4622 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
4623 const OperandVector &Operands) {
4624 unsigned Opc = Inst.getOpcode();
4625 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
4626 if (BlgpIdx == -1)
4627 return true;
4628 SMLoc BLGPLoc = getBLGPLoc(Operands);
4629 if (!BLGPLoc.isValid())
4630 return true;
4631 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
4632 auto FB = getFeatureBits();
4633 bool UsesNeg = false;
4634 if (FB[AMDGPU::FeatureGFX940Insts]) {
4635 switch (Opc) {
4636 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
4637 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
4638 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
4639 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
4640 UsesNeg = true;
4641 }
4642 }
4643
4644 if (IsNeg == UsesNeg)
4645 return true;
4646
4647 Error(BLGPLoc,
4648 UsesNeg ? "invalid modifier: blgp is not supported"
4649 : "invalid modifier: neg is not supported");
4650
4651 return false;
4652 }
4653
validateWaitCnt(const MCInst & Inst,const OperandVector & Operands)4654 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
4655 const OperandVector &Operands) {
4656 if (!isGFX11Plus())
4657 return true;
4658
4659 unsigned Opc = Inst.getOpcode();
4660 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
4661 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
4662 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
4663 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
4664 return true;
4665
4666 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
4667 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
4668 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
4669 if (Reg == AMDGPU::SGPR_NULL)
4670 return true;
4671
4672 SMLoc RegLoc = getRegLoc(Reg, Operands);
4673 Error(RegLoc, "src0 must be null");
4674 return false;
4675 }
4676
validateDS(const MCInst & Inst,const OperandVector & Operands)4677 bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
4678 const OperandVector &Operands) {
4679 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4680 if ((TSFlags & SIInstrFlags::DS) == 0)
4681 return true;
4682 if (TSFlags & SIInstrFlags::GWS)
4683 return validateGWS(Inst, Operands);
4684 // Only validate GDS for non-GWS instructions.
4685 if (hasGDS())
4686 return true;
4687 int GDSIdx =
4688 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
4689 if (GDSIdx < 0)
4690 return true;
4691 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
4692 if (GDS) {
4693 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
4694 Error(S, "gds modifier is not supported on this GPU");
4695 return false;
4696 }
4697 return true;
4698 }
4699
// gfx90a has an undocumented limitation:
// DS_GWS opcodes must use even aligned registers.
bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
                                  const OperandVector &Operands) {
  if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
    return true;

  int Opc = Inst.getOpcode();
  // Only the GWS opcodes that carry a data0 register operand are constrained.
  if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
      Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
    return true;

  const MCRegisterInfo *MRI = getMRI();
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  int Data0Pos =
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
  assert(Data0Pos != -1);
  auto Reg = Inst.getOperand(Data0Pos).getReg();
  // Compute the register's index within its bank (VGPR or AGPR); the low bit
  // of that index must be clear.
  auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
  if (RegIdx & 1) {
    SMLoc RegLoc = getRegLoc(Reg, Operands);
    Error(RegLoc, "vgpr must be even aligned");
    return false;
  }

  return true;
}
4727
validateCoherencyBits(const MCInst & Inst,const OperandVector & Operands,const SMLoc & IDLoc)4728 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
4729 const OperandVector &Operands,
4730 const SMLoc &IDLoc) {
4731 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
4732 AMDGPU::OpName::cpol);
4733 if (CPolPos == -1)
4734 return true;
4735
4736 unsigned CPol = Inst.getOperand(CPolPos).getImm();
4737
4738 if (isGFX12Plus())
4739 return validateTHAndScopeBits(Inst, Operands, CPol);
4740
4741 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4742 if (TSFlags & SIInstrFlags::SMRD) {
4743 if (CPol && (isSI() || isCI())) {
4744 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4745 Error(S, "cache policy is not supported for SMRD instructions");
4746 return false;
4747 }
4748 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
4749 Error(IDLoc, "invalid cache policy for SMEM instruction");
4750 return false;
4751 }
4752 }
4753
4754 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
4755 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
4756 SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
4757 SIInstrFlags::FLAT;
4758 if (!(TSFlags & AllowSCCModifier)) {
4759 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4760 StringRef CStr(S.getPointer());
4761 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
4762 Error(S,
4763 "scc modifier is not supported for this instruction on this GPU");
4764 return false;
4765 }
4766 }
4767
4768 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
4769 return true;
4770
4771 if (TSFlags & SIInstrFlags::IsAtomicRet) {
4772 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
4773 Error(IDLoc, isGFX940() ? "instruction must use sc0"
4774 : "instruction must use glc");
4775 return false;
4776 }
4777 } else {
4778 if (CPol & CPol::GLC) {
4779 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4780 StringRef CStr(S.getPointer());
4781 S = SMLoc::getFromPointer(
4782 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
4783 Error(S, isGFX940() ? "instruction must not use sc0"
4784 : "instruction must not use glc");
4785 return false;
4786 }
4787 }
4788
4789 return true;
4790 }
4791
validateTHAndScopeBits(const MCInst & Inst,const OperandVector & Operands,const unsigned CPol)4792 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
4793 const OperandVector &Operands,
4794 const unsigned CPol) {
4795 const unsigned TH = CPol & AMDGPU::CPol::TH;
4796 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
4797
4798 const unsigned Opcode = Inst.getOpcode();
4799 const MCInstrDesc &TID = MII.get(Opcode);
4800
4801 auto PrintError = [&](StringRef Msg) {
4802 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
4803 Error(S, Msg);
4804 return false;
4805 };
4806
4807 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
4808 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
4809 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
4810 return PrintError("instruction must use th:TH_ATOMIC_RETURN");
4811
4812 if (TH == 0)
4813 return true;
4814
4815 if ((TID.TSFlags & SIInstrFlags::SMRD) &&
4816 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
4817 (TH == AMDGPU::CPol::TH_NT_HT)))
4818 return PrintError("invalid th value for SMEM instruction");
4819
4820 if (TH == AMDGPU::CPol::TH_BYPASS) {
4821 if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
4822 CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
4823 (Scope == AMDGPU::CPol::SCOPE_SYS &&
4824 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
4825 return PrintError("scope and th combination is not valid");
4826 }
4827
4828 bool IsStore = TID.mayStore();
4829 bool IsAtomic =
4830 TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
4831
4832 if (IsAtomic) {
4833 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
4834 return PrintError("invalid th value for atomic instructions");
4835 } else if (IsStore) {
4836 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
4837 return PrintError("invalid th value for store instructions");
4838 } else {
4839 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
4840 return PrintError("invalid th value for load instructions");
4841 }
4842
4843 return true;
4844 }
4845
validateExeczVcczOperands(const OperandVector & Operands)4846 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
4847 if (!isGFX11Plus())
4848 return true;
4849 for (auto &Operand : Operands) {
4850 if (!Operand->isReg())
4851 continue;
4852 unsigned Reg = Operand->getReg();
4853 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) {
4854 Error(getRegLoc(Reg, Operands),
4855 "execz and vccz are not supported on this GPU");
4856 return false;
4857 }
4858 }
4859 return true;
4860 }
4861
validateTFE(const MCInst & Inst,const OperandVector & Operands)4862 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
4863 const OperandVector &Operands) {
4864 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
4865 if (Desc.mayStore() &&
4866 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4867 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
4868 if (Loc != getInstLoc(Operands)) {
4869 Error(Loc, "TFE modifier has no meaning for store instructions");
4870 return false;
4871 }
4872 }
4873
4874 return true;
4875 }
4876
validateInstruction(const MCInst & Inst,const SMLoc & IDLoc,const OperandVector & Operands)4877 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
4878 const SMLoc &IDLoc,
4879 const OperandVector &Operands) {
4880 if (auto ErrMsg = validateLdsDirect(Inst)) {
4881 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
4882 return false;
4883 }
4884 if (!validateSOPLiteral(Inst)) {
4885 Error(getLitLoc(Operands),
4886 "only one unique literal operand is allowed");
4887 return false;
4888 }
4889 if (!validateVOPLiteral(Inst, Operands)) {
4890 return false;
4891 }
4892 if (!validateConstantBusLimitations(Inst, Operands)) {
4893 return false;
4894 }
4895 if (!validateVOPDRegBankConstraints(Inst, Operands)) {
4896 return false;
4897 }
4898 if (!validateIntClampSupported(Inst)) {
4899 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
4900 "integer clamping is not supported on this GPU");
4901 return false;
4902 }
4903 if (!validateOpSel(Inst)) {
4904 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
4905 "invalid op_sel operand");
4906 return false;
4907 }
4908 if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
4909 Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
4910 "invalid neg_lo operand");
4911 return false;
4912 }
4913 if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
4914 Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
4915 "invalid neg_hi operand");
4916 return false;
4917 }
4918 if (!validateDPP(Inst, Operands)) {
4919 return false;
4920 }
4921 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
4922 if (!validateMIMGD16(Inst)) {
4923 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
4924 "d16 modifier is not supported on this GPU");
4925 return false;
4926 }
4927 if (!validateMIMGMSAA(Inst)) {
4928 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
4929 "invalid dim; must be MSAA type");
4930 return false;
4931 }
4932 if (!validateMIMGDataSize(Inst, IDLoc)) {
4933 return false;
4934 }
4935 if (!validateMIMGAddrSize(Inst, IDLoc))
4936 return false;
4937 if (!validateMIMGAtomicDMask(Inst)) {
4938 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4939 "invalid atomic image dmask");
4940 return false;
4941 }
4942 if (!validateMIMGGatherDMask(Inst)) {
4943 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
4944 "invalid image_gather dmask: only one bit must be set");
4945 return false;
4946 }
4947 if (!validateMovrels(Inst, Operands)) {
4948 return false;
4949 }
4950 if (!validateOffset(Inst, Operands)) {
4951 return false;
4952 }
4953 if (!validateMAIAccWrite(Inst, Operands)) {
4954 return false;
4955 }
4956 if (!validateMAISrc2(Inst, Operands)) {
4957 return false;
4958 }
4959 if (!validateMFMA(Inst, Operands)) {
4960 return false;
4961 }
4962 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
4963 return false;
4964 }
4965
4966 if (!validateAGPRLdSt(Inst)) {
4967 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
4968 ? "invalid register class: data and dst should be all VGPR or AGPR"
4969 : "invalid register class: agpr loads and stores not supported on this GPU"
4970 );
4971 return false;
4972 }
4973 if (!validateVGPRAlign(Inst)) {
4974 Error(IDLoc,
4975 "invalid register class: vgpr tuples must be 64 bit aligned");
4976 return false;
4977 }
4978 if (!validateDS(Inst, Operands)) {
4979 return false;
4980 }
4981
4982 if (!validateBLGP(Inst, Operands)) {
4983 return false;
4984 }
4985
4986 if (!validateDivScale(Inst)) {
4987 Error(IDLoc, "ABS not allowed in VOP3B instructions");
4988 return false;
4989 }
4990 if (!validateWaitCnt(Inst, Operands)) {
4991 return false;
4992 }
4993 if (!validateExeczVcczOperands(Operands)) {
4994 return false;
4995 }
4996 if (!validateTFE(Inst, Operands)) {
4997 return false;
4998 }
4999
5000 return true;
5001 }
5002
5003 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
5004 const FeatureBitset &FBS,
5005 unsigned VariantID = 0);
5006
5007 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5008 const FeatureBitset &AvailableFeatures,
5009 unsigned VariantID);
5010
isSupportedMnemo(StringRef Mnemo,const FeatureBitset & FBS)5011 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5012 const FeatureBitset &FBS) {
5013 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
5014 }
5015
isSupportedMnemo(StringRef Mnemo,const FeatureBitset & FBS,ArrayRef<unsigned> Variants)5016 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5017 const FeatureBitset &FBS,
5018 ArrayRef<unsigned> Variants) {
5019 for (auto Variant : Variants) {
5020 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5021 return true;
5022 }
5023
5024 return false;
5025 }
5026
checkUnsupportedInstruction(StringRef Mnemo,const SMLoc & IDLoc)5027 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
5028 const SMLoc &IDLoc) {
5029 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());
5030
5031 // Check if requested instruction variant is supported.
5032 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
5033 return false;
5034
5035 // This instruction is not supported.
5036 // Clear any other pending errors because they are no longer relevant.
5037 getParser().clearPendingErrors();
5038
5039 // Requested instruction variant is not supported.
5040 // Check if any other variants are supported.
5041 StringRef VariantName = getMatchedVariantName();
5042 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
5043 return Error(IDLoc,
5044 Twine(VariantName,
5045 " variant of this instruction is not supported"));
5046 }
5047
5048 // Check if this instruction may be used with a different wavesize.
5049 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
5050 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
5051
5052 FeatureBitset FeaturesWS32 = getFeatureBits();
5053 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
5054 .flip(AMDGPU::FeatureWavefrontSize32);
5055 FeatureBitset AvailableFeaturesWS32 =
5056 ComputeAvailableFeatures(FeaturesWS32);
5057
5058 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
5059 return Error(IDLoc, "instruction requires wavesize=32");
5060 }
5061
5062 // Finally check if this instruction is supported on any other GPU.
5063 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
5064 return Error(IDLoc, "instruction not supported on this GPU");
5065 }
5066
5067 // Instruction not supported on any GPU. Probably a typo.
5068 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
5069 return Error(IDLoc, "invalid instruction" + Suggestion);
5070 }
5071
isInvalidVOPDY(const OperandVector & Operands,uint64_t InvalidOprIdx)5072 static bool isInvalidVOPDY(const OperandVector &Operands,
5073 uint64_t InvalidOprIdx) {
5074 assert(InvalidOprIdx < Operands.size());
5075 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5076 if (Op.isToken() && InvalidOprIdx > 1) {
5077 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5078 return PrevOp.isToken() && PrevOp.getToken() == "::";
5079 }
5080 return false;
5081 }
5082
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)5083 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
5084 OperandVector &Operands,
5085 MCStreamer &Out,
5086 uint64_t &ErrorInfo,
5087 bool MatchingInlineAsm) {
5088 MCInst Inst;
5089 unsigned Result = Match_Success;
5090 for (auto Variant : getMatchedVariants()) {
5091 uint64_t EI;
5092 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
5093 Variant);
5094 // We order match statuses from least to most specific. We use most specific
5095 // status as resulting
5096 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
5097 if ((R == Match_Success) ||
5098 (R == Match_PreferE32) ||
5099 (R == Match_MissingFeature && Result != Match_PreferE32) ||
5100 (R == Match_InvalidOperand && Result != Match_MissingFeature
5101 && Result != Match_PreferE32) ||
5102 (R == Match_MnemonicFail && Result != Match_InvalidOperand
5103 && Result != Match_MissingFeature
5104 && Result != Match_PreferE32)) {
5105 Result = R;
5106 ErrorInfo = EI;
5107 }
5108 if (R == Match_Success)
5109 break;
5110 }
5111
5112 if (Result == Match_Success) {
5113 if (!validateInstruction(Inst, IDLoc, Operands)) {
5114 return true;
5115 }
5116 Inst.setLoc(IDLoc);
5117 Out.emitInstruction(Inst, getSTI());
5118 return false;
5119 }
5120
5121 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
5122 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
5123 return true;
5124 }
5125
5126 switch (Result) {
5127 default: break;
5128 case Match_MissingFeature:
5129 // It has been verified that the specified instruction
5130 // mnemonic is valid. A match was found but it requires
5131 // features which are not supported on this GPU.
5132 return Error(IDLoc, "operands are not valid for this GPU or mode");
5133
5134 case Match_InvalidOperand: {
5135 SMLoc ErrorLoc = IDLoc;
5136 if (ErrorInfo != ~0ULL) {
5137 if (ErrorInfo >= Operands.size()) {
5138 return Error(IDLoc, "too few operands for instruction");
5139 }
5140 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
5141 if (ErrorLoc == SMLoc())
5142 ErrorLoc = IDLoc;
5143
5144 if (isInvalidVOPDY(Operands, ErrorInfo))
5145 return Error(ErrorLoc, "invalid VOPDY instruction");
5146 }
5147 return Error(ErrorLoc, "invalid operand for instruction");
5148 }
5149
5150 case Match_PreferE32:
5151 return Error(IDLoc, "internal error: instruction without _e64 suffix "
5152 "should be encoded as e32");
5153 case Match_MnemonicFail:
5154 llvm_unreachable("Invalid instructions should have been handled already");
5155 }
5156 llvm_unreachable("Implement any new match types added!");
5157 }
5158
ParseAsAbsoluteExpression(uint32_t & Ret)5159 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5160 int64_t Tmp = -1;
5161 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5162 return true;
5163 }
5164 if (getParser().parseAbsoluteExpression(Tmp)) {
5165 return true;
5166 }
5167 Ret = static_cast<uint32_t>(Tmp);
5168 return false;
5169 }
5170
ParseDirectiveAMDGCNTarget()5171 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5172 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5173 return TokError("directive only supported for amdgcn architecture");
5174
5175 std::string TargetIDDirective;
5176 SMLoc TargetStart = getTok().getLoc();
5177 if (getParser().parseEscapedString(TargetIDDirective))
5178 return true;
5179
5180 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5181 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5182 return getParser().Error(TargetRange.Start,
5183 (Twine(".amdgcn_target directive's target id ") +
5184 Twine(TargetIDDirective) +
5185 Twine(" does not match the specified target id ") +
5186 Twine(getTargetStreamer().getTargetID()->toString())).str());
5187
5188 return false;
5189 }
5190
OutOfRangeError(SMRange Range)5191 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
5192 return Error(Range.Start, "value out of range", Range);
5193 }
5194
calculateGPRBlocks(const FeatureBitset & Features,bool VCCUsed,bool FlatScrUsed,bool XNACKUsed,std::optional<bool> EnableWavefrontSize32,unsigned NextFreeVGPR,SMRange VGPRRange,unsigned NextFreeSGPR,SMRange SGPRRange,unsigned & VGPRBlocks,unsigned & SGPRBlocks)5195 bool AMDGPUAsmParser::calculateGPRBlocks(
5196 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
5197 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32,
5198 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR,
5199 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
5200 // TODO(scott.linder): These calculations are duplicated from
5201 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
5202 IsaVersion Version = getIsaVersion(getSTI().getCPU());
5203
5204 unsigned NumVGPRs = NextFreeVGPR;
5205 unsigned NumSGPRs = NextFreeSGPR;
5206
5207 if (Version.Major >= 10)
5208 NumSGPRs = 0;
5209 else {
5210 unsigned MaxAddressableNumSGPRs =
5211 IsaInfo::getAddressableNumSGPRs(&getSTI());
5212
5213 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
5214 NumSGPRs > MaxAddressableNumSGPRs)
5215 return OutOfRangeError(SGPRRange);
5216
5217 NumSGPRs +=
5218 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
5219
5220 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
5221 NumSGPRs > MaxAddressableNumSGPRs)
5222 return OutOfRangeError(SGPRRange);
5223
5224 if (Features.test(FeatureSGPRInitBug))
5225 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
5226 }
5227
5228 VGPRBlocks =
5229 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
5230 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
5231
5232 return false;
5233 }
5234
ParseDirectiveAMDHSAKernel()5235 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5236 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
5237 return TokError("directive only supported for amdgcn architecture");
5238
5239 if (!isHsaAbi(getSTI()))
5240 return TokError("directive only supported for amdhsa OS");
5241
5242 StringRef KernelName;
5243 if (getParser().parseIdentifier(KernelName))
5244 return true;
5245
5246 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
5247
5248 StringSet<> Seen;
5249
5250 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5251
5252 SMRange VGPRRange;
5253 uint64_t NextFreeVGPR = 0;
5254 uint64_t AccumOffset = 0;
5255 uint64_t SharedVGPRCount = 0;
5256 uint64_t PreloadLength = 0;
5257 uint64_t PreloadOffset = 0;
5258 SMRange SGPRRange;
5259 uint64_t NextFreeSGPR = 0;
5260
5261 // Count the number of user SGPRs implied from the enabled feature bits.
5262 unsigned ImpliedUserSGPRCount = 0;
5263
5264 // Track if the asm explicitly contains the directive for the user SGPR
5265 // count.
5266 std::optional<unsigned> ExplicitUserSGPRCount;
5267 bool ReserveVCC = true;
5268 bool ReserveFlatScr = true;
5269 std::optional<bool> EnableWavefrontSize32;
5270
5271 while (true) {
5272 while (trySkipToken(AsmToken::EndOfStatement));
5273
5274 StringRef ID;
5275 SMRange IDRange = getTok().getLocRange();
5276 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
5277 return true;
5278
5279 if (ID == ".end_amdhsa_kernel")
5280 break;
5281
5282 if (!Seen.insert(ID).second)
5283 return TokError(".amdhsa_ directives cannot be repeated");
5284
5285 SMLoc ValStart = getLoc();
5286 int64_t IVal;
5287 if (getParser().parseAbsoluteExpression(IVal))
5288 return true;
5289 SMLoc ValEnd = getLoc();
5290 SMRange ValRange = SMRange(ValStart, ValEnd);
5291
5292 if (IVal < 0)
5293 return OutOfRangeError(ValRange);
5294
5295 uint64_t Val = IVal;
5296
5297 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
5298 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
5299 return OutOfRangeError(RANGE); \
5300 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
5301
5302 if (ID == ".amdhsa_group_segment_fixed_size") {
5303 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
5304 return OutOfRangeError(ValRange);
5305 KD.group_segment_fixed_size = Val;
5306 } else if (ID == ".amdhsa_private_segment_fixed_size") {
5307 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
5308 return OutOfRangeError(ValRange);
5309 KD.private_segment_fixed_size = Val;
5310 } else if (ID == ".amdhsa_kernarg_size") {
5311 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
5312 return OutOfRangeError(ValRange);
5313 KD.kernarg_size = Val;
5314 } else if (ID == ".amdhsa_user_sgpr_count") {
5315 ExplicitUserSGPRCount = Val;
5316 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
5317 if (hasArchitectedFlatScratch())
5318 return Error(IDRange.Start,
5319 "directive is not supported with architected flat scratch",
5320 IDRange);
5321 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5322 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
5323 Val, ValRange);
5324 if (Val)
5325 ImpliedUserSGPRCount += 4;
5326 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
5327 if (!hasKernargPreload())
5328 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5329
5330 if (Val > getMaxNumUserSGPRs())
5331 return OutOfRangeError(ValRange);
5332 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val,
5333 ValRange);
5334 if (Val) {
5335 ImpliedUserSGPRCount += Val;
5336 PreloadLength = Val;
5337 }
5338 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
5339 if (!hasKernargPreload())
5340 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5341
5342 if (Val >= 1024)
5343 return OutOfRangeError(ValRange);
5344 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val,
5345 ValRange);
5346 if (Val)
5347 PreloadOffset = Val;
5348 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
5349 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5350 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
5351 ValRange);
5352 if (Val)
5353 ImpliedUserSGPRCount += 2;
5354 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
5355 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5356 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
5357 ValRange);
5358 if (Val)
5359 ImpliedUserSGPRCount += 2;
5360 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
5361 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5362 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
5363 Val, ValRange);
5364 if (Val)
5365 ImpliedUserSGPRCount += 2;
5366 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
5367 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5368 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
5369 ValRange);
5370 if (Val)
5371 ImpliedUserSGPRCount += 2;
5372 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
5373 if (hasArchitectedFlatScratch())
5374 return Error(IDRange.Start,
5375 "directive is not supported with architected flat scratch",
5376 IDRange);
5377 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5378 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
5379 ValRange);
5380 if (Val)
5381 ImpliedUserSGPRCount += 2;
5382 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
5383 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5384 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
5385 Val, ValRange);
5386 if (Val)
5387 ImpliedUserSGPRCount += 1;
5388 } else if (ID == ".amdhsa_wavefront_size32") {
5389 if (IVersion.Major < 10)
5390 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5391 EnableWavefrontSize32 = Val;
5392 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5393 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
5394 Val, ValRange);
5395 } else if (ID == ".amdhsa_uses_dynamic_stack") {
5396 PARSE_BITS_ENTRY(KD.kernel_code_properties,
5397 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange);
5398 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
5399 if (hasArchitectedFlatScratch())
5400 return Error(IDRange.Start,
5401 "directive is not supported with architected flat scratch",
5402 IDRange);
5403 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5404 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5405 } else if (ID == ".amdhsa_enable_private_segment") {
5406 if (!hasArchitectedFlatScratch())
5407 return Error(
5408 IDRange.Start,
5409 "directive is not supported without architected flat scratch",
5410 IDRange);
5411 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5412 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
5413 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
5414 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5415 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
5416 ValRange);
5417 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
5418 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5419 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
5420 ValRange);
5421 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
5422 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5423 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
5424 ValRange);
5425 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
5426 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5427 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
5428 ValRange);
5429 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
5430 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5431 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
5432 ValRange);
5433 } else if (ID == ".amdhsa_next_free_vgpr") {
5434 VGPRRange = ValRange;
5435 NextFreeVGPR = Val;
5436 } else if (ID == ".amdhsa_next_free_sgpr") {
5437 SGPRRange = ValRange;
5438 NextFreeSGPR = Val;
5439 } else if (ID == ".amdhsa_accum_offset") {
5440 if (!isGFX90A())
5441 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5442 AccumOffset = Val;
5443 } else if (ID == ".amdhsa_reserve_vcc") {
5444 if (!isUInt<1>(Val))
5445 return OutOfRangeError(ValRange);
5446 ReserveVCC = Val;
5447 } else if (ID == ".amdhsa_reserve_flat_scratch") {
5448 if (IVersion.Major < 7)
5449 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
5450 if (hasArchitectedFlatScratch())
5451 return Error(IDRange.Start,
5452 "directive is not supported with architected flat scratch",
5453 IDRange);
5454 if (!isUInt<1>(Val))
5455 return OutOfRangeError(ValRange);
5456 ReserveFlatScr = Val;
5457 } else if (ID == ".amdhsa_reserve_xnack_mask") {
5458 if (IVersion.Major < 8)
5459 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
5460 if (!isUInt<1>(Val))
5461 return OutOfRangeError(ValRange);
5462 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
5463 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
5464 IDRange);
5465 } else if (ID == ".amdhsa_float_round_mode_32") {
5466 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5467 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
5468 } else if (ID == ".amdhsa_float_round_mode_16_64") {
5469 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5470 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
5471 } else if (ID == ".amdhsa_float_denorm_mode_32") {
5472 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5473 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
5474 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
5475 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5476 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
5477 ValRange);
5478 } else if (ID == ".amdhsa_dx10_clamp") {
5479 if (IVersion.Major >= 12)
5480 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5481 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5482 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val,
5483 ValRange);
5484 } else if (ID == ".amdhsa_ieee_mode") {
5485 if (IVersion.Major >= 12)
5486 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange);
5487 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5488 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val,
5489 ValRange);
5490 } else if (ID == ".amdhsa_fp16_overflow") {
5491 if (IVersion.Major < 9)
5492 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
5493 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
5494 ValRange);
5495 } else if (ID == ".amdhsa_tg_split") {
5496 if (!isGFX90A())
5497 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
5498 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
5499 ValRange);
5500 } else if (ID == ".amdhsa_workgroup_processor_mode") {
5501 if (IVersion.Major < 10)
5502 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5503 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
5504 ValRange);
5505 } else if (ID == ".amdhsa_memory_ordered") {
5506 if (IVersion.Major < 10)
5507 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5508 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
5509 ValRange);
5510 } else if (ID == ".amdhsa_forward_progress") {
5511 if (IVersion.Major < 10)
5512 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
5513 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
5514 ValRange);
5515 } else if (ID == ".amdhsa_shared_vgpr_count") {
5516 if (IVersion.Major < 10 || IVersion.Major >= 12)
5517 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
5518 IDRange);
5519 SharedVGPRCount = Val;
5520 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
5521 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, Val,
5522 ValRange);
5523 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
5524 PARSE_BITS_ENTRY(
5525 KD.compute_pgm_rsrc2,
5526 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
5527 ValRange);
5528 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
5529 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5530 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
5531 Val, ValRange);
5532 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
5533 PARSE_BITS_ENTRY(
5534 KD.compute_pgm_rsrc2,
5535 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
5536 ValRange);
5537 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
5538 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5539 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
5540 Val, ValRange);
5541 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
5542 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5543 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
5544 Val, ValRange);
5545 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
5546 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5547 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
5548 Val, ValRange);
5549 } else if (ID == ".amdhsa_exception_int_div_zero") {
5550 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
5551 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5552 Val, ValRange);
5553 } else if (ID == ".amdhsa_round_robin_scheduling") {
5554 if (IVersion.Major < 12)
5555 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
5556 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
5557 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val,
5558 ValRange);
5559 } else {
5560 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5561 }
5562
5563 #undef PARSE_BITS_ENTRY
5564 }
5565
5566 if (!Seen.contains(".amdhsa_next_free_vgpr"))
5567 return TokError(".amdhsa_next_free_vgpr directive is required");
5568
5569 if (!Seen.contains(".amdhsa_next_free_sgpr"))
5570 return TokError(".amdhsa_next_free_sgpr directive is required");
5571
5572 unsigned VGPRBlocks;
5573 unsigned SGPRBlocks;
5574 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5575 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5576 EnableWavefrontSize32, NextFreeVGPR,
5577 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5578 SGPRBlocks))
5579 return true;
5580
5581 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5582 VGPRBlocks))
5583 return OutOfRangeError(VGPRRange);
5584 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5585 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5586
5587 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5588 SGPRBlocks))
5589 return OutOfRangeError(SGPRRange);
5590 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5591 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5592 SGPRBlocks);
5593
5594 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5595 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
5596 "enabled user SGPRs");
5597
5598 unsigned UserSGPRCount =
5599 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5600
5601 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5602 return TokError("too many user SGPRs enabled");
5603 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5604 UserSGPRCount);
5605
5606 if (PreloadLength && KD.kernarg_size &&
5607 (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size))
5608 return TokError("Kernarg preload length + offset is larger than the "
5609 "kernarg segment size");
5610
5611 if (isGFX90A()) {
5612 if (!Seen.contains(".amdhsa_accum_offset"))
5613 return TokError(".amdhsa_accum_offset directive is required");
5614 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5615 return TokError("accum_offset should be in range [4..256] in "
5616 "increments of 4");
5617 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5618 return TokError("accum_offset exceeds total VGPR allocation");
5619 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5620 (AccumOffset / 4 - 1));
5621 }
5622
5623 if (IVersion.Major >= 10 && IVersion.Major < 12) {
5624 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
5625 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
5626 return TokError("shared_vgpr_count directive not valid on "
5627 "wavefront size 32");
5628 }
5629 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5630 return TokError("shared_vgpr_count*2 + "
5631 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5632 "exceed 63\n");
5633 }
5634 }
5635
5636 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
5637 NextFreeVGPR, NextFreeSGPR,
5638 ReserveVCC, ReserveFlatScr);
5639 return false;
5640 }
5641
ParseDirectiveAMDHSACodeObjectVersion()5642 bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
5643 uint32_t Version;
5644 if (ParseAsAbsoluteExpression(Version))
5645 return true;
5646
5647 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
5648 return false;
5649 }
5650
// Parse one "<field> = <value>" entry of an .amd_kernel_code_t block into
// Header, then validate the parsed field against the current subtarget.
// Returns true on error (after emitting a diagnostic), false on success.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  // Decode the field value into Header; the helper reports problems via Err.
  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  // The checks below validate the just-stored field against generation and
  // feature constraints of the subtarget being assembled for.

  // DX10 clamp bit was removed on GFX12.
  if (ID == "enable_dx10_clamp") {
    if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) &&
        isGFX12Plus())
      return TokError("enable_dx10_clamp=1 is not allowed on GFX12+");
  }

  // IEEE mode bit was removed on GFX12.
  if (ID == "enable_ieee_mode") {
    if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) &&
        isGFX12Plus())
      return TokError("enable_ieee_mode=1 is not allowed on GFX12+");
  }

  // Wave32 requires GFX10+ and the matching wavefront-size feature; wave64
  // likewise requires its feature to be enabled.
  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  // wavefront_size is log2 of the wave size: 5 => wave32, 6 => wave64.
  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  // WGP mode, memory-ordered and forward-progress bits only exist on GFX10+.
  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}
5723
ParseDirectiveAMDKernelCodeT()5724 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
5725 amd_kernel_code_t Header;
5726 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
5727
5728 while (true) {
5729 // Lex EndOfStatement. This is in a while loop, because lexing a comment
5730 // will set the current token to EndOfStatement.
5731 while(trySkipToken(AsmToken::EndOfStatement));
5732
5733 StringRef ID;
5734 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
5735 return true;
5736
5737 if (ID == ".end_amd_kernel_code_t")
5738 break;
5739
5740 if (ParseAMDKernelCodeTValue(ID, Header))
5741 return true;
5742 }
5743
5744 getTargetStreamer().EmitAMDKernelCodeT(Header);
5745
5746 return false;
5747 }
5748
ParseDirectiveAMDGPUHsaKernel()5749 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
5750 StringRef KernelName;
5751 if (!parseId(KernelName, "expected symbol name"))
5752 return true;
5753
5754 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
5755 ELF::STT_AMDGPU_HSA_KERNEL);
5756
5757 KernelScope.initialize(getContext());
5758 return false;
5759 }
5760
ParseDirectiveISAVersion()5761 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
5762 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
5763 return Error(getLoc(),
5764 ".amd_amdgpu_isa directive is not available on non-amdgcn "
5765 "architectures");
5766 }
5767
5768 auto TargetIDDirective = getLexer().getTok().getStringContents();
5769 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5770 return Error(getParser().getTok().getLoc(), "target id must match options");
5771
5772 getTargetStreamer().EmitISAVersion();
5773 Lex();
5774
5775 return false;
5776 }
5777
ParseDirectiveHSAMetadata()5778 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
5779 assert(isHsaAbi(getSTI()));
5780
5781 std::string HSAMetadataString;
5782 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
5783 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
5784 return true;
5785
5786 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
5787 return Error(getLoc(), "invalid HSA metadata");
5788
5789 return false;
5790 }
5791
5792 /// Common code to parse out a block of text (typically YAML) between start and
5793 /// end directives.
ParseToEndDirective(const char * AssemblerDirectiveBegin,const char * AssemblerDirectiveEnd,std::string & CollectString)5794 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
5795 const char *AssemblerDirectiveEnd,
5796 std::string &CollectString) {
5797
5798 raw_string_ostream CollectStream(CollectString);
5799
5800 getLexer().setSkipSpace(false);
5801
5802 bool FoundEnd = false;
5803 while (!isToken(AsmToken::Eof)) {
5804 while (isToken(AsmToken::Space)) {
5805 CollectStream << getTokenStr();
5806 Lex();
5807 }
5808
5809 if (trySkipId(AssemblerDirectiveEnd)) {
5810 FoundEnd = true;
5811 break;
5812 }
5813
5814 CollectStream << Parser.parseStringToEndOfStatement()
5815 << getContext().getAsmInfo()->getSeparatorString();
5816
5817 Parser.eatToEndOfStatement();
5818 }
5819
5820 getLexer().setSkipSpace(true);
5821
5822 if (isToken(AsmToken::Eof) && !FoundEnd) {
5823 return TokError(Twine("expected directive ") +
5824 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
5825 }
5826
5827 CollectStream.flush();
5828 return false;
5829 }
5830
5831 /// Parse the assembler directive for new MsgPack-format PAL metadata.
ParseDirectivePALMetadataBegin()5832 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
5833 std::string String;
5834 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
5835 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
5836 return true;
5837
5838 auto PALMetadata = getTargetStreamer().getPALMetadata();
5839 if (!PALMetadata->setFromString(String))
5840 return Error(getLoc(), "invalid PAL metadata");
5841 return false;
5842 }
5843
5844 /// Parse the assembler directive for old linear-format PAL metadata.
ParseDirectivePALMetadata()5845 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
5846 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
5847 return Error(getLoc(),
5848 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
5849 "not available on non-amdpal OSes")).str());
5850 }
5851
5852 auto PALMetadata = getTargetStreamer().getPALMetadata();
5853 PALMetadata->setLegacy();
5854 for (;;) {
5855 uint32_t Key, Value;
5856 if (ParseAsAbsoluteExpression(Key)) {
5857 return TokError(Twine("invalid value in ") +
5858 Twine(PALMD::AssemblerDirective));
5859 }
5860 if (!trySkipToken(AsmToken::Comma)) {
5861 return TokError(Twine("expected an even number of values in ") +
5862 Twine(PALMD::AssemblerDirective));
5863 }
5864 if (ParseAsAbsoluteExpression(Value)) {
5865 return TokError(Twine("invalid value in ") +
5866 Twine(PALMD::AssemblerDirective));
5867 }
5868 PALMetadata->setRegister(Key, Value);
5869 if (!trySkipToken(AsmToken::Comma))
5870 break;
5871 }
5872 return false;
5873 }
5874
5875 /// ParseDirectiveAMDGPULDS
5876 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
ParseDirectiveAMDGPULDS()5877 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
5878 if (getParser().checkForValidSection())
5879 return true;
5880
5881 StringRef Name;
5882 SMLoc NameLoc = getLoc();
5883 if (getParser().parseIdentifier(Name))
5884 return TokError("expected identifier in directive");
5885
5886 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
5887 if (getParser().parseComma())
5888 return true;
5889
5890 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
5891
5892 int64_t Size;
5893 SMLoc SizeLoc = getLoc();
5894 if (getParser().parseAbsoluteExpression(Size))
5895 return true;
5896 if (Size < 0)
5897 return Error(SizeLoc, "size must be non-negative");
5898 if (Size > LocalMemorySize)
5899 return Error(SizeLoc, "size is too large");
5900
5901 int64_t Alignment = 4;
5902 if (trySkipToken(AsmToken::Comma)) {
5903 SMLoc AlignLoc = getLoc();
5904 if (getParser().parseAbsoluteExpression(Alignment))
5905 return true;
5906 if (Alignment < 0 || !isPowerOf2_64(Alignment))
5907 return Error(AlignLoc, "alignment must be a power of two");
5908
5909 // Alignment larger than the size of LDS is possible in theory, as long
5910 // as the linker manages to place to symbol at address 0, but we do want
5911 // to make sure the alignment fits nicely into a 32-bit integer.
5912 if (Alignment >= 1u << 31)
5913 return Error(AlignLoc, "alignment is too large");
5914 }
5915
5916 if (parseEOL())
5917 return true;
5918
5919 Symbol->redefineIfPossible();
5920 if (!Symbol->isUndefined())
5921 return Error(NameLoc, "invalid symbol redefinition");
5922
5923 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
5924 return false;
5925 }
5926
ParseDirective(AsmToken DirectiveID)5927 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
5928 StringRef IDVal = DirectiveID.getString();
5929
5930 if (isHsaAbi(getSTI())) {
5931 if (IDVal == ".amdhsa_kernel")
5932 return ParseDirectiveAMDHSAKernel();
5933
5934 if (IDVal == ".amdhsa_code_object_version")
5935 return ParseDirectiveAMDHSACodeObjectVersion();
5936
5937 // TODO: Restructure/combine with PAL metadata directive.
5938 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
5939 return ParseDirectiveHSAMetadata();
5940 } else {
5941 if (IDVal == ".amd_kernel_code_t")
5942 return ParseDirectiveAMDKernelCodeT();
5943
5944 if (IDVal == ".amdgpu_hsa_kernel")
5945 return ParseDirectiveAMDGPUHsaKernel();
5946
5947 if (IDVal == ".amd_amdgpu_isa")
5948 return ParseDirectiveISAVersion();
5949
5950 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) {
5951 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
5952 Twine(" directive is "
5953 "not available on non-amdhsa OSes"))
5954 .str());
5955 }
5956 }
5957
5958 if (IDVal == ".amdgcn_target")
5959 return ParseDirectiveAMDGCNTarget();
5960
5961 if (IDVal == ".amdgpu_lds")
5962 return ParseDirectiveAMDGPULDS();
5963
5964 if (IDVal == PALMD::AssemblerDirectiveBegin)
5965 return ParseDirectivePALMetadataBegin();
5966
5967 if (IDVal == PALMD::AssemblerDirective)
5968 return ParseDirectivePALMetadata();
5969
5970 return true;
5971 }
5972
// Return true if RegNo is a register that exists on the current subtarget.
// Encodes per-generation availability of special and boundary registers.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {

  // TTMP12..TTMP15 are only available on GFX9+.
  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
    return isGFX9Plus();

  // GFX10+ has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
    return hasSGPR104_SGPR105();

  switch (RegNo) {
  // Shared/private base and limit source registers exist on GFX9+.
  case AMDGPU::SRC_SHARED_BASE_LO:
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT_LO:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE_LO:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT_LO:
  case AMDGPU::SRC_PRIVATE_LIMIT:
    return isGFX9Plus();
  // Present from GFX9 up to (but not including) GFX11.
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus() && !isGFX11Plus();
  // TBA/TMA registers were removed starting with GFX9.
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  // XNACK_MASK is only addressable on VI/GFX9 when the target supports XNACK.
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  // The null register was introduced on GFX10.
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  // CI accepts everything remaining, including flat_scr.
  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10Plus flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}
6036
// Parse a single instruction operand. Tries the VOPD parser, then any
// custom (tablegen-registered) operand parser, then NSA register lists when
// Mode requests it, and finally the generic register-or-immediate parser.
ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
                                          StringRef Mnemonic,
                                          OperandMode Mode) {
  // Give the VOPD parser the first chance at this operand.
  ParseStatus Res = parseVOPD(Operands);
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  // Try to parse with a custom parser
  Res = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there as an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
    return Res;

  // In NSA mode, accept a bracketed register list, e.g. "[v0, v2, v5]".
  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    // Remember where the list starts so the bracket tokens can be inserted
    // around the registers afterwards.
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      Res = parseReg(Operands);
      if (Res.isNoMatch())
        Error(Loc, "expected a register");
      if (!Res.isSuccess())
        return ParseStatus::Failure;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket"))
        return ParseStatus::Failure;
    }

    // Only materialize the "[" / "]" tokens when more than one register was
    // parsed; a single register needs no list wrapper.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return ParseStatus::Success;
  }

  // Fall back to the generic register-or-immediate parser.
  return parseRegOrImm(Operands);
}
6089
parseMnemonicSuffix(StringRef Name)6090 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6091 // Clear any forced encodings from the previous instruction.
6092 setForcedEncodingSize(0);
6093 setForcedDPP(false);
6094 setForcedSDWA(false);
6095
6096 if (Name.ends_with("_e64_dpp")) {
6097 setForcedDPP(true);
6098 setForcedEncodingSize(64);
6099 return Name.substr(0, Name.size() - 8);
6100 } else if (Name.ends_with("_e64")) {
6101 setForcedEncodingSize(64);
6102 return Name.substr(0, Name.size() - 4);
6103 } else if (Name.ends_with("_e32")) {
6104 setForcedEncodingSize(32);
6105 return Name.substr(0, Name.size() - 4);
6106 } else if (Name.ends_with("_dpp")) {
6107 setForcedDPP(true);
6108 return Name.substr(0, Name.size() - 4);
6109 } else if (Name.ends_with("_sdwa")) {
6110 setForcedSDWA(true);
6111 return Name.substr(0, Name.size() - 5);
6112 }
6113 return Name;
6114 }
6115
6116 static void applyMnemonicAliases(StringRef &Mnemonic,
6117 const FeatureBitset &Features,
6118 unsigned VariantID);
6119
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)6120 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
6121 StringRef Name,
6122 SMLoc NameLoc, OperandVector &Operands) {
6123 // Add the instruction mnemonic
6124 Name = parseMnemonicSuffix(Name);
6125
6126 // If the target architecture uses MnemonicAlias, call it here to parse
6127 // operands correctly.
6128 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6129
6130 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6131
6132 bool IsMIMG = Name.starts_with("image_");
6133
6134 while (!trySkipToken(AsmToken::EndOfStatement)) {
6135 OperandMode Mode = OperandMode_Default;
6136 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6137 Mode = OperandMode_NSA;
6138 ParseStatus Res = parseOperand(Operands, Name, Mode);
6139
6140 if (!Res.isSuccess()) {
6141 checkUnsupportedInstruction(Name, NameLoc);
6142 if (!Parser.hasPendingError()) {
6143 // FIXME: use real operand location rather than the current location.
6144 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6145 : "not a valid operand.";
6146 Error(getLoc(), Msg);
6147 }
6148 while (!trySkipToken(AsmToken::EndOfStatement)) {
6149 lex();
6150 }
6151 return true;
6152 }
6153
6154 // Eat the comma or space if there is one.
6155 trySkipToken(AsmToken::Comma);
6156 }
6157
6158 return false;
6159 }
6160
6161 //===----------------------------------------------------------------------===//
6162 // Utility functions
6163 //===----------------------------------------------------------------------===//
6164
parseTokenOp(StringRef Name,OperandVector & Operands)6165 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6166 OperandVector &Operands) {
6167 SMLoc S = getLoc();
6168 if (!trySkipId(Name))
6169 return ParseStatus::NoMatch;
6170
6171 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6172 return ParseStatus::Success;
6173 }
6174
parseIntWithPrefix(const char * Prefix,int64_t & IntVal)6175 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
6176 int64_t &IntVal) {
6177
6178 if (!trySkipId(Prefix, AsmToken::Colon))
6179 return ParseStatus::NoMatch;
6180
6181 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
6182 }
6183
parseIntWithPrefix(const char * Prefix,OperandVector & Operands,AMDGPUOperand::ImmTy ImmTy,std::function<bool (int64_t &)> ConvertResult)6184 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
6185 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6186 std::function<bool(int64_t &)> ConvertResult) {
6187 SMLoc S = getLoc();
6188 int64_t Value = 0;
6189
6190 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
6191 if (!Res.isSuccess())
6192 return Res;
6193
6194 if (ConvertResult && !ConvertResult(Value)) {
6195 Error(S, "invalid " + StringRef(Prefix) + " value.");
6196 }
6197
6198 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
6199 return ParseStatus::Success;
6200 }
6201
parseOperandArrayWithPrefix(const char * Prefix,OperandVector & Operands,AMDGPUOperand::ImmTy ImmTy,bool (* ConvertResult)(int64_t &))6202 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
6203 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
6204 bool (*ConvertResult)(int64_t &)) {
6205 SMLoc S = getLoc();
6206 if (!trySkipId(Prefix, AsmToken::Colon))
6207 return ParseStatus::NoMatch;
6208
6209 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
6210 return ParseStatus::Failure;
6211
6212 unsigned Val = 0;
6213 const unsigned MaxSize = 4;
6214
6215 // FIXME: How to verify the number of elements matches the number of src
6216 // operands?
6217 for (int I = 0; ; ++I) {
6218 int64_t Op;
6219 SMLoc Loc = getLoc();
6220 if (!parseExpr(Op))
6221 return ParseStatus::Failure;
6222
6223 if (Op != 0 && Op != 1)
6224 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
6225
6226 Val |= (Op << I);
6227
6228 if (trySkipToken(AsmToken::RBrac))
6229 break;
6230
6231 if (I + 1 == MaxSize)
6232 return Error(getLoc(), "expected a closing square bracket");
6233
6234 if (!skipToken(AsmToken::Comma, "expected a comma"))
6235 return ParseStatus::Failure;
6236 }
6237
6238 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
6239 return ParseStatus::Success;
6240 }
6241
parseNamedBit(StringRef Name,OperandVector & Operands,AMDGPUOperand::ImmTy ImmTy)6242 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
6243 OperandVector &Operands,
6244 AMDGPUOperand::ImmTy ImmTy) {
6245 int64_t Bit;
6246 SMLoc S = getLoc();
6247
6248 if (trySkipId(Name)) {
6249 Bit = 1;
6250 } else if (trySkipId("no", Name)) {
6251 Bit = 0;
6252 } else {
6253 return ParseStatus::NoMatch;
6254 }
6255
6256 if (Name == "r128" && !hasMIMG_R128())
6257 return Error(S, "r128 modifier is not supported on this GPU");
6258 if (Name == "a16" && !hasA16())
6259 return Error(S, "a16 modifier is not supported on this GPU");
6260
6261 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
6262 ImmTy = AMDGPUOperand::ImmTyR128A16;
6263
6264 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
6265 return ParseStatus::Success;
6266 }
6267
getCPolKind(StringRef Id,StringRef Mnemo,bool & Disabling) const6268 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
6269 bool &Disabling) const {
6270 Disabling = Id.consume_front("no");
6271
6272 if (isGFX940() && !Mnemo.starts_with("s_")) {
6273 return StringSwitch<unsigned>(Id)
6274 .Case("nt", AMDGPU::CPol::NT)
6275 .Case("sc0", AMDGPU::CPol::SC0)
6276 .Case("sc1", AMDGPU::CPol::SC1)
6277 .Default(0);
6278 }
6279
6280 return StringSwitch<unsigned>(Id)
6281 .Case("dlc", AMDGPU::CPol::DLC)
6282 .Case("glc", AMDGPU::CPol::GLC)
6283 .Case("scc", AMDGPU::CPol::SCC)
6284 .Case("slc", AMDGPU::CPol::SLC)
6285 .Default(0);
6286 }
6287
// Parse cache-policy modifiers into a single ImmTyCPol immediate operand.
// GFX12+ uses the "th:"/"scope:" syntax; earlier targets use bare keywords
// (glc/slc/dlc/scc or the GFX940 set), each optionally prefixed with "no".
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;

    // th and scope may appear in either order, but each at most once:
    // once a component succeeds its Res leaves NoMatch and it is not
    // attempted again.
    for (;;) {
      if (ResTH.isNoMatch()) {
        int64_t TH;
        ResTH = parseTH(Operands, TH);
        if (ResTH.isFailure())
          return ResTH;
        if (ResTH.isSuccess()) {
          CPolVal |= TH;
          continue;
        }
      }

      if (ResScope.isNoMatch()) {
        int64_t Scope;
        ResScope = parseScope(Operands, Scope);
        if (ResScope.isFailure())
          return ResScope;
        if (ResScope.isSuccess()) {
          CPolVal |= Scope;
          continue;
        }
      }

      break;
    }

    // Neither component present: no cpol operand here at all.
    if (ResTH.isNoMatch() && ResScope.isNoMatch())
      return ParseStatus::NoMatch;

    Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                                AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  // Pre-GFX12: consume keywords until a non-cache-policy token is found.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(S, "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(S, "scc modifier is not supported on this GPU");

    // Seen tracks both enabling and "no"-disabling mentions, so e.g.
    // "glc noglc" is rejected as a duplicate.
    if (Seen & CPol)
      return Error(S, "duplicate cache policy modifier");

    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
6364
parseScope(OperandVector & Operands,int64_t & Scope)6365 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
6366 int64_t &Scope) {
6367 Scope = AMDGPU::CPol::SCOPE_CU; // default;
6368
6369 StringRef Value;
6370 SMLoc StringLoc;
6371 ParseStatus Res;
6372
6373 Res = parseStringWithPrefix("scope", Value, StringLoc);
6374 if (!Res.isSuccess())
6375 return Res;
6376
6377 Scope = StringSwitch<int64_t>(Value)
6378 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
6379 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
6380 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
6381 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
6382 .Default(0xffffffff);
6383
6384 if (Scope == 0xffffffff)
6385 return Error(StringLoc, "invalid scope value");
6386
6387 return ParseStatus::Success;
6388 }
6389
// Parse "th:TH_*" (GFX12 cache policy temporal hint). The mnemonic is split
// into a type prefix (ATOMIC/LOAD/STORE) and a suffix selecting the hint
// bits within that type. TH defaults to TH_RT when the component is absent.
ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
  TH = AMDGPU::CPol::TH_RT; // default

  StringRef Value;
  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  if (Value == "TH_DEFAULT")
    TH = AMDGPU::CPol::TH_RT;
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
           Value == "TH_LOAD_NT_WB") {
    // These spellings are explicitly rejected.
    return Error(StringLoc, "invalid th value");
  } else if (Value.starts_with("TH_ATOMIC_")) {
    Value = Value.drop_front(10);
    TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
  } else if (Value.starts_with("TH_LOAD_")) {
    Value = Value.drop_front(8);
    TH = AMDGPU::CPol::TH_TYPE_LOAD;
  } else if (Value.starts_with("TH_STORE_")) {
    Value = Value.drop_front(9);
    TH = AMDGPU::CPol::TH_TYPE_STORE;
  } else {
    return Error(StringLoc, "invalid th value");
  }

  if (Value == "BYPASS")
    TH |= AMDGPU::CPol::TH_REAL_BYPASS;

  // Decode the remaining suffix. TH == 0 here can only be the TH_DEFAULT
  // path (assumes TH_RT == 0 — the TH_TYPE_* branches set a nonzero bit),
  // so no suffix decoding is needed in that case.
  if (TH != 0) {
    if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
      // Atomic suffixes: RT/NT/CASCADE with an optional RETURN component.
      TH |= StringSwitch<int64_t>(Value)
                .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case("RT", AMDGPU::CPol::TH_RT)
                .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
                .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
                                       AMDGPU::CPol::TH_ATOMIC_RETURN)
                .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
                .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
                                        AMDGPU::CPol::TH_ATOMIC_NT)
                .Default(0xffffffff); // Sentinel: unknown suffix.
    else
      // Load/store suffixes share one namespace of temporal hints.
      TH |= StringSwitch<int64_t>(Value)
                .Case("RT", AMDGPU::CPol::TH_RT)
                .Case("NT", AMDGPU::CPol::TH_NT)
                .Case("HT", AMDGPU::CPol::TH_HT)
                .Case("LU", AMDGPU::CPol::TH_LU)
                .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
                .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
                .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
                .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
                .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
                .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
                .Default(0xffffffff); // Sentinel: unknown suffix.
  }

  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");

  return ParseStatus::Success;
}
6453
addOptionalImmOperand(MCInst & Inst,const OperandVector & Operands,AMDGPUAsmParser::OptionalImmIndexMap & OptionalIdx,AMDGPUOperand::ImmTy ImmT,int64_t Default=0)6454 static void addOptionalImmOperand(
6455 MCInst& Inst, const OperandVector& Operands,
6456 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
6457 AMDGPUOperand::ImmTy ImmT,
6458 int64_t Default = 0) {
6459 auto i = OptionalIdx.find(ImmT);
6460 if (i != OptionalIdx.end()) {
6461 unsigned Idx = i->second;
6462 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
6463 } else {
6464 Inst.addOperand(MCOperand::createImm(Default));
6465 }
6466 }
6467
parseStringWithPrefix(StringRef Prefix,StringRef & Value,SMLoc & StringLoc)6468 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
6469 StringRef &Value,
6470 SMLoc &StringLoc) {
6471 if (!trySkipId(Prefix, AsmToken::Colon))
6472 return ParseStatus::NoMatch;
6473
6474 StringLoc = getLoc();
6475 return parseId(Value, "expected an identifier") ? ParseStatus::Success
6476 : ParseStatus::Failure;
6477 }
6478
6479 //===----------------------------------------------------------------------===//
6480 // MTBUF format
6481 //===----------------------------------------------------------------------===//
6482
tryParseFmt(const char * Pref,int64_t MaxVal,int64_t & Fmt)6483 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
6484 int64_t MaxVal,
6485 int64_t &Fmt) {
6486 int64_t Val;
6487 SMLoc Loc = getLoc();
6488
6489 auto Res = parseIntWithPrefix(Pref, Val);
6490 if (Res.isFailure())
6491 return false;
6492 if (Res.isNoMatch())
6493 return true;
6494
6495 if (Val < 0 || Val > MaxVal) {
6496 Error(Loc, Twine("out of range ", StringRef(Pref)));
6497 return false;
6498 }
6499
6500 Fmt = Val;
6501 return true;
6502 }
6503
tryParseIndexKey(OperandVector & Operands,AMDGPUOperand::ImmTy ImmTy)6504 ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
6505 AMDGPUOperand::ImmTy ImmTy) {
6506 const char *Pref = "index_key";
6507 int64_t ImmVal = 0;
6508 SMLoc Loc = getLoc();
6509 auto Res = parseIntWithPrefix(Pref, ImmVal);
6510 if (!Res.isSuccess())
6511 return Res;
6512
6513 if (ImmTy == AMDGPUOperand::ImmTyIndexKey16bit && (ImmVal < 0 || ImmVal > 1))
6514 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6515
6516 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
6517 return Error(Loc, Twine("out of range ", StringRef(Pref)));
6518
6519 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
6520 return ParseStatus::Success;
6521 }
6522
// index_key modifier for 8-bit element addressing (legal values 0..3).
ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
}
6526
// index_key modifier for 16-bit element addressing (legal values 0..1).
ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
}
6530
6531 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
6532 // values to live in a joint format operand in the MCInst encoding.
// Parse legacy dfmt:/nfmt: components (pre-GFX10 tbuffer syntax) into the
// joint Format encoding. Both components are optional and may appear in
// either order, optionally separated by a comma.
ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return ParseStatus::Failure;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
      return ParseStatus::Failure;

    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    // The skip is attempted only when exactly one component has been parsed
    // (so a comma may legitimately introduce the other one) and the token
    // after the current one is not itself another comma.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return ParseStatus::NoMatch;

  // A missing component falls back to its default encoding.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return ParseStatus::Success;
}
6564
parseUfmt(int64_t & Format)6565 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
6566 using namespace llvm::AMDGPU::MTBUFFormat;
6567
6568 int64_t Fmt = UFMT_UNDEF;
6569
6570 if (!tryParseFmt("format", UFMT_MAX, Fmt))
6571 return ParseStatus::Failure;
6572
6573 if (Fmt == UFMT_UNDEF)
6574 return ParseStatus::NoMatch;
6575
6576 Format = Fmt;
6577 return ParseStatus::Success;
6578 }
6579
matchDfmtNfmt(int64_t & Dfmt,int64_t & Nfmt,StringRef FormatStr,SMLoc Loc)6580 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
6581 int64_t &Nfmt,
6582 StringRef FormatStr,
6583 SMLoc Loc) {
6584 using namespace llvm::AMDGPU::MTBUFFormat;
6585 int64_t Format;
6586
6587 Format = getDfmt(FormatStr);
6588 if (Format != DFMT_UNDEF) {
6589 Dfmt = Format;
6590 return true;
6591 }
6592
6593 Format = getNfmt(FormatStr, getSTI());
6594 if (Format != NFMT_UNDEF) {
6595 Nfmt = Format;
6596 return true;
6597 }
6598
6599 Error(Loc, "unsupported format");
6600 return false;
6601 }
6602
// Parse a symbolic format given as one or two comma-separated dfmt/nfmt
// names (e.g. "BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT"), in either order.
// On GFX10+ the pair is converted to the equivalent unified format.
ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                                      SMLoc FormatLoc,
                                                      int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return ParseStatus::Failure;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
      return ParseStatus::Failure;
    // After two components, one of each kind must be present: a still-unset
    // Dfmt means both components were numeric formats, and a still-unset
    // Nfmt means both were data formats.
    if (Dfmt == DFMT_UNDEF)
      return Error(Loc, "duplicate numeric format");
    if (Nfmt == NFMT_UNDEF)
      return Error(Loc, "duplicate data format");
  }

  // A missing component falls back to its default encoding.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  if (isGFX10Plus()) {
    // GFX10+ encodes formats in unified form; not every dfmt/nfmt pair has
    // a unified equivalent.
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
    if (Ufmt == UFMT_UNDEF)
      return Error(FormatLoc, "unsupported format");
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return ParseStatus::Success;
}
6639
parseSymbolicUnifiedFormat(StringRef FormatStr,SMLoc Loc,int64_t & Format)6640 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
6641 SMLoc Loc,
6642 int64_t &Format) {
6643 using namespace llvm::AMDGPU::MTBUFFormat;
6644
6645 auto Id = getUnifiedFormat(FormatStr, getSTI());
6646 if (Id == UFMT_UNDEF)
6647 return ParseStatus::NoMatch;
6648
6649 if (!isGFX10Plus())
6650 return Error(Loc, "unified format is not supported on this GPU");
6651
6652 Format = Id;
6653 return ParseStatus::Success;
6654 }
6655
parseNumericFormat(int64_t & Format)6656 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
6657 using namespace llvm::AMDGPU::MTBUFFormat;
6658 SMLoc Loc = getLoc();
6659
6660 if (!parseExpr(Format))
6661 return ParseStatus::Failure;
6662 if (!isValidFormatEncoding(Format, getSTI()))
6663 return Error(Loc, "out of range format");
6664
6665 return ParseStatus::Success;
6666 }
6667
parseSymbolicOrNumericFormat(int64_t & Format)6668 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
6669 using namespace llvm::AMDGPU::MTBUFFormat;
6670
6671 if (!trySkipId("format", AsmToken::Colon))
6672 return ParseStatus::NoMatch;
6673
6674 if (trySkipToken(AsmToken::LBrac)) {
6675 StringRef FormatStr;
6676 SMLoc Loc = getLoc();
6677 if (!parseId(FormatStr, "expected a format string"))
6678 return ParseStatus::Failure;
6679
6680 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
6681 if (Res.isNoMatch())
6682 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
6683 if (!Res.isSuccess())
6684 return Res;
6685
6686 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
6687 return ParseStatus::Failure;
6688
6689 return ParseStatus::Success;
6690 }
6691
6692 return parseNumericFormat(Format);
6693 }
6694
// Parse the format operand of an MTBUF instruction. The format may appear
// either before the soffset operand (legacy dfmt/nfmt or GFX10+ "format:"
// syntax) or after it; a default-valued format operand is always pushed
// first and patched in place if the format turns out to follow soffset.
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  // Push the format operand now (default value if not yet parsed) so that
  // it always precedes soffset in the operand list.
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    // The format may follow soffset; if it does, overwrite the value of the
    // format operand pushed above (now second from the end, before soffset).
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  // A format was already parsed before soffset; a second one is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon))
    return Error(getLoc(), "duplicate format");
  return ParseStatus::Success;
}
6745
parseFlatOffset(OperandVector & Operands)6746 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
6747 ParseStatus Res =
6748 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
6749 if (Res.isNoMatch()) {
6750 Res = parseIntWithPrefix("inst_offset", Operands,
6751 AMDGPUOperand::ImmTyInstOffset);
6752 }
6753 return Res;
6754 }
6755
parseR128A16(OperandVector & Operands)6756 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
6757 ParseStatus Res =
6758 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
6759 if (Res.isNoMatch())
6760 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
6761 return Res;
6762 }
6763
parseBLGP(OperandVector & Operands)6764 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
6765 ParseStatus Res =
6766 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
6767 if (Res.isNoMatch()) {
6768 Res =
6769 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
6770 }
6771 return Res;
6772 }
6773
6774 //===----------------------------------------------------------------------===//
6775 // Exp
6776 //===----------------------------------------------------------------------===//
6777
// Convert parsed EXP instruction operands into an MCInst: collect the four
// source slots (tracking their MCInst positions), squash sources for
// compressed exports, then append vm/compr immediates and the enable mask.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each of the 4 sources.
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // "off" placeholders become NoRegister and are excluded from the mask.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // "done"/"row_en" tokens are not separate MCInst operands; skip them.
    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // All four source slots must have been filled (registers or "off").
  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed exports use only the first two slots: move src2 into the
    // src1 position and clear the upper two slots.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Each live source contributes one enable bit (two when compressed).
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
6838
6839 //===----------------------------------------------------------------------===//
6840 // s_waitcnt
6841 //===----------------------------------------------------------------------===//
6842
6843 static bool
encodeCnt(const AMDGPU::IsaVersion ISA,int64_t & IntVal,int64_t CntVal,bool Saturate,unsigned (* encode)(const IsaVersion & Version,unsigned,unsigned),unsigned (* decode)(const IsaVersion & Version,unsigned))6844 encodeCnt(
6845 const AMDGPU::IsaVersion ISA,
6846 int64_t &IntVal,
6847 int64_t CntVal,
6848 bool Saturate,
6849 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
6850 unsigned (*decode)(const IsaVersion &Version, unsigned))
6851 {
6852 bool Failed = false;
6853
6854 IntVal = encode(ISA, IntVal, CntVal);
6855 if (CntVal != decode(ISA, IntVal)) {
6856 if (Saturate) {
6857 IntVal = encode(ISA, IntVal, -1);
6858 } else {
6859 Failed = true;
6860 }
6861 }
6862 return Failed;
6863 }
6864
// Parse one "name(value)" clause of a symbolic s_waitcnt expression and fold
// it into IntVal. A "_sat" suffix on the counter name clamps out-of-range
// values instead of reporting an error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.ends_with("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Clauses may be joined by '&' or ','; a trailing separator with nothing
  // after it is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
6912
parseSWaitCnt(OperandVector & Operands)6913 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
6914 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
6915 int64_t Waitcnt = getWaitcntBitMask(ISA);
6916 SMLoc S = getLoc();
6917
6918 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6919 while (!isToken(AsmToken::EndOfStatement)) {
6920 if (!parseCnt(Waitcnt))
6921 return ParseStatus::Failure;
6922 }
6923 } else {
6924 if (!parseExpr(Waitcnt))
6925 return ParseStatus::Failure;
6926 }
6927
6928 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
6929 return ParseStatus::Success;
6930 }
6931
// Parse one "field(VALUE)" clause of a symbolic s_delay_alu operand and OR
// the encoded value into Delay. Field bit positions: instid0 at bit 0,
// instskip at bit 4, instid1 at bit 7.
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
  SMLoc FieldLoc = getLoc();
  StringRef FieldName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a field name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  SMLoc ValueLoc = getLoc();
  StringRef ValueName = getTokenStr();
  if (!skipToken(AsmToken::Identifier, "expected a value name") ||
      !skipToken(AsmToken::RParen, "expected a right parenthesis"))
    return false;

  // Bit position of the named field within the delay encoding.
  unsigned Shift;
  if (FieldName == "instid0") {
    Shift = 0;
  } else if (FieldName == "instskip") {
    Shift = 4;
  } else if (FieldName == "instid1") {
    Shift = 7;
  } else {
    Error(FieldLoc, "invalid field name " + FieldName);
    return false;
  }

  int Value;
  if (Shift == 4) {
    // Parse values for instskip.
    Value = StringSwitch<int>(ValueName)
                .Case("SAME", 0)
                .Case("NEXT", 1)
                .Case("SKIP_1", 2)
                .Case("SKIP_2", 3)
                .Case("SKIP_3", 4)
                .Case("SKIP_4", 5)
                .Default(-1);
  } else {
    // Parse values for instid0 and instid1.
    Value = StringSwitch<int>(ValueName)
                .Case("NO_DEP", 0)
                .Case("VALU_DEP_1", 1)
                .Case("VALU_DEP_2", 2)
                .Case("VALU_DEP_3", 3)
                .Case("VALU_DEP_4", 4)
                .Case("TRANS32_DEP_1", 5)
                .Case("TRANS32_DEP_2", 6)
                .Case("TRANS32_DEP_3", 7)
                .Case("FMA_ACCUM_CYCLE_1", 8)
                .Case("SALU_CYCLE_1", 9)
                .Case("SALU_CYCLE_2", 10)
                .Case("SALU_CYCLE_3", 11)
                .Default(-1);
  }
  if (Value < 0) {
    Error(ValueLoc, "invalid value name " + ValueName);
    return false;
  }

  Delay |= Value << Shift;
  return true;
}
6993
parseSDelayALU(OperandVector & Operands)6994 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
6995 int64_t Delay = 0;
6996 SMLoc S = getLoc();
6997
6998 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
6999 do {
7000 if (!parseDelay(Delay))
7001 return ParseStatus::Failure;
7002 } while (trySkipToken(AsmToken::Pipe));
7003 } else {
7004 if (!parseExpr(Delay))
7005 return ParseStatus::Failure;
7006 }
7007
7008 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7009 return ParseStatus::Success;
7010 }
7011
// Any immediate may serve as an s_waitcnt operand; range constraints are
// enforced during encoding, not here.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}
7016
// Any immediate may serve as an s_delay_alu operand.
bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7018
7019 //===----------------------------------------------------------------------===//
7020 // DepCtr
7021 //===----------------------------------------------------------------------===//
7022
depCtrError(SMLoc Loc,int ErrorId,StringRef DepCtrName)7023 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7024 StringRef DepCtrName) {
7025 switch (ErrorId) {
7026 case OPR_ID_UNKNOWN:
7027 Error(Loc, Twine("invalid counter name ", DepCtrName));
7028 return;
7029 case OPR_ID_UNSUPPORTED:
7030 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7031 return;
7032 case OPR_ID_DUPLICATE:
7033 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7034 return;
7035 case OPR_VAL_INVALID:
7036 Error(Loc, Twine("invalid value for ", DepCtrName));
7037 return;
7038 default:
7039 assert(false);
7040 }
7041 }
7042
// Parse one "name(value)" clause of a symbolic s_depctr operand and merge
// the encoded field into DepCtr. UsedOprMask accumulates the bits of fields
// seen so far so encodeDepCtr can reject duplicate counters.
bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {

  using namespace llvm::AMDGPU::DepCtr;

  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t ExprVal;
  if (!parseExpr(ExprVal))
    return false;

  unsigned PrevOprMask = UsedOprMask;
  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());

  // Negative results are error ids; report and bail out.
  if (CntVal < 0) {
    depCtrError(DepCtrLoc, CntVal, DepCtrName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Clauses may be joined by '&' or ','; a trailing separator with nothing
  // after it is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  // The bits that changed in UsedOprMask are exactly this field's bits;
  // replace them in DepCtr with the newly encoded value.
  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}
7080
parseDepCtr(OperandVector & Operands)7081 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
7082 using namespace llvm::AMDGPU::DepCtr;
7083
7084 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
7085 SMLoc Loc = getLoc();
7086
7087 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7088 unsigned UsedOprMask = 0;
7089 while (!isToken(AsmToken::EndOfStatement)) {
7090 if (!parseDepCtr(DepCtr, UsedOprMask))
7091 return ParseStatus::Failure;
7092 }
7093 } else {
7094 if (!parseExpr(DepCtr))
7095 return ParseStatus::Failure;
7096 }
7097
7098 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
7099 return ParseStatus::Success;
7100 }
7101
// s_depctr operands must fit in a signed 16-bit immediate.
bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
7103
7104 //===----------------------------------------------------------------------===//
7105 // hwreg
7106 //===----------------------------------------------------------------------===//
7107
// Parse the body of a hwreg(...) macro: a register (symbolic name or
// numeric code), optionally followed by ",offset,width". Assumes the
// opening parenthesis has been consumed; consumes the closing one.
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  // A closing parenthesis right away means offset/width keep their defaults.
  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
7142
7143 bool
validateHwreg(const OperandInfoTy & HwReg,const OperandInfoTy & Offset,const OperandInfoTy & Width)7144 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
7145 const OperandInfoTy &Offset,
7146 const OperandInfoTy &Width) {
7147
7148 using namespace llvm::AMDGPU::Hwreg;
7149
7150 if (HwReg.IsSymbolic) {
7151 if (HwReg.Id == OPR_ID_UNSUPPORTED) {
7152 Error(HwReg.Loc,
7153 "specified hardware register is not supported on this GPU");
7154 return false;
7155 }
7156 } else {
7157 if (!isValidHwreg(HwReg.Id)) {
7158 Error(HwReg.Loc,
7159 "invalid code of hardware register: only 6-bit values are legal");
7160 return false;
7161 }
7162 }
7163 if (!isValidHwregOffset(Offset.Id)) {
7164 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
7165 return false;
7166 }
7167 if (!isValidHwregWidth(Width.Id)) {
7168 Error(Width.Loc,
7169 "invalid bitfield width: only values from 1 to 32 are legal");
7170 return false;
7171 }
7172 return true;
7173 }
7174
parseHwreg(OperandVector & Operands)7175 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
7176 using namespace llvm::AMDGPU::Hwreg;
7177
7178 int64_t ImmVal = 0;
7179 SMLoc Loc = getLoc();
7180
7181 if (trySkipId("hwreg", AsmToken::LParen)) {
7182 OperandInfoTy HwReg(OPR_ID_UNKNOWN);
7183 OperandInfoTy Offset(OFFSET_DEFAULT_);
7184 OperandInfoTy Width(WIDTH_DEFAULT_);
7185 if (parseHwregBody(HwReg, Offset, Width) &&
7186 validateHwreg(HwReg, Offset, Width)) {
7187 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
7188 } else {
7189 return ParseStatus::Failure;
7190 }
7191 } else if (parseExpr(ImmVal, "a hwreg macro")) {
7192 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7193 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7194 } else {
7195 return ParseStatus::Failure;
7196 }
7197
7198 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
7199 return ParseStatus::Success;
7200 }
7201
// True for immediates created by parseHwreg.
bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}
7205
7206 //===----------------------------------------------------------------------===//
7207 // sendmsg
7208 //===----------------------------------------------------------------------===//
7209
// Parse the body of a sendmsg(...) macro: a message (symbolic name or
// numeric id), optionally followed by ",operation" and ",stream". Assumes
// the opening parenthesis has been consumed; consumes the closing one.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // The message may be specified by name or using a numeric code.
  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  // Optional operation, also either symbolic or numeric.
  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    // Optional stream id, only meaningful after an operation.
    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
7245
7246 bool
validateSendMsg(const OperandInfoTy & Msg,const OperandInfoTy & Op,const OperandInfoTy & Stream)7247 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
7248 const OperandInfoTy &Op,
7249 const OperandInfoTy &Stream) {
7250 using namespace llvm::AMDGPU::SendMsg;
7251
7252 // Validation strictness depends on whether message is specified
7253 // in a symbolic or in a numeric form. In the latter case
7254 // only encoding possibility is checked.
7255 bool Strict = Msg.IsSymbolic;
7256
7257 if (Strict) {
7258 if (Msg.Id == OPR_ID_UNSUPPORTED) {
7259 Error(Msg.Loc, "specified message id is not supported on this GPU");
7260 return false;
7261 }
7262 } else {
7263 if (!isValidMsgId(Msg.Id, getSTI())) {
7264 Error(Msg.Loc, "invalid message id");
7265 return false;
7266 }
7267 }
7268 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) {
7269 if (Op.IsDefined) {
7270 Error(Op.Loc, "message does not support operations");
7271 } else {
7272 Error(Msg.Loc, "missing message operation");
7273 }
7274 return false;
7275 }
7276 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
7277 Error(Op.Loc, "invalid operation id");
7278 return false;
7279 }
7280 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) &&
7281 Stream.IsDefined) {
7282 Error(Stream.Loc, "message operation does not support streams");
7283 return false;
7284 }
7285 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
7286 Error(Stream.Loc, "invalid message stream id");
7287 return false;
7288 }
7289 return true;
7290 }
7291
parseSendMsg(OperandVector & Operands)7292 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
7293 using namespace llvm::AMDGPU::SendMsg;
7294
7295 int64_t ImmVal = 0;
7296 SMLoc Loc = getLoc();
7297
7298 if (trySkipId("sendmsg", AsmToken::LParen)) {
7299 OperandInfoTy Msg(OPR_ID_UNKNOWN);
7300 OperandInfoTy Op(OP_NONE_);
7301 OperandInfoTy Stream(STREAM_ID_NONE_);
7302 if (parseSendMsgBody(Msg, Op, Stream) &&
7303 validateSendMsg(Msg, Op, Stream)) {
7304 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
7305 } else {
7306 return ParseStatus::Failure;
7307 }
7308 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
7309 if (ImmVal < 0 || !isUInt<16>(ImmVal))
7310 return Error(Loc, "invalid immediate: only 16-bit values are legal");
7311 } else {
7312 return ParseStatus::Failure;
7313 }
7314
7315 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
7316 return ParseStatus::Success;
7317 }
7318
isSendMsg() const7319 bool AMDGPUOperand::isSendMsg() const {
7320 return isImmTy(ImmTySendMsg);
7321 }
7322
7323 //===----------------------------------------------------------------------===//
7324 // v_interp
7325 //===----------------------------------------------------------------------===//
7326
parseInterpSlot(OperandVector & Operands)7327 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
7328 StringRef Str;
7329 SMLoc S = getLoc();
7330
7331 if (!parseId(Str))
7332 return ParseStatus::NoMatch;
7333
7334 int Slot = StringSwitch<int>(Str)
7335 .Case("p10", 0)
7336 .Case("p20", 1)
7337 .Case("p0", 2)
7338 .Default(-1);
7339
7340 if (Slot == -1)
7341 return Error(S, "invalid interpolation slot");
7342
7343 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
7344 AMDGPUOperand::ImmTyInterpSlot));
7345 return ParseStatus::Success;
7346 }
7347
// Parse an interpolation attribute of the form "attr<N>.<chan>"
// (e.g. "attr3.x") and push two immediates: the attribute number and
// the channel (x/y/z/w -> 0..3).
ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return ParseStatus::NoMatch;

  if (!Str.starts_with("attr"))
    return Error(S, "invalid interpolation attribute");

  // The last two characters must be the channel suffix.
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
                     .Case(".x", 0)
                     .Case(".y", 1)
                     .Case(".z", 2)
                     .Case(".w", 3)
                     .Default(-1);
  if (AttrChan == -1)
    return Error(S, "invalid or missing interpolation attribute channel");

  // Strip the "attr" prefix and the 2-character channel suffix,
  // leaving only the decimal attribute number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return Error(S, "invalid or missing interpolation attribute number");

  if (Attr > 32)
    return Error(S, "out of bounds interpolation attribute number");

  // Chan is a view into the original identifier, so its data pointer
  // yields the exact source location of the channel suffix.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(
      this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
  return ParseStatus::Success;
}
7385
7386 //===----------------------------------------------------------------------===//
7387 // exp
7388 //===----------------------------------------------------------------------===//
7389
parseExpTgt(OperandVector & Operands)7390 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
7391 using namespace llvm::AMDGPU::Exp;
7392
7393 StringRef Str;
7394 SMLoc S = getLoc();
7395
7396 if (!parseId(Str))
7397 return ParseStatus::NoMatch;
7398
7399 unsigned Id = getTgtId(Str);
7400 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
7401 return Error(S, (Id == ET_INVALID)
7402 ? "invalid exp target"
7403 : "exp target is not supported on this GPU");
7404
7405 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
7406 AMDGPUOperand::ImmTyExpTgt));
7407 return ParseStatus::Success;
7408 }
7409
7410 //===----------------------------------------------------------------------===//
7411 // parser helpers
7412 //===----------------------------------------------------------------------===//
7413
7414 bool
isId(const AsmToken & Token,const StringRef Id) const7415 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
7416 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
7417 }
7418
7419 bool
isId(const StringRef Id) const7420 AMDGPUAsmParser::isId(const StringRef Id) const {
7421 return isId(getToken(), Id);
7422 }
7423
7424 bool
isToken(const AsmToken::TokenKind Kind) const7425 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
7426 return getTokenKind() == Kind;
7427 }
7428
getId() const7429 StringRef AMDGPUAsmParser::getId() const {
7430 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
7431 }
7432
7433 bool
trySkipId(const StringRef Id)7434 AMDGPUAsmParser::trySkipId(const StringRef Id) {
7435 if (isId(Id)) {
7436 lex();
7437 return true;
7438 }
7439 return false;
7440 }
7441
7442 bool
trySkipId(const StringRef Pref,const StringRef Id)7443 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
7444 if (isToken(AsmToken::Identifier)) {
7445 StringRef Tok = getTokenStr();
7446 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
7447 lex();
7448 return true;
7449 }
7450 }
7451 return false;
7452 }
7453
7454 bool
trySkipId(const StringRef Id,const AsmToken::TokenKind Kind)7455 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
7456 if (isId(Id) && peekToken().is(Kind)) {
7457 lex();
7458 lex();
7459 return true;
7460 }
7461 return false;
7462 }
7463
7464 bool
trySkipToken(const AsmToken::TokenKind Kind)7465 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
7466 if (isToken(Kind)) {
7467 lex();
7468 return true;
7469 }
7470 return false;
7471 }
7472
7473 bool
skipToken(const AsmToken::TokenKind Kind,const StringRef ErrMsg)7474 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
7475 const StringRef ErrMsg) {
7476 if (!trySkipToken(Kind)) {
7477 Error(getLoc(), ErrMsg);
7478 return false;
7479 }
7480 return true;
7481 }
7482
7483 bool
parseExpr(int64_t & Imm,StringRef Expected)7484 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
7485 SMLoc S = getLoc();
7486
7487 const MCExpr *Expr;
7488 if (Parser.parseExpression(Expr))
7489 return false;
7490
7491 if (Expr->evaluateAsAbsolute(Imm))
7492 return true;
7493
7494 if (Expected.empty()) {
7495 Error(S, "expected absolute expression");
7496 } else {
7497 Error(S, Twine("expected ", Expected) +
7498 Twine(" or an absolute expression"));
7499 }
7500 return false;
7501 }
7502
7503 bool
parseExpr(OperandVector & Operands)7504 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
7505 SMLoc S = getLoc();
7506
7507 const MCExpr *Expr;
7508 if (Parser.parseExpression(Expr))
7509 return false;
7510
7511 int64_t IntVal;
7512 if (Expr->evaluateAsAbsolute(IntVal)) {
7513 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
7514 } else {
7515 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
7516 }
7517 return true;
7518 }
7519
7520 bool
parseString(StringRef & Val,const StringRef ErrMsg)7521 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
7522 if (isToken(AsmToken::String)) {
7523 Val = getToken().getStringContents();
7524 lex();
7525 return true;
7526 } else {
7527 Error(getLoc(), ErrMsg);
7528 return false;
7529 }
7530 }
7531
7532 bool
parseId(StringRef & Val,const StringRef ErrMsg)7533 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
7534 if (isToken(AsmToken::Identifier)) {
7535 Val = getTokenStr();
7536 lex();
7537 return true;
7538 } else {
7539 if (!ErrMsg.empty())
7540 Error(getLoc(), ErrMsg);
7541 return false;
7542 }
7543 }
7544
7545 AsmToken
getToken() const7546 AMDGPUAsmParser::getToken() const {
7547 return Parser.getTok();
7548 }
7549
peekToken(bool ShouldSkipSpace)7550 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
7551 return isToken(AsmToken::EndOfStatement)
7552 ? getToken()
7553 : getLexer().peekTok(ShouldSkipSpace);
7554 }
7555
7556 void
peekTokens(MutableArrayRef<AsmToken> Tokens)7557 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
7558 auto TokCount = getLexer().peekTokens(Tokens);
7559
7560 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
7561 Tokens[Idx] = AsmToken(AsmToken::Error, "");
7562 }
7563
7564 AsmToken::TokenKind
getTokenKind() const7565 AMDGPUAsmParser::getTokenKind() const {
7566 return getLexer().getKind();
7567 }
7568
7569 SMLoc
getLoc() const7570 AMDGPUAsmParser::getLoc() const {
7571 return getToken().getLoc();
7572 }
7573
7574 StringRef
getTokenStr() const7575 AMDGPUAsmParser::getTokenStr() const {
7576 return getToken().getString();
7577 }
7578
7579 void
lex()7580 AMDGPUAsmParser::lex() {
7581 Parser.Lex();
7582 }
7583
getInstLoc(const OperandVector & Operands) const7584 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
7585 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
7586 }
7587
7588 SMLoc
getOperandLoc(std::function<bool (const AMDGPUOperand &)> Test,const OperandVector & Operands) const7589 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
7590 const OperandVector &Operands) const {
7591 for (unsigned i = Operands.size() - 1; i > 0; --i) {
7592 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
7593 if (Test(Op))
7594 return Op.getStartLoc();
7595 }
7596 return getInstLoc(Operands);
7597 }
7598
7599 SMLoc
getImmLoc(AMDGPUOperand::ImmTy Type,const OperandVector & Operands) const7600 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
7601 const OperandVector &Operands) const {
7602 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
7603 return getOperandLoc(Test, Operands);
7604 }
7605
7606 SMLoc
getRegLoc(unsigned Reg,const OperandVector & Operands) const7607 AMDGPUAsmParser::getRegLoc(unsigned Reg,
7608 const OperandVector &Operands) const {
7609 auto Test = [=](const AMDGPUOperand& Op) {
7610 return Op.isRegKind() && Op.getReg() == Reg;
7611 };
7612 return getOperandLoc(Test, Operands);
7613 }
7614
getLitLoc(const OperandVector & Operands,bool SearchMandatoryLiterals) const7615 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands,
7616 bool SearchMandatoryLiterals) const {
7617 auto Test = [](const AMDGPUOperand& Op) {
7618 return Op.IsImmKindLiteral() || Op.isExpr();
7619 };
7620 SMLoc Loc = getOperandLoc(Test, Operands);
7621 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands))
7622 Loc = getMandatoryLitLoc(Operands);
7623 return Loc;
7624 }
7625
getMandatoryLitLoc(const OperandVector & Operands) const7626 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const {
7627 auto Test = [](const AMDGPUOperand &Op) {
7628 return Op.IsImmKindMandatoryLiteral();
7629 };
7630 return getOperandLoc(Test, Operands);
7631 }
7632
7633 SMLoc
getConstLoc(const OperandVector & Operands) const7634 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
7635 auto Test = [](const AMDGPUOperand& Op) {
7636 return Op.isImmKindConst();
7637 };
7638 return getOperandLoc(Test, Operands);
7639 }
7640
7641 //===----------------------------------------------------------------------===//
7642 // swizzle
7643 //===----------------------------------------------------------------------===//
7644
7645 LLVM_READNONE
7646 static unsigned
encodeBitmaskPerm(const unsigned AndMask,const unsigned OrMask,const unsigned XorMask)7647 encodeBitmaskPerm(const unsigned AndMask,
7648 const unsigned OrMask,
7649 const unsigned XorMask) {
7650 using namespace llvm::AMDGPU::Swizzle;
7651
7652 return BITMASK_PERM_ENC |
7653 (AndMask << BITMASK_AND_SHIFT) |
7654 (OrMask << BITMASK_OR_SHIFT) |
7655 (XorMask << BITMASK_XOR_SHIFT);
7656 }
7657
7658 bool
parseSwizzleOperand(int64_t & Op,const unsigned MinVal,const unsigned MaxVal,const StringRef ErrMsg,SMLoc & Loc)7659 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
7660 const unsigned MinVal,
7661 const unsigned MaxVal,
7662 const StringRef ErrMsg,
7663 SMLoc &Loc) {
7664 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7665 return false;
7666 }
7667 Loc = getLoc();
7668 if (!parseExpr(Op)) {
7669 return false;
7670 }
7671 if (Op < MinVal || Op > MaxVal) {
7672 Error(Loc, ErrMsg);
7673 return false;
7674 }
7675
7676 return true;
7677 }
7678
7679 bool
parseSwizzleOperands(const unsigned OpNum,int64_t * Op,const unsigned MinVal,const unsigned MaxVal,const StringRef ErrMsg)7680 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
7681 const unsigned MinVal,
7682 const unsigned MaxVal,
7683 const StringRef ErrMsg) {
7684 SMLoc Loc;
7685 for (unsigned i = 0; i < OpNum; ++i) {
7686 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
7687 return false;
7688 }
7689
7690 return true;
7691 }
7692
7693 bool
parseSwizzleQuadPerm(int64_t & Imm)7694 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
7695 using namespace llvm::AMDGPU::Swizzle;
7696
7697 int64_t Lane[LANE_NUM];
7698 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
7699 "expected a 2-bit lane id")) {
7700 Imm = QUAD_PERM_ENC;
7701 for (unsigned I = 0; I < LANE_NUM; ++I) {
7702 Imm |= Lane[I] << (LANE_SHIFT * I);
7703 }
7704 return true;
7705 }
7706 return false;
7707 }
7708
7709 bool
parseSwizzleBroadcast(int64_t & Imm)7710 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
7711 using namespace llvm::AMDGPU::Swizzle;
7712
7713 SMLoc Loc;
7714 int64_t GroupSize;
7715 int64_t LaneIdx;
7716
7717 if (!parseSwizzleOperand(GroupSize,
7718 2, 32,
7719 "group size must be in the interval [2,32]",
7720 Loc)) {
7721 return false;
7722 }
7723 if (!isPowerOf2_64(GroupSize)) {
7724 Error(Loc, "group size must be a power of two");
7725 return false;
7726 }
7727 if (parseSwizzleOperand(LaneIdx,
7728 0, GroupSize - 1,
7729 "lane id must be in the interval [0,group size - 1]",
7730 Loc)) {
7731 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
7732 return true;
7733 }
7734 return false;
7735 }
7736
7737 bool
parseSwizzleReverse(int64_t & Imm)7738 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
7739 using namespace llvm::AMDGPU::Swizzle;
7740
7741 SMLoc Loc;
7742 int64_t GroupSize;
7743
7744 if (!parseSwizzleOperand(GroupSize,
7745 2, 32,
7746 "group size must be in the interval [2,32]",
7747 Loc)) {
7748 return false;
7749 }
7750 if (!isPowerOf2_64(GroupSize)) {
7751 Error(Loc, "group size must be a power of two");
7752 return false;
7753 }
7754
7755 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
7756 return true;
7757 }
7758
7759 bool
parseSwizzleSwap(int64_t & Imm)7760 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
7761 using namespace llvm::AMDGPU::Swizzle;
7762
7763 SMLoc Loc;
7764 int64_t GroupSize;
7765
7766 if (!parseSwizzleOperand(GroupSize,
7767 1, 16,
7768 "group size must be in the interval [1,16]",
7769 Loc)) {
7770 return false;
7771 }
7772 if (!isPowerOf2_64(GroupSize)) {
7773 Error(Loc, "group size must be a power of two");
7774 return false;
7775 }
7776
7777 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
7778 return true;
7779 }
7780
7781 bool
parseSwizzleBitmaskPerm(int64_t & Imm)7782 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
7783 using namespace llvm::AMDGPU::Swizzle;
7784
7785 if (!skipToken(AsmToken::Comma, "expected a comma")) {
7786 return false;
7787 }
7788
7789 StringRef Ctl;
7790 SMLoc StrLoc = getLoc();
7791 if (!parseString(Ctl)) {
7792 return false;
7793 }
7794 if (Ctl.size() != BITMASK_WIDTH) {
7795 Error(StrLoc, "expected a 5-character mask");
7796 return false;
7797 }
7798
7799 unsigned AndMask = 0;
7800 unsigned OrMask = 0;
7801 unsigned XorMask = 0;
7802
7803 for (size_t i = 0; i < Ctl.size(); ++i) {
7804 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7805 switch(Ctl[i]) {
7806 default:
7807 Error(StrLoc, "invalid mask");
7808 return false;
7809 case '0':
7810 break;
7811 case '1':
7812 OrMask |= Mask;
7813 break;
7814 case 'p':
7815 AndMask |= Mask;
7816 break;
7817 case 'i':
7818 AndMask |= Mask;
7819 XorMask |= Mask;
7820 break;
7821 }
7822 }
7823
7824 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7825 return true;
7826 }
7827
7828 bool
parseSwizzleOffset(int64_t & Imm)7829 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7830
7831 SMLoc OffsetLoc = getLoc();
7832
7833 if (!parseExpr(Imm, "a swizzle macro")) {
7834 return false;
7835 }
7836 if (!isUInt<16>(Imm)) {
7837 Error(OffsetLoc, "expected a 16-bit offset");
7838 return false;
7839 }
7840 return true;
7841 }
7842
7843 bool
parseSwizzleMacro(int64_t & Imm)7844 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7845 using namespace llvm::AMDGPU::Swizzle;
7846
7847 if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
7848
7849 SMLoc ModeLoc = getLoc();
7850 bool Ok = false;
7851
7852 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7853 Ok = parseSwizzleQuadPerm(Imm);
7854 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7855 Ok = parseSwizzleBitmaskPerm(Imm);
7856 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7857 Ok = parseSwizzleBroadcast(Imm);
7858 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7859 Ok = parseSwizzleSwap(Imm);
7860 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7861 Ok = parseSwizzleReverse(Imm);
7862 } else {
7863 Error(ModeLoc, "expected a swizzle mode");
7864 }
7865
7866 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
7867 }
7868
7869 return false;
7870 }
7871
parseSwizzle(OperandVector & Operands)7872 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
7873 SMLoc S = getLoc();
7874 int64_t Imm = 0;
7875
7876 if (trySkipId("offset")) {
7877
7878 bool Ok = false;
7879 if (skipToken(AsmToken::Colon, "expected a colon")) {
7880 if (trySkipId("swizzle")) {
7881 Ok = parseSwizzleMacro(Imm);
7882 } else {
7883 Ok = parseSwizzleOffset(Imm);
7884 }
7885 }
7886
7887 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7888
7889 return Ok ? ParseStatus::Success : ParseStatus::Failure;
7890 }
7891 return ParseStatus::NoMatch;
7892 }
7893
7894 bool
isSwizzle() const7895 AMDGPUOperand::isSwizzle() const {
7896 return isImmTy(ImmTySwizzle);
7897 }
7898
7899 //===----------------------------------------------------------------------===//
7900 // VGPR Index Mode
7901 //===----------------------------------------------------------------------===//
7902
// Parse the interior of a gpr_idx(...) macro: either an immediate
// closing parenthesis (meaning OFF) or a comma-separated list of
// distinct VGPR index mode names. Returns the resulting mode bitmask,
// or UNDEF after emitting a diagnostic on error.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  // gpr_idx() with an empty mode list means "off".
  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each symbolic mode name; each maps to one bit of the mask.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // Mention the closing parenthesis only when no mode has been
      // parsed yet (an empty list would have been handled above).
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    // Each mode may be listed at most once.
    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
7946
parseGPRIdxMode(OperandVector & Operands)7947 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
7948
7949 using namespace llvm::AMDGPU::VGPRIndexMode;
7950
7951 int64_t Imm = 0;
7952 SMLoc S = getLoc();
7953
7954 if (trySkipId("gpr_idx", AsmToken::LParen)) {
7955 Imm = parseGPRIdxMacro();
7956 if (Imm == UNDEF)
7957 return ParseStatus::Failure;
7958 } else {
7959 if (getParser().parseAbsoluteExpression(Imm))
7960 return ParseStatus::Failure;
7961 if (Imm < 0 || !isUInt<4>(Imm))
7962 return Error(S, "invalid immediate: only 4-bit values are legal");
7963 }
7964
7965 Operands.push_back(
7966 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
7967 return ParseStatus::Success;
7968 }
7969
isGPRIdxMode() const7970 bool AMDGPUOperand::isGPRIdxMode() const {
7971 return isImmTy(ImmTyGprIdxMode);
7972 }
7973
7974 //===----------------------------------------------------------------------===//
7975 // sopp branch targets
7976 //===----------------------------------------------------------------------===//
7977
// Parse a SOPP branch target: a label or an absolute expression that
// fits a signed 16-bit jump offset.
ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  if (!parseExpr(Operands))
    return ParseStatus::Failure;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  // The operand was already pushed by parseExpr(), so Success is
  // returned here even when a diagnostic was emitted above.
  return ParseStatus::Success;
}
8003
8004 //===----------------------------------------------------------------------===//
8005 // Boolean holding registers
8006 //===----------------------------------------------------------------------===//
8007
// Parse a boolean-holding register operand. At parse time this is an
// ordinary register; register-class checks happen during validation.
ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}
8011
8012 //===----------------------------------------------------------------------===//
8013 // mubuf
8014 //===----------------------------------------------------------------------===//
8015
// Convert parsed MUBUF operands into an MCInst. For atomics, selects
// the no-return opcode variant when the GLC cpol bit is clear and
// inserts the tied source for returning atomics.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic) {
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1; // Operands[0] is the mnemonic token.
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    // An atomic returns its result iff the GLC bit is set in cpol.
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    // Re-derive the flag from the (possibly replaced) opcode.
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Optional immediates are appended in canonical order.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
8076
8077 //===----------------------------------------------------------------------===//
8078 // smrd
8079 //===----------------------------------------------------------------------===//
8080
isSMRDOffset8() const8081 bool AMDGPUOperand::isSMRDOffset8() const {
8082 return isImmLiteral() && isUInt<8>(getImm());
8083 }
8084
isSMEMOffset() const8085 bool AMDGPUOperand::isSMEMOffset() const {
8086 // Offset range is checked later by validator.
8087 return isImmLiteral();
8088 }
8089
isSMRDLiteralOffset() const8090 bool AMDGPUOperand::isSMRDLiteralOffset() const {
8091 // 32-bit literals are only supported on CI and we only want to use them
8092 // when the offset is > 8-bits.
8093 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
8094 }
8095
8096 //===----------------------------------------------------------------------===//
8097 // vop3
8098 //===----------------------------------------------------------------------===//
8099
// Translate an omod "mul:N" value into its field encoding
// (1 -> 0, 2 -> 1, 4 -> 2). Returns false for any other multiplier.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
8107
// Translate an omod "div:N" value into its field encoding
// (1 -> 0, 2 -> 3). Returns false for any other divisor.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
8121
8122 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
8123 // This is intentional and ensures compatibility with sp3.
8124 // See bug 35397 for details.
convertDppBoundCtrl(int64_t & BoundCtrl)8125 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
8126 if (BoundCtrl == 0 || BoundCtrl == 1) {
8127 if (!isGFX11Plus())
8128 BoundCtrl = 1;
8129 return true;
8130 }
8131 return false;
8132 }
8133
// Hook invoked before parsing begins: ensure the target streamer has a
// target ID and, for HSA ABI targets, emit the amdgcn target directive.
void AMDGPUAsmParser::onBeginOfFile() {
  // Nothing to do without a target streamer, or for r600 targets.
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  // Lazily initialize the target ID from the subtarget feature string.
  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(),
                                           getSTI().getFeatureString());

  if (isHsaAbi(getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
8146
parseOModSI(OperandVector & Operands)8147 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
8148 StringRef Name = getTokenStr();
8149 if (Name == "mul") {
8150 return parseIntWithPrefix("mul", Operands,
8151 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
8152 }
8153
8154 if (Name == "div") {
8155 return parseIntWithPrefix("div", Operands,
8156 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
8157 }
8158
8159 return ParseStatus::NoMatch;
8160 }
8161
8162 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to
8163 // the number of src operands present, then copies that bit into src0_modifiers.
void cvtVOP3DstOpSelOnly(MCInst &Inst) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  // Opcodes without an op_sel operand need no fixup.
  if (OpSelIdx == -1)
    return;

  // Count the source operands actually present on this opcode; the
  // DST_OP_SEL bit sits immediately above the last source's bit.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
       ++SrcNum)
    ;
  assert(SrcNum > 0);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // Mirror the DST_OP_SEL bit into src0_modifiers.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
8187
// Convert a VOP3 instruction with op_sel: do the regular VOP3P
// conversion, then propagate the DST_OP_SEL bit into src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst);
}
8193
// Overload taking a caller-provided optional-immediate index map;
// otherwise identical to the two-argument form.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst);
}
8199
isRegOrImmWithInputMods(const MCInstrDesc & Desc,unsigned OpNum)8200 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
8201 return
8202 // 1. This operand is input modifiers
8203 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
8204 // 2. This is not last operand
8205 && Desc.NumOperands > (OpNum + 1)
8206 // 3. Next operand is register class
8207 && Desc.operands()[OpNum + 1].RegClass != -1
8208 // 4. Next register is not tied to any other operand
8209 && Desc.getOperandConstraint(OpNum + 1,
8210 MCOI::OperandConstraint::TIED_TO) == -1;
8211 }
8212
// Convert parsed VOP3 interpolation operands into an MCInst, appending
// the optional high/clamp/omod modifiers in canonical order.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic; copy the defs (results) first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // A source with input modifiers contributes two MC operands
      // (modifiers + value).
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
               Op.isInterpAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      // Optional modifiers are recorded here and appended below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyHigh);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClampSI);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);
}
8250
// Convert parsed VINTERP operands into an MCInst and distribute the
// op_sel bits into the per-source modifier operands.
void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Operands[0] is the mnemonic; copy the defs (results) first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // A source with input modifiers contributes two MC operands
      // (modifiers + value).
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);

  // Without an op_sel operand there are no bits to distribute.
  if (OpSelIdx == -1)
    return;

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // Copy op_sel bit J into source J's modifiers; bit 3 (the dst bit)
  // goes into src0_modifiers as DST_OP_SEL.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;
    if (ModOps[J] == AMDGPU::OpName::src0_modifiers &&
        (OpSel & (1 << 3)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;

    Inst.getOperand(ModIdx).setImm(ModVal);
  }
}
8310
// Common conversion for VOP3 instructions: copy defs, add sources (with FP
// modifiers where applicable), record optional immediate modifiers in
// OptionalIdx, then append clamp/omod. OptionalIdx is exposed so the
// VOP3P/DPP converters can reuse the collected positions.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Operands[0] is the mnemonic token; copy each def register first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Modifier-bearing source occupies two MCInst slots (mods + value).
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (Op.isRegOrImm()) {
      Op.addRegOrImmOperands(Inst, 1);
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClampSI);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyOModSI);

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (isMAC(Opc)) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}
8355
cvtVOP3(MCInst & Inst,const OperandVector & Operands)8356 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8357 OptionalImmIndexMap OptionalIdx;
8358 cvtVOP3(Inst, Operands, OptionalIdx);
8359 }
8360
// VOP3P-specific conversion: add implicit operands required by certain FP8
// conversion opcodes, emit the op_sel/op_sel_hi/neg_lo/neg_hi immediates,
// then fold those packed bit-vectors into the per-source modifier operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  // These stochastic-rounding FP8 converts need a dummy src2_modifiers and
  // a copy of dst appended up front.
  if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_gfx12) {
    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
    Inst.addOperand(Inst.getOperand(0));
  }

  // Adding vdst_in operand is already covered for these DPP instructions in
  // cvtVOP3DPP.
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_e64_dpp8_gfx12)) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed instructions default op_sel_hi to all-ones; others to 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);

  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
  if (NegHiIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1)
    NegLo = Inst.getOperand(NegLoIdx).getImm();

  if (NegHiIdx != -1)
    NegHi = Inst.getOperand(NegHiIdx).getImm();

  // Bit J of each packed immediate corresponds to srcJ; merge it into the
  // matching src*_modifiers operand.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
8461
cvtVOP3P(MCInst & Inst,const OperandVector & Operands)8462 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
8463 OptionalImmIndexMap OptIdx;
8464 cvtVOP3(Inst, Operands, OptIdx);
8465 cvtVOP3P(Inst, Operands, OptIdx);
8466 }
8467
addSrcModifiersAndSrc(MCInst & Inst,const OperandVector & Operands,unsigned i,unsigned Opc,unsigned OpName)8468 static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands,
8469 unsigned i, unsigned Opc, unsigned OpName) {
8470 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
8471 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
8472 else
8473 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
8474 }
8475
// Convert parsed SWMMAC operands into MCInst operands. The dst register
// (Operands[1]) is added a second time as the tied srcTiedDef input.
void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
  addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2

  // Remaining parsed operands are optional immediates; record their
  // positions so they can be emitted in encoding order below.
  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey8bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey16bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClampSI);

  // Finish with the shared VOP3P handling (op_sel/neg folding etc.).
  cvtVOP3P(Inst, Operands, OptIdx);
}
8504
8505 //===----------------------------------------------------------------------===//
8506 // VOPD
8507 //===----------------------------------------------------------------------===//
8508
parseVOPD(OperandVector & Operands)8509 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
8510 if (!hasVOPD(getSTI()))
8511 return ParseStatus::NoMatch;
8512
8513 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
8514 SMLoc S = getLoc();
8515 lex();
8516 lex();
8517 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
8518 SMLoc OpYLoc = getLoc();
8519 StringRef OpYName;
8520 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
8521 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
8522 return ParseStatus::Success;
8523 }
8524 return Error(OpYLoc, "expected a VOPDY instruction after ::");
8525 }
8526 return ParseStatus::NoMatch;
8527 }
8528
// Create VOPD MCInst operands using parsed assembler operands.
void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
  // Append the parsed operand at ParsedOprIdx to Inst, as either a register
  // or an immediate.
  auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      return;
    }
    if (Op.isImm()) {
      Op.addImmOperands(Inst, 1);
      return;
    }
    llvm_unreachable("Unhandled operand type in cvtVOPD");
  };

  const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);

  // MCInst operands are ordered as follows:
  //   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]

  // Both component destinations come first.
  for (auto CompIdx : VOPD::COMPONENTS) {
    addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
  }

  // Then the sources of each component, in component order.
  for (auto CompIdx : VOPD::COMPONENTS) {
    const auto &CInfo = InstInfo[CompIdx];
    auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
    for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
      addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
    // If src2 acts as an accumulator, the component's dst is re-added as
    // that source.
    if (CInfo.hasSrc2Acc())
      addOp(CInfo.getIndexOfDstInParsedOperands());
  }
}
8562
8563 //===----------------------------------------------------------------------===//
8564 // dpp
8565 //===----------------------------------------------------------------------===//
8566
// A dpp8 selector operand is an immediate tagged with ImmTyDPP8.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}
8570
isDPPCtrl() const8571 bool AMDGPUOperand::isDPPCtrl() const {
8572 using namespace AMDGPU::DPP;
8573
8574 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
8575 if (result) {
8576 int64_t Imm = getImm();
8577 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
8578 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
8579 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
8580 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
8581 (Imm == DppCtrl::WAVE_SHL1) ||
8582 (Imm == DppCtrl::WAVE_ROL1) ||
8583 (Imm == DppCtrl::WAVE_SHR1) ||
8584 (Imm == DppCtrl::WAVE_ROR1) ||
8585 (Imm == DppCtrl::ROW_MIRROR) ||
8586 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
8587 (Imm == DppCtrl::BCAST15) ||
8588 (Imm == DppCtrl::BCAST31) ||
8589 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
8590 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
8591 }
8592 return false;
8593 }
8594
8595 //===----------------------------------------------------------------------===//
8596 // mAI
8597 //===----------------------------------------------------------------------===//
8598
isBLGP() const8599 bool AMDGPUOperand::isBLGP() const {
8600 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
8601 }
8602
isCBSZ() const8603 bool AMDGPUOperand::isCBSZ() const {
8604 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
8605 }
8606
isABID() const8607 bool AMDGPUOperand::isABID() const {
8608 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
8609 }
8610
isS16Imm() const8611 bool AMDGPUOperand::isS16Imm() const {
8612 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
8613 }
8614
isU16Imm() const8615 bool AMDGPUOperand::isU16Imm() const {
8616 return isImmLiteral() && isUInt<16>(getImm());
8617 }
8618
8619 //===----------------------------------------------------------------------===//
8620 // dim
8621 //===----------------------------------------------------------------------===//
8622
// Parse a MIMG dimension id such as "1D" or "SQ_RSRC_IMG_2D". Returns true
// and sets Encoding on success.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // The identifier suffix must start exactly where the integer ended;
    // any gap means the tokens are unrelated.
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  // Also accept the SQ_RSRC_IMG_* spelling by stripping the prefix.
  if (DimId.starts_with("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}
8651
// Parse an optional "dim:<id>" operand (gfx10+ only).
ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return ParseStatus::NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding))
    return Error(Loc, "invalid dim value");

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return ParseStatus::Success;
}
8670
8671 //===----------------------------------------------------------------------===//
8672 // dpp
8673 //===----------------------------------------------------------------------===//
8674
parseDPP8(OperandVector & Operands)8675 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
8676 SMLoc S = getLoc();
8677
8678 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
8679 return ParseStatus::NoMatch;
8680
8681 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
8682
8683 int64_t Sels[8];
8684
8685 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8686 return ParseStatus::Failure;
8687
8688 for (size_t i = 0; i < 8; ++i) {
8689 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8690 return ParseStatus::Failure;
8691
8692 SMLoc Loc = getLoc();
8693 if (getParser().parseAbsoluteExpression(Sels[i]))
8694 return ParseStatus::Failure;
8695 if (0 > Sels[i] || 7 < Sels[i])
8696 return Error(Loc, "expected a 3-bit value");
8697 }
8698
8699 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8700 return ParseStatus::Failure;
8701
8702 unsigned DPP8 = 0;
8703 for (size_t i = 0; i < 8; ++i)
8704 DPP8 |= (Sels[i] << (i * 3));
8705
8706 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
8707 return ParseStatus::Success;
8708 }
8709
8710 bool
isSupportedDPPCtrl(StringRef Ctrl,const OperandVector & Operands)8711 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
8712 const OperandVector &Operands) {
8713 if (Ctrl == "row_newbcast")
8714 return isGFX90A();
8715
8716 if (Ctrl == "row_share" ||
8717 Ctrl == "row_xmask")
8718 return isGFX10Plus();
8719
8720 if (Ctrl == "wave_shl" ||
8721 Ctrl == "wave_shr" ||
8722 Ctrl == "wave_rol" ||
8723 Ctrl == "wave_ror" ||
8724 Ctrl == "row_bcast")
8725 return isVI() || isGFX9();
8726
8727 return Ctrl == "row_mirror" ||
8728 Ctrl == "row_half_mirror" ||
8729 Ctrl == "quad_perm" ||
8730 Ctrl == "row_shl" ||
8731 Ctrl == "row_shr" ||
8732 Ctrl == "row_ror";
8733 }
8734
8735 int64_t
parseDPPCtrlPerm()8736 AMDGPUAsmParser::parseDPPCtrlPerm() {
8737 // quad_perm:[%d,%d,%d,%d]
8738
8739 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
8740 return -1;
8741
8742 int64_t Val = 0;
8743 for (int i = 0; i < 4; ++i) {
8744 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
8745 return -1;
8746
8747 int64_t Temp;
8748 SMLoc Loc = getLoc();
8749 if (getParser().parseAbsoluteExpression(Temp))
8750 return -1;
8751 if (Temp < 0 || Temp > 3) {
8752 Error(Loc, "expected a 2-bit value");
8753 return -1;
8754 }
8755
8756 Val += (Temp << i * 2);
8757 }
8758
8759 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
8760 return -1;
8761
8762 return Val;
8763 }
8764
// Parse the integer argument of a dpp_ctrl modifier that takes one (e.g.
// row_shl:1 or row_bcast:15) and return the combined dpp_ctrl encoding,
// or -1 after reporting an error.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  // Base encoding plus the inclusive [Lo, Hi] range allowed for the parsed
  // argument.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
    .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
    .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
    .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
    .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
    .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
    .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // row_bcast is the only control not in the table; it accepts exactly
    // 15 or 31.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    // Single-valued controls ignore the argument; ranged controls OR it
    // into the base encoding.
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
8812
// Parse a classic dpp_ctrl operand. Controls either stand alone
// (row_mirror, row_half_mirror) or take a colon-separated argument
// (quad_perm:[..] or a single integer).
ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return ParseStatus::NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  // The helpers return -1 (after emitting a diagnostic) on parse errors.
  if (Val == -1)
    return ParseStatus::Failure;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return ParseStatus::Success;
}
8847
// Convert a VOP3 instruction in DPP or DPP8 form. Implicit operands
// (old/src2_modifiers for MACs, vdst_in, FP8 cvt placeholders) are inserted
// at the MCInst positions the instruction layout expects, then the DPP
// control/mask operands are appended.
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;

  unsigned I = 1;
  // Operands[0] is the mnemonic token; copy each def register first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {

    if (IsMAC) {
      int NumOperands = Inst.getNumOperands();
      if (OldIdx == NumOperands) {
        // Handle old operand
        constexpr int DST_IDX = 0;
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers
        Inst.addOperand(MCOperand::createImm(0));
      }
    }

    // vdst_in (when present at the current position) duplicates dst.
    int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
      Inst.addOperand(Inst.getOperand(0));
    }

    // Stochastic-rounding FP8 converts need dummy src2_modifiers plus a
    // placeholder register at the src2 position.
    bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp8_gfx12 ||
                          Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp8_gfx12 ||
                          Opc == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp_gfx12 ||
                          Opc == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp_gfx12;
    if (IsVOP3CvtSrDpp) {
      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
        Inst.addOperand(MCOperand::createImm(0));
        Inst.addOperand(MCOperand::createReg(0));
      }
    }

    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (IsDPP8 && Op.isDppFI()) {
      // dpp8 FI is emitted at the very end, after the dpp8 selector.
      Fi = Op.getImm();
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP");
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);

  // Delegate remaining (op_sel etc.) handling to the matching encoding's
  // converter.
  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
  }

  if (IsDPP8) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Classic DPP defaults: dpp_ctrl 0xe4, row_mask/bank_mask 0xf.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
  }
}
8953
// Convert parsed operands of a (non-VOP3) DPP or DPP8 instruction into
// MCInst operands.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  // Operands[0] is the mnemonic token; copy each def register first.
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        // FI is appended after the loop, following the dpp8 selector.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Classic DPP defaults: row_mask/bank_mask 0xf.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
    }
  }
}
9021
9022 //===----------------------------------------------------------------------===//
9023 // sdwa
9024 //===----------------------------------------------------------------------===//
9025
parseSDWASel(OperandVector & Operands,StringRef Prefix,AMDGPUOperand::ImmTy Type)9026 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
9027 StringRef Prefix,
9028 AMDGPUOperand::ImmTy Type) {
9029 using namespace llvm::AMDGPU::SDWA;
9030
9031 SMLoc S = getLoc();
9032 StringRef Value;
9033
9034 SMLoc StringLoc;
9035 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
9036 if (!Res.isSuccess())
9037 return Res;
9038
9039 int64_t Int;
9040 Int = StringSwitch<int64_t>(Value)
9041 .Case("BYTE_0", SdwaSel::BYTE_0)
9042 .Case("BYTE_1", SdwaSel::BYTE_1)
9043 .Case("BYTE_2", SdwaSel::BYTE_2)
9044 .Case("BYTE_3", SdwaSel::BYTE_3)
9045 .Case("WORD_0", SdwaSel::WORD_0)
9046 .Case("WORD_1", SdwaSel::WORD_1)
9047 .Case("DWORD", SdwaSel::DWORD)
9048 .Default(0xffffffff);
9049
9050 if (Int == 0xffffffff)
9051 return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
9052
9053 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
9054 return ParseStatus::Success;
9055 }
9056
parseSDWADstUnused(OperandVector & Operands)9057 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
9058 using namespace llvm::AMDGPU::SDWA;
9059
9060 SMLoc S = getLoc();
9061 StringRef Value;
9062
9063 SMLoc StringLoc;
9064 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
9065 if (!Res.isSuccess())
9066 return Res;
9067
9068 int64_t Int;
9069 Int = StringSwitch<int64_t>(Value)
9070 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
9071 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
9072 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
9073 .Default(0xffffffff);
9074
9075 if (Int == 0xffffffff)
9076 return Error(StringLoc, "invalid dst_unused value");
9077
9078 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
9079 return ParseStatus::Success;
9080 }
9081
// SDWA form of VOP1 instructions.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
9085
// SDWA form of VOP2 instructions.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
9089
// SDWA form of VOP2b instructions: skip both the dst and src VCC operands
// (SkipDstVcc = true, SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}
9093
// SDWA form of VOP2e instructions: skip only the src VCC operand
// (SkipDstVcc = false, SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}
9097
// SDWA form of VOPC instructions; the dst VCC operand is skipped on VI.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
9101
/// Convert parsed SDWA operands into \p Inst.
///
/// \param BasicInstType selects the VOP1 / VOP2 / VOPC operand layout
///        (one of the SIInstrFlags values).
/// \param SkipDstVcc drop an explicit "vcc" token used as a dst operand.
/// \param SkipSrcVcc drop an explicit "vcc" token used as a src operand.
/// VOP2b forms (v_add_u32_sdwa, v_addc_u32_sdwa, ...) spell vcc explicitly
/// in the asm even though it is implicit in the encoding.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  // Maps optional-immediate type -> index in Operands; filled while scanning,
  // consumed by addOptionalImmOperand below.
  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  // True only on the iteration immediately after a vcc was skipped, so two
  // adjacent vcc tokens (dst and src) are not both dropped by one position
  // check.
  bool SkippedVcc = false;

  // Operands[0] is the mnemonic token; defs follow it.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Source operand plus its modifiers: consumes two MCInst slots.
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused: the result goes to vcc/sdst.
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
9209
9210 /// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  // Register this asm parser for both targets served by the AMDGPU backend.
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
9215
9216 #define GET_REGISTER_MATCHER
9217 #define GET_MATCHER_IMPLEMENTATION
9218 #define GET_MNEMONIC_SPELL_CHECKER
9219 #define GET_MNEMONIC_CHECKER
9220 #include "AMDGPUGenAsmMatcher.inc"
9221
parseCustomOperand(OperandVector & Operands,unsigned MCK)9222 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
9223 unsigned MCK) {
9224 switch (MCK) {
9225 case MCK_addr64:
9226 return parseTokenOp("addr64", Operands);
9227 case MCK_done:
9228 return parseTokenOp("done", Operands);
9229 case MCK_idxen:
9230 return parseTokenOp("idxen", Operands);
9231 case MCK_lds:
9232 return parseTokenOp("lds", Operands);
9233 case MCK_offen:
9234 return parseTokenOp("offen", Operands);
9235 case MCK_off:
9236 return parseTokenOp("off", Operands);
9237 case MCK_row_95_en:
9238 return parseTokenOp("row_en", Operands);
9239 case MCK_gds:
9240 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
9241 case MCK_tfe:
9242 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
9243 }
9244 return tryCustomParseOperand(Operands, MCK);
9245 }
9246
9247 // This function should be defined after auto-generated include so that we have
9248 // MatchClassKind enum defined
validateTargetOperandClass(MCParsedAsmOperand & Op,unsigned Kind)9249 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
9250 unsigned Kind) {
9251 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
9252 // But MatchInstructionImpl() expects to meet token and fails to validate
9253 // operand. This method checks if we are given immediate operand but expect to
9254 // get corresponding token.
9255 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
9256 switch (Kind) {
9257 case MCK_addr64:
9258 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
9259 case MCK_gds:
9260 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
9261 case MCK_lds:
9262 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
9263 case MCK_idxen:
9264 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
9265 case MCK_offen:
9266 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
9267 case MCK_tfe:
9268 return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
9269 case MCK_SSrcB32:
9270 // When operands have expression values, they will return true for isToken,
9271 // because it is not possible to distinguish between a token and an
9272 // expression at parse time. MatchInstructionImpl() will always try to
9273 // match an operand as a token, when isToken returns true, and when the
9274 // name of the expression is not a valid token, the match will fail,
9275 // so we need to handle it here.
9276 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
9277 case MCK_SSrcF32:
9278 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
9279 case MCK_SOPPBrTarget:
9280 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
9281 case MCK_VReg32OrOff:
9282 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
9283 case MCK_InterpSlot:
9284 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
9285 case MCK_InterpAttr:
9286 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
9287 case MCK_InterpAttrChan:
9288 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
9289 case MCK_SReg_64:
9290 case MCK_SReg_64_XEXEC:
9291 // Null is defined as a 32-bit register but
9292 // it should also be enabled with 64-bit operands.
9293 // The following code enables it for SReg_64 operands
9294 // used as source and destination. Remaining source
9295 // operands are handled in isInlinableImm.
9296 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
9297 default:
9298 return Match_InvalidOperand;
9299 }
9300 }
9301
9302 //===----------------------------------------------------------------------===//
9303 // endpgm
9304 //===----------------------------------------------------------------------===//
9305
parseEndpgm(OperandVector & Operands)9306 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
9307 SMLoc S = getLoc();
9308 int64_t Imm = 0;
9309
9310 if (!parseExpr(Imm)) {
9311 // The operand is optional, if not present default to 0
9312 Imm = 0;
9313 }
9314
9315 if (!isUInt<16>(Imm))
9316 return Error(S, "expected a 16-bit value");
9317
9318 Operands.push_back(
9319 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
9320 return ParseStatus::Success;
9321 }
9322
// True if this operand is an s_endpgm immediate.
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
9324
9325 //===----------------------------------------------------------------------===//
9326 // LDSDIR
9327 //===----------------------------------------------------------------------===//
9328
isWaitVDST() const9329 bool AMDGPUOperand::isWaitVDST() const {
9330 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
9331 }
9332
isWaitVAVDst() const9333 bool AMDGPUOperand::isWaitVAVDst() const {
9334 return isImmTy(ImmTyWaitVAVDst) && isUInt<4>(getImm());
9335 }
9336
isWaitVMVSrc() const9337 bool AMDGPUOperand::isWaitVMVSrc() const {
9338 return isImmTy(ImmTyWaitVMVSrc) && isUInt<1>(getImm());
9339 }
9340
9341 //===----------------------------------------------------------------------===//
9342 // VINTERP
9343 //===----------------------------------------------------------------------===//
9344
isWaitEXP() const9345 bool AMDGPUOperand::isWaitEXP() const {
9346 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
9347 }
9348
9349 //===----------------------------------------------------------------------===//
9350 // Split Barrier
9351 //===----------------------------------------------------------------------===//
9352
// True if this operand can serve as a split-barrier operand: any 32-bit
// inlinable immediate.
bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
9354