//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

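// Register banks the parser distinguishes. TTMPs are the trap handler
// temporaries; IS_SPECIAL covers unbanked registers such as EXEC, VCC or M0.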
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

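  // Source operand modifiers parsed from the assembly, e.g. 'abs(v0)',
  // '-v1' or 'sext(v2)'. The FP modifiers (abs/neg) and the integer
  // modifier (sext) are mutually exclusive on any single operand.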
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

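  // Named immediate operand kinds; these let the matcher tell apart the many
  // optional modifiers (offset:, glc, DPP and SDWA controls, etc.).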
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

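  // Records how an immediate ended up being encoded once it was processed:
  // as a literal constant or as an inline constant.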
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
  // value of the GLC operand.
  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

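  // Maps each optional immediate operand kind to its index in the parsed
  // operand list, so converters can emit operands in encoding order.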
  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

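  // Parsers for s_waitcnt and s_getreg/s_setreg style operands,
  // e.g. 'vmcnt(0) expcnt(0) lgkmcnt(0)' and 'hwreg(HW_REG_MODE, 0, 32)'.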
  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

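  // Parsing and validation of s_sendmsg operands,
  // e.g. 'sendmsg(MSG_GS, GS_OP_EMIT, 0)'.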
  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

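  // Parsers for the ds_swizzle_b32 offset, which is either a raw 16-bit
  // value or a macro such as 'offset:swizzle(QUAD_PERM, 0, 1, 2, 3)'.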
  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultGLC_1() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

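  // DPP support, e.g. 'v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3]
  // row_mask:0xf bank_mask:0xf' and the GFX10 dpp8 form.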
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

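  // SDWA support, e.g. 'v_mov_b32_sdwa v1, v2 dst_sel:BYTE_0
  // dst_unused:UNUSED_PRESERVE src0_sel:WORD_1'.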
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

1519 struct OptionalOperand {
1520 const char *Name;
1521 AMDGPUOperand::ImmTy Type;
1522 bool IsBit;
1523 bool (*ConvertResult)(int64_t&);
1524 };
1525
1526 } // end anonymous namespace
1527
1528 // May be called with integer type with equivalent bitwidth.
getFltSemantics(unsigned Size)1529 static const fltSemantics *getFltSemantics(unsigned Size) {
1530 switch (Size) {
1531 case 4:
1532 return &APFloat::IEEEsingle();
1533 case 8:
1534 return &APFloat::IEEEdouble();
1535 case 2:
1536 return &APFloat::IEEEhalf();
1537 default:
1538 llvm_unreachable("unsupported fp type");
1539 }
1540 }
1541
getFltSemantics(MVT VT)1542 static const fltSemantics *getFltSemantics(MVT VT) {
1543 return getFltSemantics(VT.getSizeInBits() / 8);
1544 }
1545
getOpFltSemantics(uint8_t OperandType)1546 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1547 switch (OperandType) {
1548 case AMDGPU::OPERAND_REG_IMM_INT32:
1549 case AMDGPU::OPERAND_REG_IMM_FP32:
1550 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1551 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1552 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1553 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1554 return &APFloat::IEEEsingle();
1555 case AMDGPU::OPERAND_REG_IMM_INT64:
1556 case AMDGPU::OPERAND_REG_IMM_FP64:
1557 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1558 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1559 return &APFloat::IEEEdouble();
1560 case AMDGPU::OPERAND_REG_IMM_INT16:
1561 case AMDGPU::OPERAND_REG_IMM_FP16:
1562 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1563 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1564 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1565 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1566 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1567 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1568 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1569 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1570 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1571 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1572 return &APFloat::IEEEhalf();
1573 default:
1574 llvm_unreachable("unsupported fp type");
1575 }
1576 }
1577
1578 //===----------------------------------------------------------------------===//
1579 // Operand
1580 //===----------------------------------------------------------------------===//
1581
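// Checks whether an fp literal (parsed as a 64-bit double) can be converted
// to the given operand type. Note: despite the function's name, precision
// loss is tolerated here; only a conversion that overflows or underflows
// is rejected.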
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the floating-point semantics of the operand type.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

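// Returns true if Val fits in Size bits as either a signed or an unsigned
// value, i.e. truncation to Size bits is lossless.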
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of such a literal are zeroed when it is encoded
    // (see addLiteralImmOperand), but we accept it here.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

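// Applies parsed abs/neg modifiers directly to the bit pattern of an fp
// literal: abs clears the sign bit, neg flips it. Size is the operand size
// in bytes and determines which bit is treated as the sign bit.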
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to the operand's floating-point format.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      setImmKindLiteral();
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

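// Encodes a k-imm literal of the given bit width: an integer token is
// truncated to Bitwidth bits, while an fp token is first converted to the
// fp format of that width.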
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);
  setImmKindNone();

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  case AMDGPU::SRC_VCCZ:
  case AMDGPU::SRC_EXECZ:
  case AMDGPU::SRC_SCC:
    return true;
  case AMDGPU::SGPR_NULL:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

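// Maps a register kind and a width (in 32-bit registers) to the matching MC
// register class ID, or -1 if no class of that width exists for the kind.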
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 5: return AMDGPU::VReg_160RegClassID;
      case 6: return AMDGPU::VReg_192RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
      case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 3: return AMDGPU::SGPR_96RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 5: return AMDGPU::SGPR_160RegClassID;
      case 6: return AMDGPU::SGPR_192RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 3: return AMDGPU::AReg_96RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 5: return AMDGPU::AReg_160RegClassID;
      case 6: return AMDGPU::AReg_192RegClassID;
      case 8: return AMDGPU::AReg_256RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
      case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}

static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
                                                       SMLoc &StartLoc,
                                                       SMLoc &EndLoc) {
  bool Result =
      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return MatchOperand_ParseFail;
  if (Result)
    return MatchOperand_NoMatch;
  return MatchOperand_Success;
}

bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            SMLoc Loc) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    Error(Loc, "register does not fit in the list");
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      Error(Loc, "registers in a list must have consecutive indices");
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};

static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};

static bool isRegularReg(RegisterKind Kind) {
  return Kind == IS_VGPR ||
         Kind == IS_SGPR ||
         Kind == IS_TTMP ||
         Kind == IS_AGPR;
}

static const RegInfo* getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}

static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}

bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef Str = Token.getString();
  const RegInfo *Reg = getRegularRegInfo(Str);
  if (Reg) {
    StringRef RegName = Reg->Name;
    StringRef RegSuffix = Str.substr(RegName.size());
    if (!RegSuffix.empty()) {
      unsigned Num;
      // A single register with an index: rXX
      if (getRegNum(RegSuffix, Num))
        return true;
    } else {
      // A range of registers: r[XX:YY].
      if (NextToken.is(AsmToken::LBrac))
        return true;
    }
  }

  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}

unsigned
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
                               unsigned RegNum,
                               unsigned RegWidth,
                               SMLoc Loc) {

  assert(isRegularReg(RegKind));

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // Max required alignment is 4 dwords.
    AlignSize = std::min(RegWidth, 4u);
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1) {
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs()) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }

  return RC.getRegister(RegIdx);
}

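// Parses the bracketed index part of a register specifier, e.g. the "[0:3]"
// in s[0:3]. A single index such as "[5]" is also accepted and yields a
// width of 1.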
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}

unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  unsigned Reg = getSpecialRegForName(getTokenStr());
  if (Reg) {
    RegNum = 0;
    RegWidth = 1;
    RegKind = IS_SPECIAL;
    Tokens.push_back(getToken());
    lex(); // skip register name
  }
  return Reg;
}

unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}

unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                                       unsigned &RegWidth,
                                       SmallVectorImpl<AsmToken> &Tokens) {
  unsigned Reg = AMDGPU::NoRegister;
  auto ListLoc = getLoc();

  if (!skipToken(AsmToken::LBrac,
                 "expected a register or a list of registers")) {
    return AMDGPU::NoRegister;
  }

  // List of consecutive registers, e.g.: [s0,s1,s2,s3]

  auto Loc = getLoc();
  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
    return AMDGPU::NoRegister;
  if (RegWidth != 1) {
    Error(Loc, "expected a single 32-bit register");
    return AMDGPU::NoRegister;
  }

  for (; trySkipToken(AsmToken::Comma); ) {
    RegisterKind NextRegKind;
    unsigned NextReg, NextRegNum, NextRegWidth;
    Loc = getLoc();

    if (!ParseAMDGPURegister(NextRegKind, NextReg,
                             NextRegNum, NextRegWidth,
                             Tokens)) {
      return AMDGPU::NoRegister;
    }
    if (NextRegWidth != 1) {
      Error(Loc, "expected a single 32-bit register");
      return AMDGPU::NoRegister;
    }
    if (NextRegKind != RegKind) {
      Error(Loc, "registers in a list must be of the same kind");
      return AMDGPU::NoRegister;
    }
    if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
      return AMDGPU::NoRegister;
  }

  if (!skipToken(AsmToken::RBrac,
                 "expected a comma or a closing square bracket")) {
    return AMDGPU::NoRegister;
  }

  if (isRegularReg(RegKind))
    Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);

  return Reg;
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure /*=false*/) {
  Reg = AMDGPU::NoRegister;

  SmallVector<AsmToken, 1> Tokens;
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        getLexer().UnLex(Tokens.pop_back_val());
      }
    }
    return true;
  }
  return false;
}

Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

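// Keeps the .amdgcn.next_free_{v,s}gpr symbols one past the highest register
// index used so far, so they can be referenced when computing the kernel's
// register counts.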
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}

std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbiVersion3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}

OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  assert(!isRegister());
  assert(!isModifier());

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = getLoc();
  bool Negate = false;

  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

  } else {
    int64_t IntVal;
    const MCExpr *Expr;
    SMLoc S = getLoc();

    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with syntax of standard
      // MC expressions (due to the trailing '|').
      SMLoc EndLoc;
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
        return MatchOperand_ParseFail;
    } else {
      if (Parser.parseExpression(Expr))
        return MatchOperand_ParseFail;
    }

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    } else {
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (!isRegister())
    return MatchOperand_NoMatch;

  if (auto R = parseRegister()) {
    assert(R->isReg());
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
  auto res = parseReg(Operands);
  if (res != MatchOperand_NoMatch) {
    return res;
  } else if (isModifier()) {
    return MatchOperand_NoMatch;
  } else {
    return parseImm(Operands, HasSP3AbsMod);
  }
}

bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
    const auto &str = Token.getString();
    return str == "abs" || str == "neg" || str == "sext";
  }
  return false;
}

bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}

bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but it is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
// Note that simple opcode modifiers like 'gds' may be parsed as
// expressions; this is a special case. See getExpressionAsToken.
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//     v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//     v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

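// Parses an operand with optional fp input modifiers. Accepted forms include
// abs(...) and neg(...), their SP3 equivalents |...| and a leading '-', and
// mixed forms such as -abs(...) and -|...|. Combining a modifier with its
// SP3 equivalent (e.g. 'neg' together with a leading '-') is rejected.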
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return MatchOperand_Success;
  }

  if (!isRegister())
    return MatchOperand_NoMatch;

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;
}

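// Rejects a match if it contradicts an encoding explicitly requested by a
// mnemonic suffix (e32, e64, sdwa, dpp), and handles a few encoding quirks
// such as v_mac's dst_sel restriction in SDWA form.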
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

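// Pre-GFX10 targets allow at most one constant-bus read (an SGPR or a
// literal) per VALU instruction; GFX10+ generally allows two, except for
// the 64-bit shifts below, which remain limited to one.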
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}

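// An operand occupies the constant bus if it is a non-inline immediate, an
// unresolved expression, or an SGPR other than null.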
bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
  const MCOperand &MO = Inst.getOperand(OpIdx);
  if (MO.isImm()) {
    return !isInlineConstant(Inst, OpIdx);
  } else if (MO.isReg()) {
    auto Reg = MO.getReg();
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    auto PReg = mc2PseudoReg(Reg);
    return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
  } else {
    return true;
  }
}

bool
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
                                                const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned LastSGPR = AMDGPU::NoRegister;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersection like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}

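// For instructions whose destination operand is marked early-clobber, the
// destination register must not overlap any of the source registers.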
3130 bool
validateEarlyClobberLimitations(const MCInst & Inst,const OperandVector & Operands)3131 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
3132 const OperandVector &Operands) {
3133 const unsigned Opcode = Inst.getOpcode();
3134 const MCInstrDesc &Desc = MII.get(Opcode);
3135
3136 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
3137 if (DstIdx == -1 ||
3138 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
3139 return true;
3140 }
3141
3142 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3143
3144 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3145 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3146 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3147
3148 assert(DstIdx != -1);
3149 const MCOperand &Dst = Inst.getOperand(DstIdx);
3150 assert(Dst.isReg());
3151 const unsigned DstReg = mc2PseudoReg(Dst.getReg());
3152
3153 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3154
3155 for (int SrcIdx : SrcIndices) {
3156 if (SrcIdx == -1) break;
3157 const MCOperand &Src = Inst.getOperand(SrcIdx);
3158 if (Src.isReg()) {
3159 const unsigned SrcReg = mc2PseudoReg(Src.getReg());
3160 if (isRegIntersect(DstReg, SrcReg, TRI)) {
3161 Error(getRegLoc(SrcReg, Operands),
3162 "destination must be different than all sources");
3163 return false;
3164 }
3165 }
3166 }
3167
3168 return true;
3169 }
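// Illustrative example for the early-clobber check above (opcode assumed to
// carry the EARLY_CLOBBER constraint on vdst):
//   v_qsad_pk_u16_u8 v[0:1], v[1:2], v9, v[4:5]
// is rejected because vdst v[0:1] overlaps src0 v[1:2].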
3170
3171 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
3172
3173 const unsigned Opc = Inst.getOpcode();
3174 const MCInstrDesc &Desc = MII.get(Opc);
3175
3176 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
3177 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
3178 assert(ClampIdx != -1);
3179 return Inst.getOperand(ClampIdx).getImm() == 0;
3180 }
3181
3182 return true;
3183 }
3184
3185 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
3186
3187 const unsigned Opc = Inst.getOpcode();
3188 const MCInstrDesc &Desc = MII.get(Opc);
3189
3190 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3191 return true;
3192
3193 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
3194 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3195 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
3196
3197 assert(VDataIdx != -1);
3198
3199 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
3200 return true;
3201
3202 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
3203 unsigned TFESize = Inst.getOperand(TFEIdx).getImm() ? 1 : 0;
3204 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3205 if (DMask == 0)
3206 DMask = 1;
3207
3208 unsigned DataSize =
3209 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
3210 if (hasPackedD16()) {
3211 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3212 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
3213 DataSize = (DataSize + 1) / 2;
3214 }
3215
3216 return (VDataSize / 4) == DataSize + TFESize;
3217 }
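// Illustrative example for the data size check above: dmask:0x7 enables
// three components, so vdata must supply 3 VGPRs (4 with tfe), e.g.
//   image_load v[4:6], v[1:4], s[8:15] dmask:0x7 unorm
// With packed d16 the expected component count is halved, rounded up.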
3218
3219 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
3220 const unsigned Opc = Inst.getOpcode();
3221 const MCInstrDesc &Desc = MII.get(Opc);
3222
3223 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
3224 return true;
3225
3226 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
3227
3228 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
3229 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
3230 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
3231 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
3232 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3233
3234 assert(VAddr0Idx != -1);
3235 assert(SrsrcIdx != -1);
3236 assert(SrsrcIdx > VAddr0Idx);
3237
3238 if (DimIdx == -1)
3239 return true; // intersect_ray
3240
3241 unsigned Dim = Inst.getOperand(DimIdx).getImm();
3242 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
3243 bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
3244 unsigned VAddrSize =
3245 IsNSA ? SrsrcIdx - VAddr0Idx
3246 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
3247
3248 unsigned AddrSize = BaseOpcode->NumExtraArgs +
3249 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) +
3250 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) +
3251 (BaseOpcode->LodOrClampOrMip ? 1 : 0);
3252 if (!IsNSA) {
3253 if (AddrSize > 8)
3254 AddrSize = 16;
3255 else if (AddrSize > 4)
3256 AddrSize = 8;
3257 }
3258
3259 return VAddrSize == AddrSize;
3260 }
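// Illustrative summary of the address size check above: a 2D sample needs
// 2 coordinate VGPRs; gradients, extra arguments, and lod/clamp/mip each
// add to the expected count, and non-NSA vaddr sizes are rounded up to the
// next supported tuple (4, 8, or 16 registers).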
3261
3262 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
3263
3264 const unsigned Opc = Inst.getOpcode();
3265 const MCInstrDesc &Desc = MII.get(Opc);
3266
3267 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3268 return true;
3269 if (!Desc.mayLoad() || !Desc.mayStore())
3270 return true; // Not atomic
3271
3272 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3273 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3274
3275 // This is an incomplete check because image_atomic_cmpswap
3276 // may only use 0x3 and 0xf while other atomic operations
3277 // may use 0x1 and 0x3. However these limitations are
3278 // verified when we check that dmask matches dst size.
3279 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
3280 }
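// Illustrative examples for the atomic dmask check above:
//   image_atomic_add v4, v32, s[96:103] dmask:0x1 unorm glc   (accepted)
//   image_atomic_add v4, v32, s[96:103] dmask:0x2 unorm glc   (rejected)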
3281
3282 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
3283
3284 const unsigned Opc = Inst.getOpcode();
3285 const MCInstrDesc &Desc = MII.get(Opc);
3286
3287 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
3288 return true;
3289
3290 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
3291 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
3292
3293 // GATHER4 instructions use dmask in a different fashion compared to
3294 // other MIMG instructions. The only useful DMASK values are
3295 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
3296 // (red,red,red,red) etc.) The ISA document doesn't mention
3297 // this.
3298 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
3299 }
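// Illustrative examples for the gather4 dmask check above:
//   image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x4   (accepted: blue)
//   image_gather4 v[5:8], v[1:4], s[8:15], s[12:15] dmask:0x3   (rejected)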
3300
3301 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
3302 {
3303 switch (Opcode) {
3304 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
3305 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
3306 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
3307 return true;
3308 default:
3309 return false;
3310 }
3311 }
3312
3313 // movrels* opcodes should only allow VGPRs as src0.
3314 // This is specified in .td description for vop1/vop3,
3315 // but sdwa is handled differently. See isSDWAOperand.
3316 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
3317 const OperandVector &Operands) {
3318
3319 const unsigned Opc = Inst.getOpcode();
3320 const MCInstrDesc &Desc = MII.get(Opc);
3321
3322 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
3323 return true;
3324
3325 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3326 assert(Src0Idx != -1);
3327
3328 SMLoc ErrLoc;
3329 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3330 if (Src0.isReg()) {
3331 auto Reg = mc2PseudoReg(Src0.getReg());
3332 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3333 if (!isSGPR(Reg, TRI))
3334 return true;
3335 ErrLoc = getRegLoc(Reg, Operands);
3336 } else {
3337 ErrLoc = getConstLoc(Operands);
3338 }
3339
3340 Error(ErrLoc, "source operand must be a VGPR");
3341 return false;
3342 }
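// Illustrative example for the movrels check above (GFX10 asm):
//   v_movrels_b32_sdwa v0, s0
// is rejected with "source operand must be a VGPR".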
3343
3344 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
3345 const OperandVector &Operands) {
3346
3347 const unsigned Opc = Inst.getOpcode();
3348
3349 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
3350 return true;
3351
3352 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
3353 assert(Src0Idx != -1);
3354
3355 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
3356 if (!Src0.isReg())
3357 return true;
3358
3359 auto Reg = mc2PseudoReg(Src0.getReg());
3360 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3361 if (isSGPR(Reg, TRI)) {
3362 Error(getRegLoc(Reg, Operands),
3363 "source operand must be either a VGPR or an inline constant");
3364 return false;
3365 }
3366
3367 return true;
3368 }
3369
3370 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3371 switch (Inst.getOpcode()) {
3372 default:
3373 return true;
3374 case V_DIV_SCALE_F32_gfx6_gfx7:
3375 case V_DIV_SCALE_F32_vi:
3376 case V_DIV_SCALE_F32_gfx10:
3377 case V_DIV_SCALE_F64_gfx6_gfx7:
3378 case V_DIV_SCALE_F64_vi:
3379 case V_DIV_SCALE_F64_gfx10:
3380 break;
3381 }
3382
3383 // TODO: Check that src0 = src1 or src2.
3384
3385 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3386 AMDGPU::OpName::src1_modifiers,
3387 AMDGPU::OpName::src2_modifiers}) {
3388 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3389 .getImm() &
3390 SISrcMods::ABS) {
3391 return false;
3392 }
3393 }
3394
3395 return true;
3396 }
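// Illustrative example for the check above: an ABS source modifier such as
//   v_div_scale_f32 v24, vcc, |v2|, v3, v4
// makes this validator fail; validateInstruction reports
// "ABS not allowed in VOP3B instructions".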
3397
3398 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3399
3400 const unsigned Opc = Inst.getOpcode();
3401 const MCInstrDesc &Desc = MII.get(Opc);
3402
3403 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3404 return true;
3405
3406 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3407 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3408 if (isCI() || isSI())
3409 return false;
3410 }
3411
3412 return true;
3413 }
3414
3415 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3416 const unsigned Opc = Inst.getOpcode();
3417 const MCInstrDesc &Desc = MII.get(Opc);
3418
3419 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3420 return true;
3421
3422 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3423 if (DimIdx < 0)
3424 return true;
3425
3426 long Imm = Inst.getOperand(DimIdx).getImm();
3427 if (Imm < 0 || Imm >= 8)
3428 return false;
3429
3430 return true;
3431 }
3432
3433 static bool IsRevOpcode(const unsigned Opcode)
3434 {
3435 switch (Opcode) {
3436 case AMDGPU::V_SUBREV_F32_e32:
3437 case AMDGPU::V_SUBREV_F32_e64:
3438 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3439 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3440 case AMDGPU::V_SUBREV_F32_e32_vi:
3441 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3442 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3443 case AMDGPU::V_SUBREV_F32_e64_vi:
3444
3445 case AMDGPU::V_SUBREV_CO_U32_e32:
3446 case AMDGPU::V_SUBREV_CO_U32_e64:
3447 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3448 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3449
3450 case AMDGPU::V_SUBBREV_U32_e32:
3451 case AMDGPU::V_SUBBREV_U32_e64:
3452 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3453 case AMDGPU::V_SUBBREV_U32_e32_vi:
3454 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3455 case AMDGPU::V_SUBBREV_U32_e64_vi:
3456
3457 case AMDGPU::V_SUBREV_U32_e32:
3458 case AMDGPU::V_SUBREV_U32_e64:
3459 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3460 case AMDGPU::V_SUBREV_U32_e32_vi:
3461 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3462 case AMDGPU::V_SUBREV_U32_e64_vi:
3463
3464 case AMDGPU::V_SUBREV_F16_e32:
3465 case AMDGPU::V_SUBREV_F16_e64:
3466 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3467 case AMDGPU::V_SUBREV_F16_e32_vi:
3468 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3469 case AMDGPU::V_SUBREV_F16_e64_vi:
3470
3471 case AMDGPU::V_SUBREV_U16_e32:
3472 case AMDGPU::V_SUBREV_U16_e64:
3473 case AMDGPU::V_SUBREV_U16_e32_vi:
3474 case AMDGPU::V_SUBREV_U16_e64_vi:
3475
3476 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3477 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3478 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3479
3480 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3481 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3482
3483 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3484 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3485
3486 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3487 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3488
3489 case AMDGPU::V_LSHRREV_B32_e32:
3490 case AMDGPU::V_LSHRREV_B32_e64:
3491 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3492 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3493 case AMDGPU::V_LSHRREV_B32_e32_vi:
3494 case AMDGPU::V_LSHRREV_B32_e64_vi:
3495 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3496 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3497
3498 case AMDGPU::V_ASHRREV_I32_e32:
3499 case AMDGPU::V_ASHRREV_I32_e64:
3500 case AMDGPU::V_ASHRREV_I32_e32_gfx10:
3501 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
3502 case AMDGPU::V_ASHRREV_I32_e32_vi:
3503 case AMDGPU::V_ASHRREV_I32_e64_gfx10:
3504 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
3505 case AMDGPU::V_ASHRREV_I32_e64_vi:
3506
3507 case AMDGPU::V_LSHLREV_B32_e32:
3508 case AMDGPU::V_LSHLREV_B32_e64:
3509 case AMDGPU::V_LSHLREV_B32_e32_gfx10:
3510 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
3511 case AMDGPU::V_LSHLREV_B32_e32_vi:
3512 case AMDGPU::V_LSHLREV_B32_e64_gfx10:
3513 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
3514 case AMDGPU::V_LSHLREV_B32_e64_vi:
3515
3516 case AMDGPU::V_LSHLREV_B16_e32:
3517 case AMDGPU::V_LSHLREV_B16_e64:
3518 case AMDGPU::V_LSHLREV_B16_e32_vi:
3519 case AMDGPU::V_LSHLREV_B16_e64_vi:
3520 case AMDGPU::V_LSHLREV_B16_gfx10:
3521
3522 case AMDGPU::V_LSHRREV_B16_e32:
3523 case AMDGPU::V_LSHRREV_B16_e64:
3524 case AMDGPU::V_LSHRREV_B16_e32_vi:
3525 case AMDGPU::V_LSHRREV_B16_e64_vi:
3526 case AMDGPU::V_LSHRREV_B16_gfx10:
3527
3528 case AMDGPU::V_ASHRREV_I16_e32:
3529 case AMDGPU::V_ASHRREV_I16_e64:
3530 case AMDGPU::V_ASHRREV_I16_e32_vi:
3531 case AMDGPU::V_ASHRREV_I16_e64_vi:
3532 case AMDGPU::V_ASHRREV_I16_gfx10:
3533
3534 case AMDGPU::V_LSHLREV_B64_e64:
3535 case AMDGPU::V_LSHLREV_B64_gfx10:
3536 case AMDGPU::V_LSHLREV_B64_vi:
3537
3538 case AMDGPU::V_LSHRREV_B64_e64:
3539 case AMDGPU::V_LSHRREV_B64_gfx10:
3540 case AMDGPU::V_LSHRREV_B64_vi:
3541
3542 case AMDGPU::V_ASHRREV_I64_e64:
3543 case AMDGPU::V_ASHRREV_I64_gfx10:
3544 case AMDGPU::V_ASHRREV_I64_vi:
3545
3546 case AMDGPU::V_PK_LSHLREV_B16:
3547 case AMDGPU::V_PK_LSHLREV_B16_gfx10:
3548 case AMDGPU::V_PK_LSHLREV_B16_vi:
3549
3550 case AMDGPU::V_PK_LSHRREV_B16:
3551 case AMDGPU::V_PK_LSHRREV_B16_gfx10:
3552 case AMDGPU::V_PK_LSHRREV_B16_vi:
3553 case AMDGPU::V_PK_ASHRREV_I16:
3554 case AMDGPU::V_PK_ASHRREV_I16_gfx10:
3555 case AMDGPU::V_PK_ASHRREV_I16_vi:
3556 return true;
3557 default:
3558 return false;
3559 }
3560 }
3561
3562 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {
3563
3564 using namespace SIInstrFlags;
3565 const unsigned Opcode = Inst.getOpcode();
3566 const MCInstrDesc &Desc = MII.get(Opcode);
3567
3568 // The lds_direct register is defined so that it can be used
3569 // with 9-bit operands only. Ignore encodings which do not accept them.
3570 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0)
3571 return true;
3572
3573 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3574 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3575 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3576
3577 const int SrcIndices[] = { Src1Idx, Src2Idx };
3578
3579 // lds_direct cannot be specified as either src1 or src2.
3580 for (int SrcIdx : SrcIndices) {
3581 if (SrcIdx == -1) break;
3582 const MCOperand &Src = Inst.getOperand(SrcIdx);
3583 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
3584 return false;
3585 }
3586 }
3587
3588 if (Src0Idx == -1)
3589 return true;
3590
3591 const MCOperand &Src = Inst.getOperand(Src0Idx);
3592 if (!Src.isReg() || Src.getReg() != LDS_DIRECT)
3593 return true;
3594
3595 // lds_direct is specified as src0. Check additional limitations.
3596 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode);
3597 }
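// Illustrative examples for the lds_direct rules above (GFX10 asm):
//   v_add_f32 v0, lds_direct, v1    (accepted: lds_direct as src0)
//   v_add_f32 v0, v1, lds_direct    (rejected: lds_direct as src1)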
3598
3599 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
3600 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
3601 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3602 if (Op.isFlatOffset())
3603 return Op.getStartLoc();
3604 }
3605 return getLoc();
3606 }
3607
3608 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
3609 const OperandVector &Operands) {
3610 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3611 if ((TSFlags & SIInstrFlags::FLAT) == 0)
3612 return true;
3613
3614 auto Opcode = Inst.getOpcode();
3615 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3616 assert(OpNum != -1);
3617
3618 const auto &Op = Inst.getOperand(OpNum);
3619 if (!hasFlatOffsets() && Op.getImm() != 0) {
3620 Error(getFlatOffsetLoc(Operands),
3621 "flat offset modifier is not supported on this GPU");
3622 return false;
3623 }
3624
3625 // For the FLAT segment the offset must be positive;
3626 // the MSB is ignored and forced to zero.
3627 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) {
3628 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
3629 if (!isIntN(OffsetSize, Op.getImm())) {
3630 Error(getFlatOffsetLoc(Operands),
3631 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
3632 return false;
3633 }
3634 } else {
3635 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
3636 if (!isUIntN(OffsetSize, Op.getImm())) {
3637 Error(getFlatOffsetLoc(Operands),
3638 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
3639 return false;
3640 }
3641 }
3642
3643 return true;
3644 }
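// Illustrative examples for the offset checks above (GFX9-style asm):
//   flat_load_dword v0, v[1:2] offset:-1          (rejected: FLAT offsets are unsigned)
//   global_load_dword v0, v[1:2], off offset:-8   (accepted: segment offsets are signed)
// On targets without flat offsets, any non-zero offset is rejected.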
3645
3646 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
3647 // Start with the second operand because an SMEM offset cannot be dst or src0.
3648 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
3649 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
3650 if (Op.isSMEMOffset())
3651 return Op.getStartLoc();
3652 }
3653 return getLoc();
3654 }
3655
3656 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
3657 const OperandVector &Operands) {
3658 if (isCI() || isSI())
3659 return true;
3660
3661 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3662 if ((TSFlags & SIInstrFlags::SMRD) == 0)
3663 return true;
3664
3665 auto Opcode = Inst.getOpcode();
3666 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
3667 if (OpNum == -1)
3668 return true;
3669
3670 const auto &Op = Inst.getOperand(OpNum);
3671 if (!Op.isImm())
3672 return true;
3673
3674 uint64_t Offset = Op.getImm();
3675 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
3676 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
3677 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
3678 return true;
3679
3680 Error(getSMEMOffsetLoc(Operands),
3681 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
3682 "expected a 21-bit signed offset");
3683
3684 return false;
3685 }
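// Illustrative examples for the SMEM offset check above (exact widths depend
// on the target and on buffer vs. non-buffer opcodes, as the messages state):
//   s_load_dword s5, s[2:3], 0x0          (accepted)
//   s_load_dword s5, s[2:3], 0x40000000   (rejected: does not fit the field)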
3686
3687 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
3688 unsigned Opcode = Inst.getOpcode();
3689 const MCInstrDesc &Desc = MII.get(Opcode);
3690 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
3691 return true;
3692
3693 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3694 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3695
3696 const int OpIndices[] = { Src0Idx, Src1Idx };
3697
3698 unsigned NumExprs = 0;
3699 unsigned NumLiterals = 0;
3700 uint32_t LiteralValue;
3701
3702 for (int OpIdx : OpIndices) {
3703 if (OpIdx == -1) break;
3704
3705 const MCOperand &MO = Inst.getOperand(OpIdx);
3706 // Exclude special imm operands (like the one used by s_set_gpr_idx_on)
3707 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
3708 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3709 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3710 if (NumLiterals == 0 || LiteralValue != Value) {
3711 LiteralValue = Value;
3712 ++NumLiterals;
3713 }
3714 } else if (MO.isExpr()) {
3715 ++NumExprs;
3716 }
3717 }
3718 }
3719
3720 return NumLiterals + NumExprs <= 1;
3721 }
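// Illustrative examples for the SOP literal check above:
//   s_and_b32 s2, 0x12345678, 0x12345678   (accepted: one literal, used twice)
//   s_and_b32 s2, 0x12345678, 0x87654321   (rejected: two distinct literals)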
3722
3723 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
3724 const unsigned Opc = Inst.getOpcode();
3725 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
3726 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
3727 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
3728 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
3729
3730 if (OpSel & ~3)
3731 return false;
3732 }
3733 return true;
3734 }
3735
3736 // Check if VCC register matches wavefront size
3737 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
3738 auto FB = getFeatureBits();
3739 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
3740 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
3741 }
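// Illustrative example: where a wave-size carry operand is expected, "vcc"
// is only valid in wave64 mode and "vcc_lo" only in wave32 mode.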
3742
3743 // A VOP3 literal is only allowed on GFX10+, and only one can be used
3744 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
3745 const OperandVector &Operands) {
3746 unsigned Opcode = Inst.getOpcode();
3747 const MCInstrDesc &Desc = MII.get(Opcode);
3748 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
3749 return true;
3750
3751 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3752 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3753 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3754
3755 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3756
3757 unsigned NumExprs = 0;
3758 unsigned NumLiterals = 0;
3759 uint32_t LiteralValue;
3760
3761 for (int OpIdx : OpIndices) {
3762 if (OpIdx == -1) break;
3763
3764 const MCOperand &MO = Inst.getOperand(OpIdx);
3765 if (!MO.isImm() && !MO.isExpr())
3766 continue;
3767 if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
3768 continue;
3769
3770 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
3771 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
3772 Error(getConstLoc(Operands),
3773 "inline constants are not allowed for this operand");
3774 return false;
3775 }
3776
3777 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
3778 uint32_t Value = static_cast<uint32_t>(MO.getImm());
3779 if (NumLiterals == 0 || LiteralValue != Value) {
3780 LiteralValue = Value;
3781 ++NumLiterals;
3782 }
3783 } else if (MO.isExpr()) {
3784 ++NumExprs;
3785 }
3786 }
3787 NumLiterals += NumExprs;
3788
3789 if (!NumLiterals)
3790 return true;
3791
3792 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
3793 Error(getLitLoc(Operands), "literal operands are not supported");
3794 return false;
3795 }
3796
3797 if (NumLiterals > 1) {
3798 Error(getLitLoc(Operands), "only one literal operand is allowed");
3799 return false;
3800 }
3801
3802 return true;
3803 }
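// Illustrative examples for the VOP3 literal check above (GFX10, which has
// FeatureVOP3Literal):
//   v_add3_u32 v0, v1, v2, 0x64      (accepted: one literal)
//   v_add3_u32 v0, 0x64, v2, 0x64    (accepted: the same literal value)
//   v_add3_u32 v0, 0x64, v2, 0x65    (rejected: two distinct literals)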
3804
3805 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
3806 const OperandVector &Operands,
3807 const SMLoc &IDLoc) {
3808 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
3809 AMDGPU::OpName::glc1);
3810 if (GLCPos != -1) {
3811 // -1 is set by the GLC_1 default operand. In all cases "glc" must be present
3812 // in the asm string, and the default value means it is not present.
3813 if (Inst.getOperand(GLCPos).getImm() == -1) {
3814 Error(IDLoc, "instruction must use glc");
3815 return false;
3816 }
3817 }
3818
3819 return true;
3820 }
3821
3822 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
3823 const SMLoc &IDLoc,
3824 const OperandVector &Operands) {
3825 if (!validateLdsDirect(Inst)) {
3826 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands),
3827 "invalid use of lds_direct");
3828 return false;
3829 }
3830 if (!validateSOPLiteral(Inst)) {
3831 Error(getLitLoc(Operands),
3832 "only one literal operand is allowed");
3833 return false;
3834 }
3835 if (!validateVOP3Literal(Inst, Operands)) {
3836 return false;
3837 }
3838 if (!validateConstantBusLimitations(Inst, Operands)) {
3839 return false;
3840 }
3841 if (!validateEarlyClobberLimitations(Inst, Operands)) {
3842 return false;
3843 }
3844 if (!validateIntClampSupported(Inst)) {
3845 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
3846 "integer clamping is not supported on this GPU");
3847 return false;
3848 }
3849 if (!validateOpSel(Inst)) {
3850 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
3851 "invalid op_sel operand");
3852 return false;
3853 }
3854 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate.
3855 if (!validateMIMGD16(Inst)) {
3856 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
3857 "d16 modifier is not supported on this GPU");
3858 return false;
3859 }
3860 if (!validateMIMGDim(Inst)) {
3861 Error(IDLoc, "dim modifier is required on this GPU");
3862 return false;
3863 }
3864 if (!validateMIMGDataSize(Inst)) {
3865 Error(IDLoc,
3866 "image data size does not match dmask and tfe");
3867 return false;
3868 }
3869 if (!validateMIMGAddrSize(Inst)) {
3870 Error(IDLoc,
3871 "image address size does not match dim and a16");
3872 return false;
3873 }
3874 if (!validateMIMGAtomicDMask(Inst)) {
3875 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3876 "invalid atomic image dmask");
3877 return false;
3878 }
3879 if (!validateMIMGGatherDMask(Inst)) {
3880 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
3881 "invalid image_gather dmask: only one bit must be set");
3882 return false;
3883 }
3884 if (!validateMovrels(Inst, Operands)) {
3885 return false;
3886 }
3887 if (!validateFlatOffset(Inst, Operands)) {
3888 return false;
3889 }
3890 if (!validateSMEMOffset(Inst, Operands)) {
3891 return false;
3892 }
3893 if (!validateMAIAccWrite(Inst, Operands)) {
3894 return false;
3895 }
3896 if (!validateDivScale(Inst)) {
3897 Error(IDLoc, "ABS not allowed in VOP3B instructions");
3898 return false;
3899 }
3900 if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
3901 return false;
3902 }
3903
3904 return true;
3905 }
3906
3907 static std::string AMDGPUMnemonicSpellCheck(StringRef S,
3908 const FeatureBitset &FBS,
3909 unsigned VariantID = 0);
3910
3911 static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
3912 const FeatureBitset &AvailableFeatures,
3913 unsigned VariantID);
3914
3915 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3916 const FeatureBitset &FBS) {
3917 return isSupportedMnemo(Mnemo, FBS, getAllVariants());
3918 }
3919
3920 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
3921 const FeatureBitset &FBS,
3922 ArrayRef<unsigned> Variants) {
3923 for (auto Variant : Variants) {
3924 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
3925 return true;
3926 }
3927
3928 return false;
3929 }
3930
3931 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
3932 const SMLoc &IDLoc) {
3933 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());
3934
3935 // Check if requested instruction variant is supported.
3936 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
3937 return false;
3938
3939 // This instruction is not supported.
3940 // Clear any other pending errors because they are no longer relevant.
3941 getParser().clearPendingErrors();
3942
3943 // Requested instruction variant is not supported.
3944 // Check if any other variants are supported.
3945 StringRef VariantName = getMatchedVariantName();
3946 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
3947 return Error(IDLoc,
3948 Twine(VariantName,
3949 " variant of this instruction is not supported"));
3950 }
3951
3952 // Finally check if this instruction is supported on any other GPU.
3953 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
3954 return Error(IDLoc, "instruction not supported on this GPU");
3955 }
3956
3957 // Instruction not supported on any GPU. Probably a typo.
3958 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3959 return Error(IDLoc, "invalid instruction" + Suggestion);
3960 }
3961
3962 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3963 OperandVector &Operands,
3964 MCStreamer &Out,
3965 uint64_t &ErrorInfo,
3966 bool MatchingInlineAsm) {
3967 MCInst Inst;
3968 unsigned Result = Match_Success;
3969 for (auto Variant : getMatchedVariants()) {
3970 uint64_t EI;
3971 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3972 Variant);
3973 // Match statuses are ordered from least to most specific, and the most
3974 // specific status is used as the result:
3975 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3976 if ((R == Match_Success) ||
3977 (R == Match_PreferE32) ||
3978 (R == Match_MissingFeature && Result != Match_PreferE32) ||
3979 (R == Match_InvalidOperand && Result != Match_MissingFeature
3980 && Result != Match_PreferE32) ||
3981 (R == Match_MnemonicFail && Result != Match_InvalidOperand
3982 && Result != Match_MissingFeature
3983 && Result != Match_PreferE32)) {
3984 Result = R;
3985 ErrorInfo = EI;
3986 }
3987 if (R == Match_Success)
3988 break;
3989 }
3990
3991 if (Result == Match_Success) {
3992 if (!validateInstruction(Inst, IDLoc, Operands)) {
3993 return true;
3994 }
3995 Inst.setLoc(IDLoc);
3996 Out.emitInstruction(Inst, getSTI());
3997 return false;
3998 }
3999
4000 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4001 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4002 return true;
4003 }
4004
4005 switch (Result) {
4006 default: break;
4007 case Match_MissingFeature:
4008 // It has been verified that the specified instruction
4009 // mnemonic is valid. A match was found but it requires
4010 // features which are not supported on this GPU.
4011 return Error(IDLoc, "operands are not valid for this GPU or mode");
4012
4013 case Match_InvalidOperand: {
4014 SMLoc ErrorLoc = IDLoc;
4015 if (ErrorInfo != ~0ULL) {
4016 if (ErrorInfo >= Operands.size()) {
4017 return Error(IDLoc, "too few operands for instruction");
4018 }
4019 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
4020 if (ErrorLoc == SMLoc())
4021 ErrorLoc = IDLoc;
4022 }
4023 return Error(ErrorLoc, "invalid operand for instruction");
4024 }
4025
4026 case Match_PreferE32:
4027 return Error(IDLoc, "internal error: instruction without _e64 suffix "
4028 "should be encoded as e32");
4029 case Match_MnemonicFail:
4030 llvm_unreachable("Invalid instructions should have been handled already");
4031 }
4032 llvm_unreachable("Implement any new match types added!");
4033 }
4034
4035 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
4036 int64_t Tmp = -1;
4037 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
4038 return true;
4039 }
4040 if (getParser().parseAbsoluteExpression(Tmp)) {
4041 return true;
4042 }
4043 Ret = static_cast<uint32_t>(Tmp);
4044 return false;
4045 }
4046
4047 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
4048 uint32_t &Minor) {
4049 if (ParseAsAbsoluteExpression(Major))
4050 return TokError("invalid major version");
4051
4052 if (!trySkipToken(AsmToken::Comma))
4053 return TokError("minor version number required, comma expected");
4054
4055 if (ParseAsAbsoluteExpression(Minor))
4056 return TokError("invalid minor version");
4057
4058 return false;
4059 }
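// Illustrative use: this parses the version pair of, e.g.,
//   .hsa_code_object_version 2,1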
4060
4061 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
4062 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4063 return TokError("directive only supported for amdgcn architecture");
4064
4065 std::string Target;
4066
4067 SMLoc TargetStart = getLoc();
4068 if (getParser().parseEscapedString(Target))
4069 return true;
4070 SMRange TargetRange = SMRange(TargetStart, getLoc());
4071
4072 std::string ExpectedTarget;
4073 raw_string_ostream ExpectedTargetOS(ExpectedTarget);
4074 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);
4075
4076 if (Target != ExpectedTargetOS.str())
4077 return Error(TargetRange.Start, "target must match options", TargetRange);
4078
4079 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
4080 return false;
4081 }
4082
4083 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
4084 return Error(Range.Start, "value out of range", Range);
4085 }
4086
4087 bool AMDGPUAsmParser::calculateGPRBlocks(
4088 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
4089 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
4090 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
4091 unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
4092 // TODO(scott.linder): These calculations are duplicated from
4093 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
4094 IsaVersion Version = getIsaVersion(getSTI().getCPU());
4095
4096 unsigned NumVGPRs = NextFreeVGPR;
4097 unsigned NumSGPRs = NextFreeSGPR;
4098
4099 if (Version.Major >= 10)
4100 NumSGPRs = 0;
4101 else {
4102 unsigned MaxAddressableNumSGPRs =
4103 IsaInfo::getAddressableNumSGPRs(&getSTI());
4104
4105 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
4106 NumSGPRs > MaxAddressableNumSGPRs)
4107 return OutOfRangeError(SGPRRange);
4108
4109 NumSGPRs +=
4110 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
4111
4112 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
4113 NumSGPRs > MaxAddressableNumSGPRs)
4114 return OutOfRangeError(SGPRRange);
4115
4116 if (Features.test(FeatureSGPRInitBug))
4117 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
4118 }
4119
4120 VGPRBlocks =
4121 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
4122 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
4123
4124 return false;
4125 }
4126
4127 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
4128 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
4129 return TokError("directive only supported for amdgcn architecture");
4130
4131 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
4132 return TokError("directive only supported for amdhsa OS");
4133
4134 StringRef KernelName;
4135 if (getParser().parseIdentifier(KernelName))
4136 return true;
4137
4138 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
4139
4140 StringSet<> Seen;
4141
4142 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
4143
4144 SMRange VGPRRange;
4145 uint64_t NextFreeVGPR = 0;
4146 SMRange SGPRRange;
4147 uint64_t NextFreeSGPR = 0;
4148 unsigned UserSGPRCount = 0;
4149 bool ReserveVCC = true;
4150 bool ReserveFlatScr = true;
4151 bool ReserveXNACK = hasXNACK();
4152 Optional<bool> EnableWavefrontSize32;
4153
4154 while (true) {
4155 while (trySkipToken(AsmToken::EndOfStatement));
4156
4157 StringRef ID;
4158 SMRange IDRange = getTok().getLocRange();
4159 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
4160 return true;
4161
4162 if (ID == ".end_amdhsa_kernel")
4163 break;
4164
4165 if (Seen.find(ID) != Seen.end())
4166 return TokError(".amdhsa_ directives cannot be repeated");
4167 Seen.insert(ID);
4168
4169 SMLoc ValStart = getLoc();
4170 int64_t IVal;
4171 if (getParser().parseAbsoluteExpression(IVal))
4172 return true;
4173 SMLoc ValEnd = getLoc();
4174 SMRange ValRange = SMRange(ValStart, ValEnd);
4175
4176 if (IVal < 0)
4177 return OutOfRangeError(ValRange);
4178
4179 uint64_t Val = IVal;
4180
4181 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
4182 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \
4183 return OutOfRangeError(RANGE); \
4184 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
4185
4186 if (ID == ".amdhsa_group_segment_fixed_size") {
4187 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
4188 return OutOfRangeError(ValRange);
4189 KD.group_segment_fixed_size = Val;
4190 } else if (ID == ".amdhsa_private_segment_fixed_size") {
4191 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
4192 return OutOfRangeError(ValRange);
4193 KD.private_segment_fixed_size = Val;
4194 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
4195 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4196 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
4197 Val, ValRange);
4198 if (Val)
4199 UserSGPRCount += 4;
4200 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
4201 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4202 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
4203 ValRange);
4204 if (Val)
4205 UserSGPRCount += 2;
4206 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
4207 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4208 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
4209 ValRange);
4210 if (Val)
4211 UserSGPRCount += 2;
4212 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
4213 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4214 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
4215 Val, ValRange);
4216 if (Val)
4217 UserSGPRCount += 2;
4218 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
4219 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4220 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
4221 ValRange);
4222 if (Val)
4223 UserSGPRCount += 2;
4224 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
4225 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4226 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
4227 ValRange);
4228 if (Val)
4229 UserSGPRCount += 2;
4230 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
4231 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4232 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
4233 Val, ValRange);
4234 if (Val)
4235 UserSGPRCount += 1;
4236 } else if (ID == ".amdhsa_wavefront_size32") {
4237 if (IVersion.Major < 10)
4238 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4239 EnableWavefrontSize32 = Val;
4240 PARSE_BITS_ENTRY(KD.kernel_code_properties,
4241 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
4242 Val, ValRange);
4243 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
4244 PARSE_BITS_ENTRY(
4245 KD.compute_pgm_rsrc2,
4246 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
4247 ValRange);
4248 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
4249 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4250 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
4251 ValRange);
4252 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
4253 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4254 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
4255 ValRange);
4256 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
4257 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4258 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
4259 ValRange);
4260 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
4261 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4262 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
4263 ValRange);
4264 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
4265 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4266 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
4267 ValRange);
4268 } else if (ID == ".amdhsa_next_free_vgpr") {
4269 VGPRRange = ValRange;
4270 NextFreeVGPR = Val;
4271 } else if (ID == ".amdhsa_next_free_sgpr") {
4272 SGPRRange = ValRange;
4273 NextFreeSGPR = Val;
4274 } else if (ID == ".amdhsa_reserve_vcc") {
4275 if (!isUInt<1>(Val))
4276 return OutOfRangeError(ValRange);
4277 ReserveVCC = Val;
4278 } else if (ID == ".amdhsa_reserve_flat_scratch") {
4279 if (IVersion.Major < 7)
4280 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
4281 if (!isUInt<1>(Val))
4282 return OutOfRangeError(ValRange);
4283 ReserveFlatScr = Val;
4284 } else if (ID == ".amdhsa_reserve_xnack_mask") {
4285 if (IVersion.Major < 8)
4286 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
4287 if (!isUInt<1>(Val))
4288 return OutOfRangeError(ValRange);
4289 ReserveXNACK = Val;
4290 } else if (ID == ".amdhsa_float_round_mode_32") {
4291 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4292 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
4293 } else if (ID == ".amdhsa_float_round_mode_16_64") {
4294 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4295 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
4296 } else if (ID == ".amdhsa_float_denorm_mode_32") {
4297 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4298 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
4299 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
4300 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4301 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
4302 ValRange);
4303 } else if (ID == ".amdhsa_dx10_clamp") {
4304 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
4305 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
4306 } else if (ID == ".amdhsa_ieee_mode") {
4307 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
4308 Val, ValRange);
4309 } else if (ID == ".amdhsa_fp16_overflow") {
4310 if (IVersion.Major < 9)
4311 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
4312 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
4313 ValRange);
4314 } else if (ID == ".amdhsa_workgroup_processor_mode") {
4315 if (IVersion.Major < 10)
4316 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4317 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
4318 ValRange);
4319 } else if (ID == ".amdhsa_memory_ordered") {
4320 if (IVersion.Major < 10)
4321 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4322 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
4323 ValRange);
4324 } else if (ID == ".amdhsa_forward_progress") {
4325 if (IVersion.Major < 10)
4326 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
4327 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
4328 ValRange);
4329 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
4330 PARSE_BITS_ENTRY(
4331 KD.compute_pgm_rsrc2,
4332 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
4333 ValRange);
4334 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
4335 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4336 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
4337 Val, ValRange);
4338 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
4339 PARSE_BITS_ENTRY(
4340 KD.compute_pgm_rsrc2,
4341 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
4342 ValRange);
4343 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
4344 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4345 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
4346 Val, ValRange);
4347 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
4348 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4349 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
4350 Val, ValRange);
4351 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
4352 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4353 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
4354 Val, ValRange);
4355 } else if (ID == ".amdhsa_exception_int_div_zero") {
4356 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
4357 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
4358 Val, ValRange);
4359 } else {
4360 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
4361 }
4362
4363 #undef PARSE_BITS_ENTRY
4364 }
4365
4366 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
4367 return TokError(".amdhsa_next_free_vgpr directive is required");
4368
4369 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
4370 return TokError(".amdhsa_next_free_sgpr directive is required");
4371
4372 unsigned VGPRBlocks;
4373 unsigned SGPRBlocks;
4374 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
4375 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
4376 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
4377 SGPRBlocks))
4378 return true;
4379
4380 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
4381 VGPRBlocks))
4382 return OutOfRangeError(VGPRRange);
4383 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4384 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
4385
4386 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
4387 SGPRBlocks))
4388 return OutOfRangeError(SGPRRange);
4389 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
4390 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
4391 SGPRBlocks);
4392
4393 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
4394 return TokError("too many user SGPRs enabled");
4395 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
4396 UserSGPRCount);
4397
4398 getTargetStreamer().EmitAmdhsaKernelDescriptor(
4399 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
4400 ReserveFlatScr, ReserveXNACK);
4401 return false;
4402 }
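// Illustrative example of the block this parses (values are arbitrary):
//   .amdhsa_kernel my_kernel
//     .amdhsa_next_free_vgpr 32
//     .amdhsa_next_free_sgpr 16
//     .amdhsa_user_sgpr_kernarg_segment_ptr 1
//   .end_amdhsa_kernel
// .amdhsa_next_free_vgpr and .amdhsa_next_free_sgpr are mandatory, and no
// directive may be repeated.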
4403
4404 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
4405 uint32_t Major;
4406 uint32_t Minor;
4407
4408 if (ParseDirectiveMajorMinor(Major, Minor))
4409 return true;
4410
4411 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
4412 return false;
4413 }
4414
4415 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
4416 uint32_t Major;
4417 uint32_t Minor;
4418 uint32_t Stepping;
4419 StringRef VendorName;
4420 StringRef ArchName;
4421
4422 // If this directive has no arguments, then use the ISA version for the
4423 // targeted GPU.
4424 if (isToken(AsmToken::EndOfStatement)) {
4425 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
4426 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
4427 ISA.Stepping,
4428 "AMD", "AMDGPU");
4429 return false;
4430 }
4431
4432 if (ParseDirectiveMajorMinor(Major, Minor))
4433 return true;
4434
4435 if (!trySkipToken(AsmToken::Comma))
4436 return TokError("stepping version number required, comma expected");
4437
4438 if (ParseAsAbsoluteExpression(Stepping))
4439 return TokError("invalid stepping version");
4440
4441 if (!trySkipToken(AsmToken::Comma))
4442 return TokError("vendor name required, comma expected");
4443
4444 if (!parseString(VendorName, "invalid vendor name"))
4445 return true;
4446
4447 if (!trySkipToken(AsmToken::Comma))
4448 return TokError("arch name required, comma expected");
4449
4450 if (!parseString(ArchName, "invalid arch name"))
4451 return true;
4452
4453 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
4454 VendorName, ArchName);
4455 return false;
4456 }
4457
4458 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
4459 amd_kernel_code_t &Header) {
4460 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
4461 // assembly for backwards compatibility.
4462 if (ID == "max_scratch_backing_memory_byte_size") {
4463 Parser.eatToEndOfStatement();
4464 return false;
4465 }
4466
4467 SmallString<40> ErrStr;
4468 raw_svector_ostream Err(ErrStr);
4469 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
4470 return TokError(Err.str());
4471 }
4472 Lex();
4473
4474 if (ID == "enable_wavefront_size32") {
4475 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
4476 if (!isGFX10Plus())
4477 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
4478 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4479 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
4480 } else {
4481 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4482 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
4483 }
4484 }
4485
4486 if (ID == "wavefront_size") {
4487 if (Header.wavefront_size == 5) {
4488 if (!isGFX10Plus())
4489 return TokError("wavefront_size=5 is only allowed on GFX10+");
4490 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
4491 return TokError("wavefront_size=5 requires +WavefrontSize32");
4492 } else if (Header.wavefront_size == 6) {
4493 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
4494 return TokError("wavefront_size=6 requires +WavefrontSize64");
4495 }
4496 }
4497
4498 if (ID == "enable_wgp_mode") {
4499 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
4500 !isGFX10Plus())
4501 return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
4502 }
4503
4504 if (ID == "enable_mem_ordered") {
4505 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
4506 !isGFX10Plus())
4507 return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
4508 }
4509
4510 if (ID == "enable_fwd_progress") {
4511 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
4512 !isGFX10Plus())
4513 return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
4514 }
4515
4516 return false;
4517 }
4518
4519 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
4520 amd_kernel_code_t Header;
4521 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
4522
4523 while (true) {
4524 // Lex EndOfStatement. This is in a while loop, because lexing a comment
4525 // will set the current token to EndOfStatement.
4526 while(trySkipToken(AsmToken::EndOfStatement));
4527
4528 StringRef ID;
4529 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
4530 return true;
4531
4532 if (ID == ".end_amd_kernel_code_t")
4533 break;
4534
4535 if (ParseAMDKernelCodeTValue(ID, Header))
4536 return true;
4537 }
4538
4539 getTargetStreamer().EmitAMDKernelCodeT(Header);
4540
4541 return false;
4542 }
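// Illustrative example of the block this parses (fields are arbitrary):
//   .amd_kernel_code_t
//     wavefront_size = 6
//     enable_wgp_mode = 0
//   .end_amd_kernel_code_t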
4543
4544 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
4545 StringRef KernelName;
4546 if (!parseId(KernelName, "expected symbol name"))
4547 return true;
4548
4549 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
4550 ELF::STT_AMDGPU_HSA_KERNEL);
4551
4552 KernelScope.initialize(getContext());
4553 return false;
4554 }
4555
4556 bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
4557 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
4558 return Error(getLoc(),
4559 ".amd_amdgpu_isa directive is not available on non-amdgcn "
4560 "architectures");
4561 }
4562
4563 auto ISAVersionStringFromASM = getToken().getStringContents();
4564
4565 std::string ISAVersionStringFromSTI;
4566 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
4567 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);
4568
4569 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
4570 return Error(getLoc(),
4571 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
4572 "arguments specified through the command line");
4573 }
4574
4575 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
4576 Lex();
4577
4578 return false;
4579 }
4580
4581 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
4582 const char *AssemblerDirectiveBegin;
4583 const char *AssemblerDirectiveEnd;
4584 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
4585 isHsaAbiVersion3(&getSTI())
4586 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
4587 HSAMD::V3::AssemblerDirectiveEnd)
4588 : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
4589 HSAMD::AssemblerDirectiveEnd);
4590
4591 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
4592 return Error(getLoc(),
4593 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
4594 "not available on non-amdhsa OSes")).str());
4595 }
4596
4597 std::string HSAMetadataString;
4598 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
4599 HSAMetadataString))
4600 return true;
4601
4602 if (isHsaAbiVersion3(&getSTI())) {
4603 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
4604 return Error(getLoc(), "invalid HSA metadata");
4605 } else {
4606 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
4607 return Error(getLoc(), "invalid HSA metadata");
4608 }
4609
4610 return false;
4611 }
4612
4613 /// Common code to parse out a block of text (typically YAML) between start and
4614 /// end directives.
4615 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
4616 const char *AssemblerDirectiveEnd,
4617 std::string &CollectString) {
4618
4619 raw_string_ostream CollectStream(CollectString);
4620
4621 getLexer().setSkipSpace(false);
4622
4623 bool FoundEnd = false;
4624 while (!isToken(AsmToken::Eof)) {
4625 while (isToken(AsmToken::Space)) {
4626 CollectStream << getTokenStr();
4627 Lex();
4628 }
4629
4630 if (trySkipId(AssemblerDirectiveEnd)) {
4631 FoundEnd = true;
4632 break;
4633 }
4634
4635 CollectStream << Parser.parseStringToEndOfStatement()
4636 << getContext().getAsmInfo()->getSeparatorString();
4637
4638 Parser.eatToEndOfStatement();
4639 }
4640
4641 getLexer().setSkipSpace(true);
4642
4643 if (isToken(AsmToken::Eof) && !FoundEnd) {
4644 return TokError(Twine("expected directive ") +
4645 Twine(AssemblerDirectiveEnd) + Twine(" not found"));
4646 }
4647
4648 CollectStream.flush();
4649 return false;
4650 }
4651
4652 /// Parse the assembler directive for new MsgPack-format PAL metadata.
4653 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
4654 std::string String;
4655 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
4656 AMDGPU::PALMD::AssemblerDirectiveEnd, String))
4657 return true;
4658
4659 auto PALMetadata = getTargetStreamer().getPALMetadata();
4660 if (!PALMetadata->setFromString(String))
4661 return Error(getLoc(), "invalid PAL metadata");
4662 return false;
4663 }
4664
4665 /// Parse the assembler directive for old linear-format PAL metadata.
4666 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
4667 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
4668 return Error(getLoc(),
4669 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
4670 "not available on non-amdpal OSes")).str());
4671 }
4672
4673 auto PALMetadata = getTargetStreamer().getPALMetadata();
4674 PALMetadata->setLegacy();
4675 for (;;) {
4676 uint32_t Key, Value;
4677 if (ParseAsAbsoluteExpression(Key)) {
4678 return TokError(Twine("invalid value in ") +
4679 Twine(PALMD::AssemblerDirective));
4680 }
4681 if (!trySkipToken(AsmToken::Comma)) {
4682 return TokError(Twine("expected an even number of values in ") +
4683 Twine(PALMD::AssemblerDirective));
4684 }
4685 if (ParseAsAbsoluteExpression(Value)) {
4686 return TokError(Twine("invalid value in ") +
4687 Twine(PALMD::AssemblerDirective));
4688 }
4689 PALMetadata->setRegister(Key, Value);
4690 if (!trySkipToken(AsmToken::Comma))
4691 break;
4692 }
4693 return false;
4694 }
4695
4696 /// ParseDirectiveAMDGPULDS
4697 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
4698 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
4699 if (getParser().checkForValidSection())
4700 return true;
4701
4702 StringRef Name;
4703 SMLoc NameLoc = getLoc();
4704 if (getParser().parseIdentifier(Name))
4705 return TokError("expected identifier in directive");
4706
4707 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
4708 if (parseToken(AsmToken::Comma, "expected ','"))
4709 return true;
4710
4711 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
4712
4713 int64_t Size;
4714 SMLoc SizeLoc = getLoc();
4715 if (getParser().parseAbsoluteExpression(Size))
4716 return true;
4717 if (Size < 0)
4718 return Error(SizeLoc, "size must be non-negative");
4719 if (Size > LocalMemorySize)
4720 return Error(SizeLoc, "size is too large");
4721
4722 int64_t Alignment = 4;
4723 if (trySkipToken(AsmToken::Comma)) {
4724 SMLoc AlignLoc = getLoc();
4725 if (getParser().parseAbsoluteExpression(Alignment))
4726 return true;
4727 if (Alignment < 0 || !isPowerOf2_64(Alignment))
4728 return Error(AlignLoc, "alignment must be a power of two");
4729
4730 // Alignment larger than the size of LDS is possible in theory, as long
4731 // as the linker manages to place the symbol at address 0, but we do want
4732 // to make sure the alignment fits nicely into a 32-bit integer.
4733 if (Alignment >= 1u << 31)
4734 return Error(AlignLoc, "alignment is too large");
4735 }
4736
4737 if (parseToken(AsmToken::EndOfStatement,
4738 "unexpected token in '.amdgpu_lds' directive"))
4739 return true;
4740
4741 Symbol->redefineIfPossible();
4742 if (!Symbol->isUndefined())
4743 return Error(NameLoc, "invalid symbol redefinition");
4744
4745 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
4746 return false;
4747 }
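// Illustrative examples (size in bytes, optional power-of-two alignment):
//   .amdgpu_lds shared_data, 512
//   .amdgpu_lds shared_data2, 1024, 16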
4748
4749 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4750 StringRef IDVal = DirectiveID.getString();
4751
4752 if (isHsaAbiVersion3(&getSTI())) {
4753 if (IDVal == ".amdgcn_target")
4754 return ParseDirectiveAMDGCNTarget();
4755
4756 if (IDVal == ".amdhsa_kernel")
4757 return ParseDirectiveAMDHSAKernel();
4758
4759 // TODO: Restructure/combine with PAL metadata directive.
4760 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4761 return ParseDirectiveHSAMetadata();
4762 } else {
4763 if (IDVal == ".hsa_code_object_version")
4764 return ParseDirectiveHSACodeObjectVersion();
4765
4766 if (IDVal == ".hsa_code_object_isa")
4767 return ParseDirectiveHSACodeObjectISA();
4768
4769 if (IDVal == ".amd_kernel_code_t")
4770 return ParseDirectiveAMDKernelCodeT();
4771
4772 if (IDVal == ".amdgpu_hsa_kernel")
4773 return ParseDirectiveAMDGPUHsaKernel();
4774
4775 if (IDVal == ".amd_amdgpu_isa")
4776 return ParseDirectiveISAVersion();
4777
4778 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4779 return ParseDirectiveHSAMetadata();
4780 }
4781
4782 if (IDVal == ".amdgpu_lds")
4783 return ParseDirectiveAMDGPULDS();
4784
4785 if (IDVal == PALMD::AssemblerDirectiveBegin)
4786 return ParseDirectivePALMetadataBegin();
4787
4788 if (IDVal == PALMD::AssemblerDirective)
4789 return ParseDirectivePALMetadata();
4790
4791 return true;
4792 }
4793
4794 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4795 unsigned RegNo) const {
4796
4797 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4798 R.isValid(); ++R) {
4799 if (*R == RegNo)
4800 return isGFX9Plus();
4801 }
4802
4803   // GFX10 has 2 more SGPRs: 104 and 105.
4804 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4805 R.isValid(); ++R) {
4806 if (*R == RegNo)
4807 return hasSGPR104_SGPR105();
4808 }
4809
4810 switch (RegNo) {
4811 case AMDGPU::SRC_SHARED_BASE:
4812 case AMDGPU::SRC_SHARED_LIMIT:
4813 case AMDGPU::SRC_PRIVATE_BASE:
4814 case AMDGPU::SRC_PRIVATE_LIMIT:
4815 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4816 return isGFX9Plus();
4817 case AMDGPU::TBA:
4818 case AMDGPU::TBA_LO:
4819 case AMDGPU::TBA_HI:
4820 case AMDGPU::TMA:
4821 case AMDGPU::TMA_LO:
4822 case AMDGPU::TMA_HI:
4823 return !isGFX9Plus();
4824 case AMDGPU::XNACK_MASK:
4825 case AMDGPU::XNACK_MASK_LO:
4826 case AMDGPU::XNACK_MASK_HI:
4827 return (isVI() || isGFX9()) && hasXNACK();
4828 case AMDGPU::SGPR_NULL:
4829 return isGFX10Plus();
4830 default:
4831 break;
4832 }
4833
4834 if (isCI())
4835 return true;
4836
4837 if (isSI() || isGFX10Plus()) {
4838 // No flat_scr on SI.
4839 // On GFX10 flat scratch is not a valid register operand and can only be
4840 // accessed with s_setreg/s_getreg.
4841 switch (RegNo) {
4842 case AMDGPU::FLAT_SCR:
4843 case AMDGPU::FLAT_SCR_LO:
4844 case AMDGPU::FLAT_SCR_HI:
4845 return false;
4846 default:
4847 return true;
4848 }
4849 }
4850
4851 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4852 // SI/CI have.
4853 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4854 R.isValid(); ++R) {
4855 if (*R == RegNo)
4856 return hasSGPR102_SGPR103();
4857 }
4858
4859 return true;
4860 }
4861
4862 OperandMatchResultTy
4863 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4864 OperandMode Mode) {
4865 // Try to parse with a custom parser
4866 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4867
4868   // If we successfully parsed the operand or if there was an error parsing,
4869 // we are done.
4870 //
4871 // If we are parsing after we reach EndOfStatement then this means we
4872 // are appending default values to the Operands list. This is only done
4873 // by custom parser, so we shouldn't continue on to the generic parsing.
4874 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4875 isToken(AsmToken::EndOfStatement))
4876 return ResTy;
4877
4878 SMLoc RBraceLoc;
4879 SMLoc LBraceLoc = getLoc();
4880 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
4881 unsigned Prefix = Operands.size();
4882
4883 for (;;) {
4884 ResTy = parseReg(Operands);
4885 if (ResTy != MatchOperand_Success)
4886 return ResTy;
4887
4888 RBraceLoc = getLoc();
4889 if (trySkipToken(AsmToken::RBrac))
4890 break;
4891
4892 if (!trySkipToken(AsmToken::Comma))
4893 return MatchOperand_ParseFail;
4894 }
4895
4896 if (Operands.size() - Prefix > 1) {
4897 Operands.insert(Operands.begin() + Prefix,
4898 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4899 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
4900 }
4901
4902 return MatchOperand_Success;
4903 }
4904
4905 return parseRegOrImm(Operands);
4906 }
4907
4908 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4909 // Clear any forced encodings from the previous instruction.
4910 setForcedEncodingSize(0);
4911 setForcedDPP(false);
4912 setForcedSDWA(false);
4913
4914 if (Name.endswith("_e64")) {
4915 setForcedEncodingSize(64);
4916 return Name.substr(0, Name.size() - 4);
4917 } else if (Name.endswith("_e32")) {
4918 setForcedEncodingSize(32);
4919 return Name.substr(0, Name.size() - 4);
4920 } else if (Name.endswith("_dpp")) {
4921 setForcedDPP(true);
4922 return Name.substr(0, Name.size() - 4);
4923 } else if (Name.endswith("_sdwa")) {
4924 setForcedSDWA(true);
4925 return Name.substr(0, Name.size() - 5);
4926 }
4927 return Name;
4928 }
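
// For example, "v_add_f32_e64" is returned as "v_add_f32" with a forced
// 64-bit encoding, and "v_mov_b32_sdwa" as "v_mov_b32" with SDWA forced;
// a plain "v_add_f32" leaves the encoding choice to the matcher.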
4929
4930 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
4931 StringRef Name,
4932 SMLoc NameLoc, OperandVector &Operands) {
4933 // Add the instruction mnemonic
4934 Name = parseMnemonicSuffix(Name);
4935 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
4936
4937 bool IsMIMG = Name.startswith("image_");
4938
4939 while (!trySkipToken(AsmToken::EndOfStatement)) {
4940 OperandMode Mode = OperandMode_Default;
4941 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
4942 Mode = OperandMode_NSA;
4943 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
4944
4945 // Eat the comma or space if there is one.
4946 trySkipToken(AsmToken::Comma);
4947
4948 if (Res != MatchOperand_Success) {
4949 checkUnsupportedInstruction(Name, NameLoc);
4950 if (!Parser.hasPendingError()) {
4951 // FIXME: use real operand location rather than the current location.
4952 StringRef Msg =
4953 (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
4954 "not a valid operand.";
4955 Error(getLoc(), Msg);
4956 }
4957 while (!trySkipToken(AsmToken::EndOfStatement)) {
4958 lex();
4959 }
4960 return true;
4961 }
4962 }
4963
4964 return false;
4965 }
4966
4967 //===----------------------------------------------------------------------===//
4968 // Utility functions
4969 //===----------------------------------------------------------------------===//
4970
4971 OperandMatchResultTy
4972 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
4973
4974 if (!trySkipId(Prefix, AsmToken::Colon))
4975 return MatchOperand_NoMatch;
4976
4977 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
4978 }
4979
4980 OperandMatchResultTy
4981 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
4982 AMDGPUOperand::ImmTy ImmTy,
4983 bool (*ConvertResult)(int64_t&)) {
4984 SMLoc S = getLoc();
4985 int64_t Value = 0;
4986
4987 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
4988 if (Res != MatchOperand_Success)
4989 return Res;
4990
4991 if (ConvertResult && !ConvertResult(Value)) {
4992 Error(S, "invalid " + StringRef(Prefix) + " value.");
4993 }
4994
4995 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
4996 return MatchOperand_Success;
4997 }
4998
4999 OperandMatchResultTy
5000 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
5001 OperandVector &Operands,
5002 AMDGPUOperand::ImmTy ImmTy,
5003 bool (*ConvertResult)(int64_t&)) {
5004 SMLoc S = getLoc();
5005 if (!trySkipId(Prefix, AsmToken::Colon))
5006 return MatchOperand_NoMatch;
5007
5008 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
5009 return MatchOperand_ParseFail;
5010
5011 unsigned Val = 0;
5012 const unsigned MaxSize = 4;
5013
5014 // FIXME: How to verify the number of elements matches the number of src
5015 // operands?
5016 for (int I = 0; ; ++I) {
5017 int64_t Op;
5018 SMLoc Loc = getLoc();
5019 if (!parseExpr(Op))
5020 return MatchOperand_ParseFail;
5021
5022 if (Op != 0 && Op != 1) {
5023 Error(Loc, "invalid " + StringRef(Prefix) + " value.");
5024 return MatchOperand_ParseFail;
5025 }
5026
5027 Val |= (Op << I);
5028
5029 if (trySkipToken(AsmToken::RBrac))
5030 break;
5031
5032 if (I + 1 == MaxSize) {
5033 Error(getLoc(), "expected a closing square bracket");
5034 return MatchOperand_ParseFail;
5035 }
5036
5037 if (!skipToken(AsmToken::Comma, "expected a comma"))
5038 return MatchOperand_ParseFail;
5039 }
5040
5041 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
5042 return MatchOperand_Success;
5043 }
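
// A worked example (the "op_sel" prefix is one typical user of this
// helper): "op_sel:[0,1,1]" parses up to MaxSize 0/1 elements and packs
// them LSB-first, so the resulting immediate is 0b110 = 6.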
5044
5045 OperandMatchResultTy
5046 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
5047 AMDGPUOperand::ImmTy ImmTy) {
5048 int64_t Bit = 0;
5049 SMLoc S = getLoc();
5050
5051   // If we are at the end of the statement, this is a default argument, so
5052   // use a default value.
5053 if (!isToken(AsmToken::EndOfStatement)) {
5054 switch(getTokenKind()) {
5055 case AsmToken::Identifier: {
5056 StringRef Tok = getTokenStr();
5057 if (Tok == Name) {
5058 if (Tok == "r128" && !hasMIMG_R128())
5059 Error(S, "r128 modifier is not supported on this GPU");
5060 if (Tok == "a16" && !isGFX9() && !hasGFX10A16())
5061 Error(S, "a16 modifier is not supported on this GPU");
5062 Bit = 1;
5063 Parser.Lex();
5064 } else if (Tok.startswith("no") && Tok.endswith(Name)) {
5065 Bit = 0;
5066 Parser.Lex();
5067 } else {
5068 return MatchOperand_NoMatch;
5069 }
5070 break;
5071 }
5072 default:
5073 return MatchOperand_NoMatch;
5074 }
5075 }
5076
5077 if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC)
5078 return MatchOperand_ParseFail;
5079
5080 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
5081 ImmTy = AMDGPUOperand::ImmTyR128A16;
5082
5083 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
5084 return MatchOperand_Success;
5085 }
5086
5087 static void addOptionalImmOperand(
5088 MCInst& Inst, const OperandVector& Operands,
5089 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
5090 AMDGPUOperand::ImmTy ImmT,
5091 int64_t Default = 0) {
5092 auto i = OptionalIdx.find(ImmT);
5093 if (i != OptionalIdx.end()) {
5094 unsigned Idx = i->second;
5095 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
5096 } else {
5097 Inst.addOperand(MCOperand::createImm(Default));
5098 }
5099 }
5100
5101 OperandMatchResultTy
5102 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
5103 StringRef &Value,
5104 SMLoc &StringLoc) {
5105 if (!trySkipId(Prefix, AsmToken::Colon))
5106 return MatchOperand_NoMatch;
5107
5108 StringLoc = getLoc();
5109 return parseId(Value, "expected an identifier") ? MatchOperand_Success
5110 : MatchOperand_ParseFail;
5111 }
5112
5113 //===----------------------------------------------------------------------===//
5114 // MTBUF format
5115 //===----------------------------------------------------------------------===//
5116
5117 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
5118 int64_t MaxVal,
5119 int64_t &Fmt) {
5120 int64_t Val;
5121 SMLoc Loc = getLoc();
5122
5123 auto Res = parseIntWithPrefix(Pref, Val);
5124 if (Res == MatchOperand_ParseFail)
5125 return false;
5126 if (Res == MatchOperand_NoMatch)
5127 return true;
5128
5129 if (Val < 0 || Val > MaxVal) {
5130 Error(Loc, Twine("out of range ", StringRef(Pref)));
5131 return false;
5132 }
5133
5134 Fmt = Val;
5135 return true;
5136 }
5137
5138 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
5139 // values to live in a joint format operand in the MCInst encoding.
5140 OperandMatchResultTy
5141 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
5142 using namespace llvm::AMDGPU::MTBUFFormat;
5143
5144 int64_t Dfmt = DFMT_UNDEF;
5145 int64_t Nfmt = NFMT_UNDEF;
5146
5147 // dfmt and nfmt can appear in either order, and each is optional.
5148 for (int I = 0; I < 2; ++I) {
5149 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
5150 return MatchOperand_ParseFail;
5151
5152 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
5153 return MatchOperand_ParseFail;
5154 }
5155 // Skip optional comma between dfmt/nfmt
5156 // but guard against 2 commas following each other.
5157 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
5158 !peekToken().is(AsmToken::Comma)) {
5159 trySkipToken(AsmToken::Comma);
5160 }
5161 }
5162
5163 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
5164 return MatchOperand_NoMatch;
5165
5166 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5167 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5168
5169 Format = encodeDfmtNfmt(Dfmt, Nfmt);
5170 return MatchOperand_Success;
5171 }
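
// For example, a pre-GFX10 tbuffer operand list may contain
// "dfmt:4, nfmt:2" with the two keys in either order and the comma between
// them optional; both values end up packed into the single format operand
// by encodeDfmtNfmt().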
5172
5173 OperandMatchResultTy
5174 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
5175 using namespace llvm::AMDGPU::MTBUFFormat;
5176
5177 int64_t Fmt = UFMT_UNDEF;
5178
5179 if (!tryParseFmt("format", UFMT_MAX, Fmt))
5180 return MatchOperand_ParseFail;
5181
5182 if (Fmt == UFMT_UNDEF)
5183 return MatchOperand_NoMatch;
5184
5185 Format = Fmt;
5186 return MatchOperand_Success;
5187 }
5188
5189 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
5190 int64_t &Nfmt,
5191 StringRef FormatStr,
5192 SMLoc Loc) {
5193 using namespace llvm::AMDGPU::MTBUFFormat;
5194 int64_t Format;
5195
5196 Format = getDfmt(FormatStr);
5197 if (Format != DFMT_UNDEF) {
5198 Dfmt = Format;
5199 return true;
5200 }
5201
5202 Format = getNfmt(FormatStr, getSTI());
5203 if (Format != NFMT_UNDEF) {
5204 Nfmt = Format;
5205 return true;
5206 }
5207
5208 Error(Loc, "unsupported format");
5209 return false;
5210 }
5211
5212 OperandMatchResultTy
5213 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
5214 SMLoc FormatLoc,
5215 int64_t &Format) {
5216 using namespace llvm::AMDGPU::MTBUFFormat;
5217
5218 int64_t Dfmt = DFMT_UNDEF;
5219 int64_t Nfmt = NFMT_UNDEF;
5220 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
5221 return MatchOperand_ParseFail;
5222
5223 if (trySkipToken(AsmToken::Comma)) {
5224 StringRef Str;
5225 SMLoc Loc = getLoc();
5226 if (!parseId(Str, "expected a format string") ||
5227 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
5228 return MatchOperand_ParseFail;
5229 }
5230 if (Dfmt == DFMT_UNDEF) {
5231 Error(Loc, "duplicate numeric format");
5232 return MatchOperand_ParseFail;
5233 } else if (Nfmt == NFMT_UNDEF) {
5234 Error(Loc, "duplicate data format");
5235 return MatchOperand_ParseFail;
5236 }
5237 }
5238
5239 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
5240 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
5241
5242 if (isGFX10Plus()) {
5243 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
5244 if (Ufmt == UFMT_UNDEF) {
5245 Error(FormatLoc, "unsupported format");
5246 return MatchOperand_ParseFail;
5247 }
5248 Format = Ufmt;
5249 } else {
5250 Format = encodeDfmtNfmt(Dfmt, Nfmt);
5251 }
5252
5253 return MatchOperand_Success;
5254 }
5255
5256 OperandMatchResultTy
5257 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
5258 SMLoc Loc,
5259 int64_t &Format) {
5260 using namespace llvm::AMDGPU::MTBUFFormat;
5261
5262 auto Id = getUnifiedFormat(FormatStr);
5263 if (Id == UFMT_UNDEF)
5264 return MatchOperand_NoMatch;
5265
5266 if (!isGFX10Plus()) {
5267 Error(Loc, "unified format is not supported on this GPU");
5268 return MatchOperand_ParseFail;
5269 }
5270
5271 Format = Id;
5272 return MatchOperand_Success;
5273 }
5274
5275 OperandMatchResultTy
5276 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
5277 using namespace llvm::AMDGPU::MTBUFFormat;
5278 SMLoc Loc = getLoc();
5279
5280 if (!parseExpr(Format))
5281 return MatchOperand_ParseFail;
5282 if (!isValidFormatEncoding(Format, getSTI())) {
5283 Error(Loc, "out of range format");
5284 return MatchOperand_ParseFail;
5285 }
5286
5287 return MatchOperand_Success;
5288 }
5289
5290 OperandMatchResultTy
5291 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
5292 using namespace llvm::AMDGPU::MTBUFFormat;
5293
5294 if (!trySkipId("format", AsmToken::Colon))
5295 return MatchOperand_NoMatch;
5296
5297 if (trySkipToken(AsmToken::LBrac)) {
5298 StringRef FormatStr;
5299 SMLoc Loc = getLoc();
5300 if (!parseId(FormatStr, "expected a format string"))
5301 return MatchOperand_ParseFail;
5302
5303 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
5304 if (Res == MatchOperand_NoMatch)
5305 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
5306 if (Res != MatchOperand_Success)
5307 return Res;
5308
5309 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
5310 return MatchOperand_ParseFail;
5311
5312 return MatchOperand_Success;
5313 }
5314
5315 return parseNumericFormat(Format);
5316 }
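
// Accepted forms, sketched (the symbolic names come from the MTBUFFormat
// tables; the ones below are illustrative):
//
//   format:22
//   format:[BUF_FMT_32_FLOAT]
//   format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]
//
// The last, split form is translated to a unified format id on GFX10+ by
// parseSymbolicSplitFormat() above.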
5317
5318 OperandMatchResultTy
5319 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
5320 using namespace llvm::AMDGPU::MTBUFFormat;
5321
5322 int64_t Format = getDefaultFormatEncoding(getSTI());
5323 OperandMatchResultTy Res;
5324 SMLoc Loc = getLoc();
5325
5326 // Parse legacy format syntax.
5327 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
5328 if (Res == MatchOperand_ParseFail)
5329 return Res;
5330
5331 bool FormatFound = (Res == MatchOperand_Success);
5332
5333 Operands.push_back(
5334 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
5335
5336 if (FormatFound)
5337 trySkipToken(AsmToken::Comma);
5338
5339 if (isToken(AsmToken::EndOfStatement)) {
5340 // We are expecting an soffset operand,
5341     // but let the matcher handle the error.
5342 return MatchOperand_Success;
5343 }
5344
5345 // Parse soffset.
5346 Res = parseRegOrImm(Operands);
5347 if (Res != MatchOperand_Success)
5348 return Res;
5349
5350 trySkipToken(AsmToken::Comma);
5351
5352 if (!FormatFound) {
5353 Res = parseSymbolicOrNumericFormat(Format);
5354 if (Res == MatchOperand_ParseFail)
5355 return Res;
5356 if (Res == MatchOperand_Success) {
5357 auto Size = Operands.size();
5358 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
5359 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
5360 Op.setImm(Format);
5361 }
5362 return MatchOperand_Success;
5363 }
5364
5365 if (isId("format") && peekToken().is(AsmToken::Colon)) {
5366 Error(getLoc(), "duplicate format");
5367 return MatchOperand_ParseFail;
5368 }
5369 return MatchOperand_Success;
5370 }
5371
5372 //===----------------------------------------------------------------------===//
5373 // ds
5374 //===----------------------------------------------------------------------===//
5375
5376 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
5377 const OperandVector &Operands) {
5378 OptionalImmIndexMap OptionalIdx;
5379
5380 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5381 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5382
5383 // Add the register arguments
5384 if (Op.isReg()) {
5385 Op.addRegOperands(Inst, 1);
5386 continue;
5387 }
5388
5389 // Handle optional arguments
5390 OptionalIdx[Op.getImmTy()] = i;
5391 }
5392
5393 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
5394 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
5395 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5396
5397 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5398 }
5399
5400 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
5401 bool IsGdsHardcoded) {
5402 OptionalImmIndexMap OptionalIdx;
5403
5404 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5405 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5406
5407 // Add the register arguments
5408 if (Op.isReg()) {
5409 Op.addRegOperands(Inst, 1);
5410 continue;
5411 }
5412
5413 if (Op.isToken() && Op.getToken() == "gds") {
5414 IsGdsHardcoded = true;
5415 continue;
5416 }
5417
5418 // Handle optional arguments
5419 OptionalIdx[Op.getImmTy()] = i;
5420 }
5421
5422 AMDGPUOperand::ImmTy OffsetType =
5423 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
5424 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
5425 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
5426 AMDGPUOperand::ImmTyOffset;
5427
5428 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
5429
5430 if (!IsGdsHardcoded) {
5431 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
5432 }
5433 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
5434 }
5435
5436 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
5437 OptionalImmIndexMap OptionalIdx;
5438
5439 unsigned OperandIdx[4];
5440 unsigned EnMask = 0;
5441 int SrcIdx = 0;
5442
5443 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5444 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5445
5446 // Add the register arguments
5447 if (Op.isReg()) {
5448 assert(SrcIdx < 4);
5449 OperandIdx[SrcIdx] = Inst.size();
5450 Op.addRegOperands(Inst, 1);
5451 ++SrcIdx;
5452 continue;
5453 }
5454
5455 if (Op.isOff()) {
5456 assert(SrcIdx < 4);
5457 OperandIdx[SrcIdx] = Inst.size();
5458 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
5459 ++SrcIdx;
5460 continue;
5461 }
5462
5463 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
5464 Op.addImmOperands(Inst, 1);
5465 continue;
5466 }
5467
5468 if (Op.isToken() && Op.getToken() == "done")
5469 continue;
5470
5471 // Handle optional arguments
5472 OptionalIdx[Op.getImmTy()] = i;
5473 }
5474
5475 assert(SrcIdx == 4);
5476
5477 bool Compr = false;
5478 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
5479 Compr = true;
5480 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
5481 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
5482 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
5483 }
5484
5485 for (auto i = 0; i < SrcIdx; ++i) {
5486 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
5487 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
5488 }
5489 }
5490
5491 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
5492 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
5493
5494 Inst.addOperand(MCOperand::createImm(EnMask));
5495 }
5496
5497 //===----------------------------------------------------------------------===//
5498 // s_waitcnt
5499 //===----------------------------------------------------------------------===//
5500
5501 static bool
5502 encodeCnt(
5503 const AMDGPU::IsaVersion ISA,
5504 int64_t &IntVal,
5505 int64_t CntVal,
5506 bool Saturate,
5507 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
5508 unsigned (*decode)(const IsaVersion &Version, unsigned))
5509 {
5510 bool Failed = false;
5511
5512 IntVal = encode(ISA, IntVal, CntVal);
5513 if (CntVal != decode(ISA, IntVal)) {
5514 if (Saturate) {
5515 IntVal = encode(ISA, IntVal, -1);
5516 } else {
5517 Failed = true;
5518 }
5519 }
5520 return Failed;
5521 }
5522
5523 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
5524
5525 SMLoc CntLoc = getLoc();
5526 StringRef CntName = getTokenStr();
5527
5528 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
5529 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
5530 return false;
5531
5532 int64_t CntVal;
5533 SMLoc ValLoc = getLoc();
5534 if (!parseExpr(CntVal))
5535 return false;
5536
5537 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5538
5539 bool Failed = true;
5540 bool Sat = CntName.endswith("_sat");
5541
5542 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
5543 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
5544 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
5545 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
5546 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
5547 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
5548 } else {
5549 Error(CntLoc, "invalid counter name " + CntName);
5550 return false;
5551 }
5552
5553 if (Failed) {
5554 Error(ValLoc, "too large value for " + CntName);
5555 return false;
5556 }
5557
5558 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
5559 return false;
5560
5561 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
5562 if (isToken(AsmToken::EndOfStatement)) {
5563 Error(getLoc(), "expected a counter name");
5564 return false;
5565 }
5566 }
5567
5568 return true;
5569 }
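
// For example, "s_waitcnt vmcnt(0) lgkmcnt(0)" folds both counters into a
// single bitmask; counters may be separated by '&', ',' or just whitespace.
// The *_sat counter variants clamp an out-of-range value instead of
// reporting an error.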
5570
5571 OperandMatchResultTy
5572 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
5573 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
5574 int64_t Waitcnt = getWaitcntBitMask(ISA);
5575 SMLoc S = getLoc();
5576
5577 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
5578 while (!isToken(AsmToken::EndOfStatement)) {
5579 if (!parseCnt(Waitcnt))
5580 return MatchOperand_ParseFail;
5581 }
5582 } else {
5583 if (!parseExpr(Waitcnt))
5584 return MatchOperand_ParseFail;
5585 }
5586
5587 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
5588 return MatchOperand_Success;
5589 }
5590
5591 bool
5592 AMDGPUOperand::isSWaitCnt() const {
5593 return isImm();
5594 }
5595
5596 //===----------------------------------------------------------------------===//
5597 // hwreg
5598 //===----------------------------------------------------------------------===//
5599
5600 bool
5601 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
5602 OperandInfoTy &Offset,
5603 OperandInfoTy &Width) {
5604 using namespace llvm::AMDGPU::Hwreg;
5605
5606 // The register may be specified by name or using a numeric code
5607 HwReg.Loc = getLoc();
5608 if (isToken(AsmToken::Identifier) &&
5609 (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
5610 HwReg.IsSymbolic = true;
5611 lex(); // skip register name
5612 } else if (!parseExpr(HwReg.Id, "a register name")) {
5613 return false;
5614 }
5615
5616 if (trySkipToken(AsmToken::RParen))
5617 return true;
5618
5619 // parse optional params
5620 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
5621 return false;
5622
5623 Offset.Loc = getLoc();
5624 if (!parseExpr(Offset.Id))
5625 return false;
5626
5627 if (!skipToken(AsmToken::Comma, "expected a comma"))
5628 return false;
5629
5630 Width.Loc = getLoc();
5631 return parseExpr(Width.Id) &&
5632 skipToken(AsmToken::RParen, "expected a closing parenthesis");
5633 }
5634
5635 bool
5636 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
5637 const OperandInfoTy &Offset,
5638 const OperandInfoTy &Width) {
5639
5640 using namespace llvm::AMDGPU::Hwreg;
5641
5642 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
5643 Error(HwReg.Loc,
5644 "specified hardware register is not supported on this GPU");
5645 return false;
5646 }
5647 if (!isValidHwreg(HwReg.Id)) {
5648 Error(HwReg.Loc,
5649 "invalid code of hardware register: only 6-bit values are legal");
5650 return false;
5651 }
5652 if (!isValidHwregOffset(Offset.Id)) {
5653 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
5654 return false;
5655 }
5656 if (!isValidHwregWidth(Width.Id)) {
5657 Error(Width.Loc,
5658 "invalid bitfield width: only values from 1 to 32 are legal");
5659 return false;
5660 }
5661 return true;
5662 }
5663
5664 OperandMatchResultTy
5665 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
5666 using namespace llvm::AMDGPU::Hwreg;
5667
5668 int64_t ImmVal = 0;
5669 SMLoc Loc = getLoc();
5670
5671 if (trySkipId("hwreg", AsmToken::LParen)) {
5672 OperandInfoTy HwReg(ID_UNKNOWN_);
5673 OperandInfoTy Offset(OFFSET_DEFAULT_);
5674 OperandInfoTy Width(WIDTH_DEFAULT_);
5675 if (parseHwregBody(HwReg, Offset, Width) &&
5676 validateHwreg(HwReg, Offset, Width)) {
5677 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
5678 } else {
5679 return MatchOperand_ParseFail;
5680 }
5681 } else if (parseExpr(ImmVal, "a hwreg macro")) {
5682 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5683 Error(Loc, "invalid immediate: only 16-bit values are legal");
5684 return MatchOperand_ParseFail;
5685 }
5686 } else {
5687 return MatchOperand_ParseFail;
5688 }
5689
5690 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5691 return MatchOperand_Success;
5692 }
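
// Usage sketch (the symbolic register names are those known to
// getHwregId(); HW_REG_TRAPSTS below is illustrative):
//
//   s_getreg_b32 s0, hwreg(HW_REG_TRAPSTS, 8, 4)
//   s_getreg_b32 s0, hwreg(6)
//
// The first form reads a 4-bit field starting at bit 8; the second uses a
// raw register code with the default offset and width.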
5693
5694 bool AMDGPUOperand::isHwreg() const {
5695 return isImmTy(ImmTyHwreg);
5696 }
5697
5698 //===----------------------------------------------------------------------===//
5699 // sendmsg
5700 //===----------------------------------------------------------------------===//
5701
5702 bool
5703 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5704 OperandInfoTy &Op,
5705 OperandInfoTy &Stream) {
5706 using namespace llvm::AMDGPU::SendMsg;
5707
5708 Msg.Loc = getLoc();
5709 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5710 Msg.IsSymbolic = true;
5711 lex(); // skip message name
5712 } else if (!parseExpr(Msg.Id, "a message name")) {
5713 return false;
5714 }
5715
5716 if (trySkipToken(AsmToken::Comma)) {
5717 Op.IsDefined = true;
5718 Op.Loc = getLoc();
5719 if (isToken(AsmToken::Identifier) &&
5720 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5721 lex(); // skip operation name
5722 } else if (!parseExpr(Op.Id, "an operation name")) {
5723 return false;
5724 }
5725
5726 if (trySkipToken(AsmToken::Comma)) {
5727 Stream.IsDefined = true;
5728 Stream.Loc = getLoc();
5729 if (!parseExpr(Stream.Id))
5730 return false;
5731 }
5732 }
5733
5734 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5735 }
5736
5737 bool
5738 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5739 const OperandInfoTy &Op,
5740 const OperandInfoTy &Stream) {
5741 using namespace llvm::AMDGPU::SendMsg;
5742
5743   // Validation strictness depends on whether the message is specified
5744   // in a symbolic or in a numeric form. In the latter case
5745   // only the encoding possibility is checked.
5746 bool Strict = Msg.IsSymbolic;
5747
5748 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5749 Error(Msg.Loc, "invalid message id");
5750 return false;
5751 }
5752 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5753 if (Op.IsDefined) {
5754 Error(Op.Loc, "message does not support operations");
5755 } else {
5756 Error(Msg.Loc, "missing message operation");
5757 }
5758 return false;
5759 }
5760 if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5761 Error(Op.Loc, "invalid operation id");
5762 return false;
5763 }
5764 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5765 Error(Stream.Loc, "message operation does not support streams");
5766 return false;
5767 }
5768 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5769 Error(Stream.Loc, "invalid message stream id");
5770 return false;
5771 }
5772 return true;
5773 }
5774
5775 OperandMatchResultTy
5776 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5777 using namespace llvm::AMDGPU::SendMsg;
5778
5779 int64_t ImmVal = 0;
5780 SMLoc Loc = getLoc();
5781
5782 if (trySkipId("sendmsg", AsmToken::LParen)) {
5783 OperandInfoTy Msg(ID_UNKNOWN_);
5784 OperandInfoTy Op(OP_NONE_);
5785 OperandInfoTy Stream(STREAM_ID_NONE_);
5786 if (parseSendMsgBody(Msg, Op, Stream) &&
5787 validateSendMsg(Msg, Op, Stream)) {
5788 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
5789 } else {
5790 return MatchOperand_ParseFail;
5791 }
5792 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
5793 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
5794 Error(Loc, "invalid immediate: only 16-bit values are legal");
5795 return MatchOperand_ParseFail;
5796 }
5797 } else {
5798 return MatchOperand_ParseFail;
5799 }
5800
5801 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
5802 return MatchOperand_Success;
5803 }
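
// Usage sketch (message and operation names come from the SendMsg tables;
// those below are illustrative):
//
//   s_sendmsg sendmsg(MSG_INTERRUPT)
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
//   s_sendmsg 0x1
//
// The last form passes a raw 16-bit encoding and is validated only for
// range.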
5804
5805 bool AMDGPUOperand::isSendMsg() const {
5806 return isImmTy(ImmTySendMsg);
5807 }
5808
5809 //===----------------------------------------------------------------------===//
5810 // v_interp
5811 //===----------------------------------------------------------------------===//
5812
5813 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
5814 StringRef Str;
5815 SMLoc S = getLoc();
5816
5817 if (!parseId(Str))
5818 return MatchOperand_NoMatch;
5819
5820 int Slot = StringSwitch<int>(Str)
5821 .Case("p10", 0)
5822 .Case("p20", 1)
5823 .Case("p0", 2)
5824 .Default(-1);
5825
5826 if (Slot == -1) {
5827 Error(S, "invalid interpolation slot");
5828 return MatchOperand_ParseFail;
5829 }
5830
5831 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
5832 AMDGPUOperand::ImmTyInterpSlot));
5833 return MatchOperand_Success;
5834 }
5835
5836 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
5837 StringRef Str;
5838 SMLoc S = getLoc();
5839
5840 if (!parseId(Str))
5841 return MatchOperand_NoMatch;
5842
5843 if (!Str.startswith("attr")) {
5844 Error(S, "invalid interpolation attribute");
5845 return MatchOperand_ParseFail;
5846 }
5847
5848 StringRef Chan = Str.take_back(2);
5849 int AttrChan = StringSwitch<int>(Chan)
5850 .Case(".x", 0)
5851 .Case(".y", 1)
5852 .Case(".z", 2)
5853 .Case(".w", 3)
5854 .Default(-1);
5855 if (AttrChan == -1) {
5856 Error(S, "invalid or missing interpolation attribute channel");
5857 return MatchOperand_ParseFail;
5858 }
5859
5860 Str = Str.drop_back(2).drop_front(4);
5861
5862 uint8_t Attr;
5863 if (Str.getAsInteger(10, Attr)) {
5864 Error(S, "invalid or missing interpolation attribute number");
5865 return MatchOperand_ParseFail;
5866 }
5867
5868 if (Attr > 63) {
5869 Error(S, "out of bounds interpolation attribute number");
5870 return MatchOperand_ParseFail;
5871 }
5872
5873 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
5874
5875 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
5876 AMDGPUOperand::ImmTyInterpAttr));
5877 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
5878 AMDGPUOperand::ImmTyAttrChan));
5879 return MatchOperand_Success;
5880 }
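
// For example, "attr4.y" yields Attr = 4 and AttrChan = 1, emitted as two
// immediate operands; "attr63.w" is the largest attribute accepted.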
5881
5882 //===----------------------------------------------------------------------===//
5883 // exp
5884 //===----------------------------------------------------------------------===//
5885
5886 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
5887 using namespace llvm::AMDGPU::Exp;
5888
5889 StringRef Str;
5890 SMLoc S = getLoc();
5891
5892 if (!parseId(Str))
5893 return MatchOperand_NoMatch;
5894
5895 unsigned Id = getTgtId(Str);
5896 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
5897 Error(S, (Id == ET_INVALID) ?
5898 "invalid exp target" :
5899 "exp target is not supported on this GPU");
5900 return MatchOperand_ParseFail;
5901 }
5902
5903 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
5904 AMDGPUOperand::ImmTyExpTgt));
5905 return MatchOperand_Success;
5906 }
5907
5908 //===----------------------------------------------------------------------===//
5909 // parser helpers
5910 //===----------------------------------------------------------------------===//
5911
5912 bool
5913 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
5914 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
5915 }
5916
5917 bool
5918 AMDGPUAsmParser::isId(const StringRef Id) const {
5919 return isId(getToken(), Id);
5920 }
5921
5922 bool
5923 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
5924 return getTokenKind() == Kind;
5925 }
5926
5927 bool
5928 AMDGPUAsmParser::trySkipId(const StringRef Id) {
5929 if (isId(Id)) {
5930 lex();
5931 return true;
5932 }
5933 return false;
5934 }
5935
5936 bool
5937 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
5938 if (isId(Id) && peekToken().is(Kind)) {
5939 lex();
5940 lex();
5941 return true;
5942 }
5943 return false;
5944 }
5945
5946 bool
5947 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
5948 if (isToken(Kind)) {
5949 lex();
5950 return true;
5951 }
5952 return false;
5953 }
5954
5955 bool
5956 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
5957 const StringRef ErrMsg) {
5958 if (!trySkipToken(Kind)) {
5959 Error(getLoc(), ErrMsg);
5960 return false;
5961 }
5962 return true;
5963 }
5964
5965 bool
5966 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
5967 SMLoc S = getLoc();
5968
5969 const MCExpr *Expr;
5970 if (Parser.parseExpression(Expr))
5971 return false;
5972
5973 if (Expr->evaluateAsAbsolute(Imm))
5974 return true;
5975
5976 if (Expected.empty()) {
5977 Error(S, "expected absolute expression");
5978 } else {
5979 Error(S, Twine("expected ", Expected) +
5980 Twine(" or an absolute expression"));
5981 }
5982 return false;
5983 }
5984
5985 bool
5986 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
5987 SMLoc S = getLoc();
5988
5989 const MCExpr *Expr;
5990 if (Parser.parseExpression(Expr))
5991 return false;
5992
5993 int64_t IntVal;
5994 if (Expr->evaluateAsAbsolute(IntVal)) {
5995 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
5996 } else {
5997 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
5998 }
5999 return true;
6000 }
6001
6002 bool
6003 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
6004 if (isToken(AsmToken::String)) {
6005 Val = getToken().getStringContents();
6006 lex();
6007 return true;
6008 } else {
6009 Error(getLoc(), ErrMsg);
6010 return false;
6011 }
6012 }
6013
6014 bool
6015 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
6016 if (isToken(AsmToken::Identifier)) {
6017 Val = getTokenStr();
6018 lex();
6019 return true;
6020 } else {
6021 if (!ErrMsg.empty())
6022 Error(getLoc(), ErrMsg);
6023 return false;
6024 }
6025 }
6026
6027 AsmToken
6028 AMDGPUAsmParser::getToken() const {
6029 return Parser.getTok();
6030 }
6031
6032 AsmToken
6033 AMDGPUAsmParser::peekToken() {
6034 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
6035 }
6036
6037 void
6038 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
6039 auto TokCount = getLexer().peekTokens(Tokens);
6040
6041 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
6042 Tokens[Idx] = AsmToken(AsmToken::Error, "");
6043 }
6044
6045 AsmToken::TokenKind
6046 AMDGPUAsmParser::getTokenKind() const {
6047 return getLexer().getKind();
6048 }
6049
6050 SMLoc
6051 AMDGPUAsmParser::getLoc() const {
6052 return getToken().getLoc();
6053 }
6054
6055 StringRef
6056 AMDGPUAsmParser::getTokenStr() const {
6057 return getToken().getString();
6058 }
6059
6060 void
6061 AMDGPUAsmParser::lex() {
6062 Parser.Lex();
6063 }
6064
6065 SMLoc
6066 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
6067 const OperandVector &Operands) const {
6068 for (unsigned i = Operands.size() - 1; i > 0; --i) {
6069 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6070 if (Test(Op))
6071 return Op.getStartLoc();
6072 }
6073 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
6074 }
6075
6076 SMLoc
6077 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
6078 const OperandVector &Operands) const {
6079 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
6080 return getOperandLoc(Test, Operands);
6081 }
6082
6083 SMLoc
6084 AMDGPUAsmParser::getRegLoc(unsigned Reg,
6085 const OperandVector &Operands) const {
6086 auto Test = [=](const AMDGPUOperand& Op) {
6087 return Op.isRegKind() && Op.getReg() == Reg;
6088 };
6089 return getOperandLoc(Test, Operands);
6090 }
6091
6092 SMLoc
6093 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
6094 auto Test = [](const AMDGPUOperand& Op) {
6095 return Op.IsImmKindLiteral() || Op.isExpr();
6096 };
6097 return getOperandLoc(Test, Operands);
6098 }
6099
6100 SMLoc
6101 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
6102 auto Test = [](const AMDGPUOperand& Op) {
6103 return Op.isImmKindConst();
6104 };
6105 return getOperandLoc(Test, Operands);
6106 }
6107
6108 //===----------------------------------------------------------------------===//
6109 // swizzle
6110 //===----------------------------------------------------------------------===//
6111
6112 LLVM_READNONE
6113 static unsigned
6114 encodeBitmaskPerm(const unsigned AndMask,
6115 const unsigned OrMask,
6116 const unsigned XorMask) {
6117 using namespace llvm::AMDGPU::Swizzle;
6118
6119 return BITMASK_PERM_ENC |
6120 (AndMask << BITMASK_AND_SHIFT) |
6121 (OrMask << BITMASK_OR_SHIFT) |
6122 (XorMask << BITMASK_XOR_SHIFT);
6123 }
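
// A hedged summary of the BITMASK_PERM semantics the helpers below build
// on: within each group of 32 lanes, lane `id` reads from lane
// ((id & AndMask) | OrMask) ^ XorMask.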
6124
6125 bool
6126 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
6127 const unsigned MinVal,
6128 const unsigned MaxVal,
6129 const StringRef ErrMsg,
6130 SMLoc &Loc) {
6131 if (!skipToken(AsmToken::Comma, "expected a comma")) {
6132 return false;
6133 }
6134 Loc = getLoc();
6135 if (!parseExpr(Op)) {
6136 return false;
6137 }
6138 if (Op < MinVal || Op > MaxVal) {
6139 Error(Loc, ErrMsg);
6140 return false;
6141 }
6142
6143 return true;
6144 }
6145
6146 bool
6147 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
6148 const unsigned MinVal,
6149 const unsigned MaxVal,
6150 const StringRef ErrMsg) {
6151 SMLoc Loc;
6152 for (unsigned i = 0; i < OpNum; ++i) {
6153 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
6154 return false;
6155 }
6156
6157 return true;
6158 }
6159
6160 bool
6161 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
6162 using namespace llvm::AMDGPU::Swizzle;
6163
6164 int64_t Lane[LANE_NUM];
6165 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
6166 "expected a 2-bit lane id")) {
6167 Imm = QUAD_PERM_ENC;
6168 for (unsigned I = 0; I < LANE_NUM; ++I) {
6169 Imm |= Lane[I] << (LANE_SHIFT * I);
6170 }
6171 return true;
6172 }
6173 return false;
6174 }
6175
6176 bool
6177 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
6178 using namespace llvm::AMDGPU::Swizzle;
6179
6180 SMLoc Loc;
6181 int64_t GroupSize;
6182 int64_t LaneIdx;
6183
6184 if (!parseSwizzleOperand(GroupSize,
6185 2, 32,
6186 "group size must be in the interval [2,32]",
6187 Loc)) {
6188 return false;
6189 }
6190 if (!isPowerOf2_64(GroupSize)) {
6191 Error(Loc, "group size must be a power of two");
6192 return false;
6193 }
6194 if (parseSwizzleOperand(LaneIdx,
6195 0, GroupSize - 1,
6196 "lane id must be in the interval [0,group size - 1]",
6197 Loc)) {
6198 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
6199 return true;
6200 }
6201 return false;
6202 }
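
// The broadcast encoding above follows directly from the bitmask
// semantics: AndMask = BITMASK_MAX - GroupSize + 1 keeps the high,
// group-selecting bits of the lane id and clears the low bits, which
// OrMask then replaces with the chosen lane index.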
6203
6204 bool
6205 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
6206 using namespace llvm::AMDGPU::Swizzle;
6207
6208 SMLoc Loc;
6209 int64_t GroupSize;
6210
6211 if (!parseSwizzleOperand(GroupSize,
6212 2, 32,
6213 "group size must be in the interval [2,32]",
6214 Loc)) {
6215 return false;
6216 }
6217 if (!isPowerOf2_64(GroupSize)) {
6218 Error(Loc, "group size must be a power of two");
6219 return false;
6220 }
6221
6222 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
6223 return true;
6224 }
6225
6226 bool
6227 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
6228 using namespace llvm::AMDGPU::Swizzle;
6229
6230 SMLoc Loc;
6231 int64_t GroupSize;
6232
6233 if (!parseSwizzleOperand(GroupSize,
6234 1, 16,
6235 "group size must be in the interval [1,16]",
6236 Loc)) {
6237 return false;
6238 }
6239 if (!isPowerOf2_64(GroupSize)) {
6240 Error(Loc, "group size must be a power of two");
6241 return false;
6242 }
6243
6244 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
6245 return true;
6246 }
6247
6248 bool
6249 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
6250 using namespace llvm::AMDGPU::Swizzle;
6251
6252 if (!skipToken(AsmToken::Comma, "expected a comma")) {
6253 return false;
6254 }
6255
6256 StringRef Ctl;
6257 SMLoc StrLoc = getLoc();
6258 if (!parseString(Ctl)) {
6259 return false;
6260 }
6261 if (Ctl.size() != BITMASK_WIDTH) {
6262 Error(StrLoc, "expected a 5-character mask");
6263 return false;
6264 }
6265
6266 unsigned AndMask = 0;
6267 unsigned OrMask = 0;
6268 unsigned XorMask = 0;
6269
6270 for (size_t i = 0; i < Ctl.size(); ++i) {
6271 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
6272 switch(Ctl[i]) {
6273 default:
6274 Error(StrLoc, "invalid mask");
6275 return false;
6276 case '0':
6277 break;
6278 case '1':
6279 OrMask |= Mask;
6280 break;
6281 case 'p':
6282 AndMask |= Mask;
6283 break;
6284 case 'i':
6285 AndMask |= Mask;
6286 XorMask |= Mask;
6287 break;
6288 }
6289 }
6290
6291 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
6292 return true;
6293 }
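
// A worked example: swizzle(BITMASK_PERM, "01pi0") processes the mask
// MSB-first, one character per lane-id bit: '0' forces the bit to zero,
// '1' forces it to one (OrMask), 'p' preserves it (AndMask), and 'i'
// inverts it (AndMask plus XorMask).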
6294
6295 bool
6296 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
6297
6298 SMLoc OffsetLoc = getLoc();
6299
6300 if (!parseExpr(Imm, "a swizzle macro")) {
6301 return false;
6302 }
6303 if (!isUInt<16>(Imm)) {
6304 Error(OffsetLoc, "expected a 16-bit offset");
6305 return false;
6306 }
6307 return true;
6308 }
6309
6310 bool
6311 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
6312 using namespace llvm::AMDGPU::Swizzle;
6313
6314   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
6315
6316 SMLoc ModeLoc = getLoc();
6317 bool Ok = false;
6318
6319 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
6320 Ok = parseSwizzleQuadPerm(Imm);
6321 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
6322 Ok = parseSwizzleBitmaskPerm(Imm);
6323 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
6324 Ok = parseSwizzleBroadcast(Imm);
6325 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
6326 Ok = parseSwizzleSwap(Imm);
6327 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
6328 Ok = parseSwizzleReverse(Imm);
6329 } else {
6330 Error(ModeLoc, "expected a swizzle mode");
6331 }
6332
6333     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
6334 }
6335
6336 return false;
6337 }
6338
6339 OperandMatchResultTy
6340 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
6341 SMLoc S = getLoc();
6342 int64_t Imm = 0;
6343
6344 if (trySkipId("offset")) {
6345
6346 bool Ok = false;
6347 if (skipToken(AsmToken::Colon, "expected a colon")) {
6348 if (trySkipId("swizzle")) {
6349 Ok = parseSwizzleMacro(Imm);
6350 } else {
6351 Ok = parseSwizzleOffset(Imm);
6352 }
6353 }
6354
6355 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
6356
6357 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
6358 } else {
6359 // Swizzle "offset" operand is optional.
6360 // If it is omitted, try parsing other optional operands.
6361 return parseOptionalOpr(Operands);
6362 }
6363 }
6364
6365 bool
6366 AMDGPUOperand::isSwizzle() const {
6367 return isImmTy(ImmTySwizzle);
6368 }
6369
6370 //===----------------------------------------------------------------------===//
6371 // VGPR Index Mode
6372 //===----------------------------------------------------------------------===//
6373
6374 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
6375
6376 using namespace llvm::AMDGPU::VGPRIndexMode;
6377
6378 if (trySkipToken(AsmToken::RParen)) {
6379 return OFF;
6380 }
6381
6382 int64_t Imm = 0;
6383
6384 while (true) {
6385 unsigned Mode = 0;
6386 SMLoc S = getLoc();
6387
6388 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
6389 if (trySkipId(IdSymbolic[ModeId])) {
6390 Mode = 1 << ModeId;
6391 break;
6392 }
6393 }
6394
6395 if (Mode == 0) {
6396 Error(S, (Imm == 0)?
6397 "expected a VGPR index mode or a closing parenthesis" :
6398 "expected a VGPR index mode");
6399 return UNDEF;
6400 }
6401
6402 if (Imm & Mode) {
6403 Error(S, "duplicate VGPR index mode");
6404 return UNDEF;
6405 }
6406 Imm |= Mode;
6407
6408 if (trySkipToken(AsmToken::RParen))
6409 break;
6410 if (!skipToken(AsmToken::Comma,
6411 "expected a comma or a closing parenthesis"))
6412 return UNDEF;
6413 }
6414
6415 return Imm;
6416 }
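
// Usage sketch (mode names come from VGPRIndexMode::IdSymbolic; SRC0 and
// DST below are illustrative):
//
//   s_set_gpr_idx_on s0, gpr_idx(SRC0,DST)
//
// An empty mode list, "gpr_idx()", encodes OFF.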
6417
6418 OperandMatchResultTy
6419 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
6420
6421 using namespace llvm::AMDGPU::VGPRIndexMode;
6422
6423 int64_t Imm = 0;
6424 SMLoc S = getLoc();
6425
6426 if (trySkipId("gpr_idx", AsmToken::LParen)) {
6427 Imm = parseGPRIdxMacro();
6428 if (Imm == UNDEF)
6429 return MatchOperand_ParseFail;
6430 } else {
6431 if (getParser().parseAbsoluteExpression(Imm))
6432 return MatchOperand_ParseFail;
6433 if (Imm < 0 || !isUInt<4>(Imm)) {
6434 Error(S, "invalid immediate: only 4-bit values are legal");
6435 return MatchOperand_ParseFail;
6436 }
6437 }
6438
6439 Operands.push_back(
6440 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
6441 return MatchOperand_Success;
6442 }
6443
6444 bool AMDGPUOperand::isGPRIdxMode() const {
6445 return isImmTy(ImmTyGprIdxMode);
6446 }
6447
6448 //===----------------------------------------------------------------------===//
6449 // sopp branch targets
6450 //===----------------------------------------------------------------------===//
6451
6452 OperandMatchResultTy
6453 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
6454
6455 // Make sure we are not parsing something
6456 // that looks like a label or an expression but is not.
6457 // This will improve error messages.
6458 if (isRegister() || isModifier())
6459 return MatchOperand_NoMatch;
6460
6461 if (!parseExpr(Operands))
6462 return MatchOperand_ParseFail;
6463
6464 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
6465 assert(Opr.isImm() || Opr.isExpr());
6466 SMLoc Loc = Opr.getStartLoc();
6467
6468 // Currently we do not support arbitrary expressions as branch targets.
6469 // Only labels and absolute expressions are accepted.
6470 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
6471 Error(Loc, "expected an absolute expression or a label");
6472 } else if (Opr.isImm() && !Opr.isS16Imm()) {
6473 Error(Loc, "expected a 16-bit signed jump offset");
6474 }
6475
6476 return MatchOperand_Success;
6477 }
6478
6479 //===----------------------------------------------------------------------===//
6480 // Boolean holding registers
6481 //===----------------------------------------------------------------------===//
6482
6483 OperandMatchResultTy
6484 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
6485 return parseReg(Operands);
6486 }
6487
6488 //===----------------------------------------------------------------------===//
6489 // mubuf
6490 //===----------------------------------------------------------------------===//
6491
6492 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const {
6493 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC);
6494 }
6495
6496 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
6497 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
6498 }
6499
6500 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const {
6501 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC);
6502 }
6503
6504 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
6505 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
6506 }
6507
6508 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
6509 const OperandVector &Operands,
6510 bool IsAtomic,
6511 bool IsAtomicReturn,
6512 bool IsLds) {
6513 bool IsLdsOpcode = IsLds;
6514 bool HasLdsModifier = false;
6515 OptionalImmIndexMap OptionalIdx;
6516 assert(IsAtomicReturn ? IsAtomic : true);
6517 unsigned FirstOperandIdx = 1;
6518
6519 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
6520 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6521
6522 // Add the register arguments
6523 if (Op.isReg()) {
6524 Op.addRegOperands(Inst, 1);
6525 // Insert a tied src for atomic return dst.
6526 // This cannot be postponed as subsequent calls to
6527 // addImmOperands rely on correct number of MC operands.
6528 if (IsAtomicReturn && i == FirstOperandIdx)
6529 Op.addRegOperands(Inst, 1);
6530 continue;
6531 }
6532
6533 // Handle the case where soffset is an immediate
6534 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6535 Op.addImmOperands(Inst, 1);
6536 continue;
6537 }
6538
6539 HasLdsModifier |= Op.isLDS();
6540
6541 // Handle tokens like 'offen' which are sometimes hard-coded into the
6542 // asm string. There are no MCInst operands for these.
6543 if (Op.isToken()) {
6544 continue;
6545 }
6546 assert(Op.isImm());
6547
6548 // Handle optional arguments
6549 OptionalIdx[Op.getImmTy()] = i;
6550 }
6551
6552 // This is a workaround for an llvm quirk which may result in an
6553 // incorrect instruction selection. Lds and non-lds versions of
6554 // MUBUF instructions are identical except that lds versions have a
6555 // mandatory 'lds' modifier. However, this modifier follows the
6556 // optional modifiers, and the llvm asm matcher regards it as
6557 // optional too. As a result, an lds version of an opcode may be
6558 // selected even if the instruction has no 'lds' modifier.
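// For example, "buffer_load_dword v1, off, s[4:7], s1" (no 'lds' modifier)
// must not keep an lds opcode; the check below switches it back to the
// non-lds variant via getMUBUFNoLdsInst().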
6559 if (IsLdsOpcode && !HasLdsModifier) {
6560 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
6561 if (NoLdsOpcode != -1) { // Got lds version - correct it.
6562 Inst.setOpcode(NoLdsOpcode);
6563 IsLdsOpcode = false;
6564 }
6565 }
6566
6567 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
6568 if (!IsAtomic || IsAtomicReturn) {
6569 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC,
6570 IsAtomicReturn ? -1 : 0);
6571 }
6572 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6573
6574 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
6575 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6576 }
6577
6578 if (isGFX10Plus())
6579 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6580 }
6581
6582 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
6583 OptionalImmIndexMap OptionalIdx;
6584
6585 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
6586 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
6587
6588 // Add the register arguments
6589 if (Op.isReg()) {
6590 Op.addRegOperands(Inst, 1);
6591 continue;
6592 }
6593
6594 // Handle the case where soffset is an immediate
6595 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
6596 Op.addImmOperands(Inst, 1);
6597 continue;
6598 }
6599
6600 // Handle tokens like 'offen' which are sometimes hard-coded into the
6601 // asm string. There are no MCInst operands for these.
6602 if (Op.isToken()) {
6603 continue;
6604 }
6605 assert(Op.isImm());
6606
6607 // Handle optional arguments
6608 OptionalIdx[Op.getImmTy()] = i;
6609 }
6610
6611 addOptionalImmOperand(Inst, Operands, OptionalIdx,
6612 AMDGPUOperand::ImmTyOffset);
6613 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
6614 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6615 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6616 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6617
6618 if (isGFX10Plus())
6619 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6620 }
6621
6622 //===----------------------------------------------------------------------===//
6623 // mimg
6624 //===----------------------------------------------------------------------===//
6625
6626 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
6627 bool IsAtomic) {
6628 unsigned I = 1;
6629 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6630 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6631 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6632 }
6633
6634 if (IsAtomic) {
6635 // Add src, same as dst
6636 assert(Desc.getNumDefs() == 1);
6637 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
6638 }
6639
6640 OptionalImmIndexMap OptionalIdx;
6641
6642 for (unsigned E = Operands.size(); I != E; ++I) {
6643 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6644
6645 // Add the register arguments
6646 if (Op.isReg()) {
6647 Op.addRegOperands(Inst, 1);
6648 } else if (Op.isImmModifier()) {
6649 OptionalIdx[Op.getImmTy()] = I;
6650 } else if (!Op.isToken()) {
6651 llvm_unreachable("unexpected operand type");
6652 }
6653 }
6654
6655 bool IsGFX10Plus = isGFX10Plus();
6656
6657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
6658 if (IsGFX10Plus)
6659 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
6660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
6661 if (IsGFX10Plus)
6662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC);
6663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
6664 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
6665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
6666 if (IsGFX10Plus)
6667 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
6668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
6669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
6670 if (!IsGFX10Plus)
6671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
6672 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
6673 }
6674
6675 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
6676 cvtMIMG(Inst, Operands, true);
6677 }
6678
6679 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
6680 const OperandVector &Operands) {
6681 for (unsigned I = 1; I < Operands.size(); ++I) {
6682 auto &Operand = (AMDGPUOperand &)*Operands[I];
6683 if (Operand.isReg())
6684 Operand.addRegOperands(Inst, 1);
6685 }
6686
6687 Inst.addOperand(MCOperand::createImm(1)); // a16
6688 }
6689
6690 //===----------------------------------------------------------------------===//
6691 // smrd
6692 //===----------------------------------------------------------------------===//
6693
6694 bool AMDGPUOperand::isSMRDOffset8() const {
6695 return isImm() && isUInt<8>(getImm());
6696 }
6697
6698 bool AMDGPUOperand::isSMEMOffset() const {
6699 return isImm(); // Offset range is checked later by validator.
6700 }
6701
6702 bool AMDGPUOperand::isSMRDLiteralOffset() const {
6703 // 32-bit literals are only supported on CI, and we only want to use them
6704 // when the offset does not fit in 8 bits.
6705 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
6706 }
6707
6708 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
6709 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6710 }
6711
6712 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
6713 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6714 }
6715
6716 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
6717 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6718 }
6719
6720 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
6721 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
6722 }
6723
6724 //===----------------------------------------------------------------------===//
6725 // vop3
6726 //===----------------------------------------------------------------------===//
6727
6728 static bool ConvertOmodMul(int64_t &Mul) {
6729 if (Mul != 1 && Mul != 2 && Mul != 4)
6730 return false;
6731
6732 Mul >>= 1;
6733 return true;
6734 }
6735
6736 static bool ConvertOmodDiv(int64_t &Div) {
6737 if (Div == 1) {
6738 Div = 0;
6739 return true;
6740 }
6741
6742 if (Div == 2) {
6743 Div = 3;
6744 return true;
6745 }
6746
6747 return false;
6748 }
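// Taken together, the two conversions above map the omod syntax onto its
// 2-bit encoding: mul:1 and div:1 -> 0 (identity), mul:2 -> 1, mul:4 -> 2,
// div:2 -> 3; any other value is rejected.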
6749
6750 static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
6751 if (BoundCtrl == 0) {
6752 BoundCtrl = 1;
6753 return true;
6754 }
6755
6756 if (BoundCtrl == -1) {
6757 BoundCtrl = 0;
6758 return true;
6759 }
6760
6761 return false;
6762 }
6763
6764 // Note: the order in this table matches the order of operands in AsmString.
6765 static const OptionalOperand AMDGPUOptionalOperandTable[] = {
6766 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr},
6767 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr},
6768 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr},
6769 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
6770 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
6771 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr},
6772 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr},
6773 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr},
6774 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
6775 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr},
6776 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
6777 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
6778 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
6779 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
6780 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
6781 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
6782 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr},
6783 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
6784 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr},
6785 {"da", AMDGPUOperand::ImmTyDA, true, nullptr},
6786 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr},
6787 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr},
6788 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr},
6789 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
6790 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr},
6791 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr},
6792 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
6793 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
6794 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
6795 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
6796 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
6797 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
6798 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
6799 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
6800 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
6801 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
6802 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
6803 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
6804 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
6805 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
6806 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
6807 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
6808 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
6809 };
6810
6811 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
6812
6813 OperandMatchResultTy res = parseOptionalOpr(Operands);
6814
6815 // This is a hack to enable hardcoded mandatory operands which follow
6816 // optional operands.
6817 //
6818 // The current design assumes that all operands after the first optional
6819 // operand are also optional. However, some instructions violate this
6820 // rule (e.g. flat/global atomics, which have hardcoded 'glc' operands).
6821 //
6822 // To alleviate this problem, we have to (implicitly) parse extra operands
6823 // to make sure the autogenerated parser of custom operands never hits
6824 // hardcoded mandatory operands.
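// E.g. for flat/global atomics with a return value, a hardcoded 'glc'
// follows the optional modifiers in the AsmString, so the loop below
// retries parseOptionalOpr up to MAX_OPR_LOOKAHEAD times.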
6825
6826 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
6827 if (res != MatchOperand_Success ||
6828 isToken(AsmToken::EndOfStatement))
6829 break;
6830
6831 trySkipToken(AsmToken::Comma);
6832 res = parseOptionalOpr(Operands);
6833 }
6834
6835 return res;
6836 }
6837
6838 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
6839 OperandMatchResultTy res;
6840 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
6841 // try to parse any optional operand here
6842 if (Op.IsBit) {
6843 res = parseNamedBit(Op.Name, Operands, Op.Type);
6844 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
6845 res = parseOModOperand(Operands);
6846 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
6847 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
6848 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
6849 res = parseSDWASel(Operands, Op.Name, Op.Type);
6850 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
6851 res = parseSDWADstUnused(Operands);
6852 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
6853 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
6854 Op.Type == AMDGPUOperand::ImmTyNegLo ||
6855 Op.Type == AMDGPUOperand::ImmTyNegHi) {
6856 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
6857 Op.ConvertResult);
6858 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
6859 res = parseDim(Operands);
6860 } else {
6861 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
6862 }
6863 if (res != MatchOperand_NoMatch) {
6864 return res;
6865 }
6866 }
6867 return MatchOperand_NoMatch;
6868 }
6869
6870 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
6871 StringRef Name = getTokenStr();
6872 if (Name == "mul") {
6873 return parseIntWithPrefix("mul", Operands,
6874 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
6875 }
6876
6877 if (Name == "div") {
6878 return parseIntWithPrefix("div", Operands,
6879 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
6880 }
6881
6882 return MatchOperand_NoMatch;
6883 }
6884
6885 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
6886 cvtVOP3P(Inst, Operands);
6887
6888 int Opc = Inst.getOpcode();
6889
6890 int SrcNum;
6891 const int Ops[] = { AMDGPU::OpName::src0,
6892 AMDGPU::OpName::src1,
6893 AMDGPU::OpName::src2 };
6894 for (SrcNum = 0;
6895 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
6896 ++SrcNum);
6897 assert(SrcNum > 0);
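// The op_sel bit just past the last source selects the high half of the
// destination; it is stored below in src0_modifiers as DST_OP_SEL.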
6898
6899 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
6900 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
6901
6902 if ((OpSel & (1 << SrcNum)) != 0) {
6903 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
6904 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
6905 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
6906 }
6907 }
6908
6909 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
6910 // 1. This operand is an input modifiers operand
6911 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
6912 // 2. This is not the last operand
6913 && Desc.NumOperands > (OpNum + 1)
6914 // 3. The next operand has a register class
6915 && Desc.OpInfo[OpNum + 1].RegClass != -1
6916 // 4. The next register is not tied to any other operand
6917 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
6918 }
6919
6920 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
6921 {
6922 OptionalImmIndexMap OptionalIdx;
6923 unsigned Opc = Inst.getOpcode();
6924
6925 unsigned I = 1;
6926 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6927 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6928 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6929 }
6930
6931 for (unsigned E = Operands.size(); I != E; ++I) {
6932 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6933 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6934 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6935 } else if (Op.isInterpSlot() ||
6936 Op.isInterpAttr() ||
6937 Op.isAttrChan()) {
6938 Inst.addOperand(MCOperand::createImm(Op.getImm()));
6939 } else if (Op.isImmModifier()) {
6940 OptionalIdx[Op.getImmTy()] = I;
6941 } else {
6942 llvm_unreachable("unhandled operand type");
6943 }
6944 }
6945
6946 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
6947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
6948 }
6949
6950 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6951 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6952 }
6953
6954 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
6955 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
6956 }
6957 }
6958
6959 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
6960 OptionalImmIndexMap &OptionalIdx) {
6961 unsigned Opc = Inst.getOpcode();
6962
6963 unsigned I = 1;
6964 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
6965 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
6966 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
6967 }
6968
6969 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
6970 // This instruction has src modifiers
6971 for (unsigned E = Operands.size(); I != E; ++I) {
6972 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6973 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
6974 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
6975 } else if (Op.isImmModifier()) {
6976 OptionalIdx[Op.getImmTy()] = I;
6977 } else if (Op.isRegOrImm()) {
6978 Op.addRegOrImmOperands(Inst, 1);
6979 } else {
6980 llvm_unreachable("unhandled operand type");
6981 }
6982 }
6983 } else {
6984 // No src modifiers
6985 for (unsigned E = Operands.size(); I != E; ++I) {
6986 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
6987 if (Op.isMod()) {
6988 OptionalIdx[Op.getImmTy()] = I;
6989 } else {
6990 Op.addRegOrImmOperands(Inst, 1);
6991 }
6992 }
6993 }
6994
6995 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
6996 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
6997 }
6998
6999 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
7000 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
7001 }
7002
7003 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
7004 // they have a src2 register operand that is tied to the dst operand.
7005 // We don't allow modifiers for this operand in the assembler, so
7006 // src2_modifiers should be 0.
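// E.g. "v_mac_f32_e64 v0, v1, v2" reads v0 both as the dst and as the
// implied src2, so we insert src2_modifiers = 0 and a copy of operand 0.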
7007 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
7008 Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
7009 Opc == AMDGPU::V_MAC_F32_e64_vi ||
7010 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
7011 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
7012 Opc == AMDGPU::V_MAC_F16_e64_vi ||
7013 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
7014 Opc == AMDGPU::V_FMAC_F32_e64_vi ||
7015 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
7016 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
7017 auto it = Inst.begin();
7018 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
7019 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
7020 ++it;
7021 // Copy the operand to ensure it's not invalidated when Inst grows.
7022 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
7023 }
7024 }
7025
7026 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
7027 OptionalImmIndexMap OptionalIdx;
7028 cvtVOP3(Inst, Operands, OptionalIdx);
7029 }
7030
7031 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
7032 const OperandVector &Operands) {
7033 OptionalImmIndexMap OptIdx;
7034 const int Opc = Inst.getOpcode();
7035 const MCInstrDesc &Desc = MII.get(Opc);
7036
7037 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
7038
7039 cvtVOP3(Inst, Operands, OptIdx);
7040
7041 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
7042 assert(!IsPacked);
7043 Inst.addOperand(Inst.getOperand(0));
7044 }
7045
7046 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
7047 // instruction, and then figure out where to actually put the modifiers
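// The loop at the end of this function folds the parsed op_sel/op_sel_hi/
// neg_lo/neg_hi bits into each source's src_modifiers operand as
// OP_SEL_0/OP_SEL_1/NEG/NEG_HI respectively.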
7048
7049 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
7050
7051 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
7052 if (OpSelHiIdx != -1) {
7053 int DefaultVal = IsPacked ? -1 : 0;
7054 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
7055 DefaultVal);
7056 }
7057
7058 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
7059 if (NegLoIdx != -1) {
7060 assert(IsPacked);
7061 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
7062 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
7063 }
7064
7065 const int Ops[] = { AMDGPU::OpName::src0,
7066 AMDGPU::OpName::src1,
7067 AMDGPU::OpName::src2 };
7068 const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
7069 AMDGPU::OpName::src1_modifiers,
7070 AMDGPU::OpName::src2_modifiers };
7071
7072 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
7073
7074 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
7075 unsigned OpSelHi = 0;
7076 unsigned NegLo = 0;
7077 unsigned NegHi = 0;
7078
7079 if (OpSelHiIdx != -1) {
7080 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
7081 }
7082
7083 if (NegLoIdx != -1) {
7084 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
7085 NegLo = Inst.getOperand(NegLoIdx).getImm();
7086 NegHi = Inst.getOperand(NegHiIdx).getImm();
7087 }
7088
7089 for (int J = 0; J < 3; ++J) {
7090 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
7091 if (OpIdx == -1)
7092 break;
7093
7094 uint32_t ModVal = 0;
7095
7096 if ((OpSel & (1 << J)) != 0)
7097 ModVal |= SISrcMods::OP_SEL_0;
7098
7099 if ((OpSelHi & (1 << J)) != 0)
7100 ModVal |= SISrcMods::OP_SEL_1;
7101
7102 if ((NegLo & (1 << J)) != 0)
7103 ModVal |= SISrcMods::NEG;
7104
7105 if ((NegHi & (1 << J)) != 0)
7106 ModVal |= SISrcMods::NEG_HI;
7107
7108 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
7109
7110 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
7111 }
7112 }
7113
7114 //===----------------------------------------------------------------------===//
7115 // dpp
7116 //===----------------------------------------------------------------------===//
7117
7118 bool AMDGPUOperand::isDPP8() const {
7119 return isImmTy(ImmTyDPP8);
7120 }
7121
7122 bool AMDGPUOperand::isDPPCtrl() const {
7123 using namespace AMDGPU::DPP;
7124
7125 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
7126 if (result) {
7127 int64_t Imm = getImm();
7128 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
7129 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
7130 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
7131 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
7132 (Imm == DppCtrl::WAVE_SHL1) ||
7133 (Imm == DppCtrl::WAVE_ROL1) ||
7134 (Imm == DppCtrl::WAVE_SHR1) ||
7135 (Imm == DppCtrl::WAVE_ROR1) ||
7136 (Imm == DppCtrl::ROW_MIRROR) ||
7137 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
7138 (Imm == DppCtrl::BCAST15) ||
7139 (Imm == DppCtrl::BCAST31) ||
7140 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
7141 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
7142 }
7143 return false;
7144 }
7145
7146 //===----------------------------------------------------------------------===//
7147 // mAI
7148 //===----------------------------------------------------------------------===//
7149
7150 bool AMDGPUOperand::isBLGP() const {
7151 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
7152 }
7153
7154 bool AMDGPUOperand::isCBSZ() const {
7155 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
7156 }
7157
7158 bool AMDGPUOperand::isABID() const {
7159 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
7160 }
7161
7162 bool AMDGPUOperand::isS16Imm() const {
7163 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
7164 }
7165
7166 bool AMDGPUOperand::isU16Imm() const {
7167 return isImm() && isUInt<16>(getImm());
7168 }
7169
7170 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
7171 if (!isGFX10Plus())
7172 return MatchOperand_NoMatch;
7173
7174 SMLoc S = getLoc();
7175
7176 if (!trySkipId("dim", AsmToken::Colon))
7177 return MatchOperand_NoMatch;
7178
7179 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
7180 // integer.
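// E.g. "dim:2D" arrives as the integer token "2" followed by the
// identifier "D"; the "SQ_RSRC_IMG_" spelling is also accepted and
// normalized by stripping that prefix below.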
7181 std::string Token;
7182 if (isToken(AsmToken::Integer)) {
7183 SMLoc Loc = getToken().getEndLoc();
7184 Token = std::string(getTokenStr());
7185 lex();
7186 if (getLoc() != Loc)
7187 return MatchOperand_ParseFail;
7188 }
7189 if (!isToken(AsmToken::Identifier))
7190 return MatchOperand_ParseFail;
7191 Token += getTokenStr();
7192
7193 StringRef DimId = Token;
7194 if (DimId.startswith("SQ_RSRC_IMG_"))
7195 DimId = DimId.substr(12);
7196
7197 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
7198 if (!DimInfo)
7199 return MatchOperand_ParseFail;
7200
7201 lex();
7202
7203 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
7204 AMDGPUOperand::ImmTyDim));
7205 return MatchOperand_Success;
7206 }
7207
7208 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
7209 SMLoc S = getLoc();
7210
7211 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
7212 return MatchOperand_NoMatch;
7213
7214 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
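// Each of the eight 3-bit selects picks a source lane within a group of
// eight lanes, e.g. dpp8:[7,6,5,4,3,2,1,0] reverses the lanes of each group.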
7215
7216 int64_t Sels[8];
7217
7218 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7219 return MatchOperand_ParseFail;
7220
7221 for (size_t i = 0; i < 8; ++i) {
7222 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7223 return MatchOperand_ParseFail;
7224
7225 SMLoc Loc = getLoc();
7226 if (getParser().parseAbsoluteExpression(Sels[i]))
7227 return MatchOperand_ParseFail;
7228 if (0 > Sels[i] || 7 < Sels[i]) {
7229 Error(Loc, "expected a 3-bit value");
7230 return MatchOperand_ParseFail;
7231 }
7232 }
7233
7234 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7235 return MatchOperand_ParseFail;
7236
7237 unsigned DPP8 = 0;
7238 for (size_t i = 0; i < 8; ++i)
7239 DPP8 |= (Sels[i] << (i * 3));
7240
7241 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
7242 return MatchOperand_Success;
7243 }
7244
7245 bool
7246 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
7247 const OperandVector &Operands) {
7248 if (Ctrl == "row_share" ||
7249 Ctrl == "row_xmask")
7250 return isGFX10Plus();
7251
7252 if (Ctrl == "wave_shl" ||
7253 Ctrl == "wave_shr" ||
7254 Ctrl == "wave_rol" ||
7255 Ctrl == "wave_ror" ||
7256 Ctrl == "row_bcast")
7257 return isVI() || isGFX9();
7258
7259 return Ctrl == "row_mirror" ||
7260 Ctrl == "row_half_mirror" ||
7261 Ctrl == "quad_perm" ||
7262 Ctrl == "row_shl" ||
7263 Ctrl == "row_shr" ||
7264 Ctrl == "row_ror";
7265 }
7266
7267 int64_t
7268 AMDGPUAsmParser::parseDPPCtrlPerm() {
7269 // quad_perm:[%d,%d,%d,%d]
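// Each 2-bit selector is packed in order, so the identity permutation
// quad_perm:[0,1,2,3] encodes as 0xE4.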
7270
7271 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
7272 return -1;
7273
7274 int64_t Val = 0;
7275 for (int i = 0; i < 4; ++i) {
7276 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
7277 return -1;
7278
7279 int64_t Temp;
7280 SMLoc Loc = getLoc();
7281 if (getParser().parseAbsoluteExpression(Temp))
7282 return -1;
7283 if (Temp < 0 || Temp > 3) {
7284 Error(Loc, "expected a 2-bit value");
7285 return -1;
7286 }
7287
7288 Val += (Temp << i * 2);
7289 }
7290
7291 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7292 return -1;
7293
7294 return Val;
7295 }
7296
7297 int64_t
7298 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
7299 using namespace AMDGPU::DPP;
7300
7301 // sel:%d
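// E.g. row_shl:1 encodes as DppCtrl::ROW_SHL0 | 1. The table below lists
// the legal value range for each prefix; row_bcast accepts only 15 or 31.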
7302
7303 int64_t Val;
7304 SMLoc Loc = getLoc();
7305
7306 if (getParser().parseAbsoluteExpression(Val))
7307 return -1;
7308
7309 struct DppCtrlCheck {
7310 int64_t Ctrl;
7311 int Lo;
7312 int Hi;
7313 };
7314
7315 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
7316 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
7317 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
7318 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
7319 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
7320 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
7321 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
7322 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
7323 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
7324 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
7325 .Default({-1, 0, 0});
7326
7327 bool Valid;
7328 if (Check.Ctrl == -1) {
7329 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
7330 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
7331 } else {
7332 Valid = Check.Lo <= Val && Val <= Check.Hi;
7333 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
7334 }
7335
7336 if (!Valid) {
7337 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
7338 return -1;
7339 }
7340
7341 return Val;
7342 }
7343
7344 OperandMatchResultTy
7345 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
7346 using namespace AMDGPU::DPP;
7347
7348 if (!isToken(AsmToken::Identifier) ||
7349 !isSupportedDPPCtrl(getTokenStr(), Operands))
7350 return MatchOperand_NoMatch;
7351
7352 SMLoc S = getLoc();
7353 int64_t Val = -1;
7354 StringRef Ctrl;
7355
7356 parseId(Ctrl);
7357
7358 if (Ctrl == "row_mirror") {
7359 Val = DppCtrl::ROW_MIRROR;
7360 } else if (Ctrl == "row_half_mirror") {
7361 Val = DppCtrl::ROW_HALF_MIRROR;
7362 } else {
7363 if (skipToken(AsmToken::Colon, "expected a colon")) {
7364 if (Ctrl == "quad_perm") {
7365 Val = parseDPPCtrlPerm();
7366 } else {
7367 Val = parseDPPCtrlSel(Ctrl);
7368 }
7369 }
7370 }
7371
7372 if (Val == -1)
7373 return MatchOperand_ParseFail;
7374
7375 Operands.push_back(
7376 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
7377 return MatchOperand_Success;
7378 }
7379
7380 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
7381 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
7382 }
7383
7384 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
7385 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
7386 }
7387
7388 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
7389 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
7390 }
7391
7392 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
7393 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
7394 }
7395
7396 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
7397 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
7398 }
7399
7400 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
7401 OptionalImmIndexMap OptionalIdx;
7402
7403 unsigned I = 1;
7404 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7405 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7406 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7407 }
7408
7409 int Fi = 0;
7410 for (unsigned E = Operands.size(); I != E; ++I) {
7411 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
7412 MCOI::TIED_TO);
7413 if (TiedTo != -1) {
7414 assert((unsigned)TiedTo < Inst.getNumOperands());
7415 // handle tied old or src2 for MAC instructions
7416 Inst.addOperand(Inst.getOperand(TiedTo));
7417 }
7418 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7419 // Add the register arguments
7420 if (Op.isReg() && validateVccOperand(Op.getReg())) {
7421 // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
7422 // Skip it.
7423 continue;
7424 }
7425
7426 if (IsDPP8) {
7427 if (Op.isDPP8()) {
7428 Op.addImmOperands(Inst, 1);
7429 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7430 Op.addRegWithFPInputModsOperands(Inst, 2);
7431 } else if (Op.isFI()) {
7432 Fi = Op.getImm();
7433 } else if (Op.isReg()) {
7434 Op.addRegOperands(Inst, 1);
7435 } else {
7436 llvm_unreachable("Invalid operand type");
7437 }
7438 } else {
7439 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7440 Op.addRegWithFPInputModsOperands(Inst, 2);
7441 } else if (Op.isDPPCtrl()) {
7442 Op.addImmOperands(Inst, 1);
7443 } else if (Op.isImm()) {
7444 // Handle optional arguments
7445 OptionalIdx[Op.getImmTy()] = I;
7446 } else {
7447 llvm_unreachable("Invalid operand type");
7448 }
7449 }
7450 }
7451
7452 if (IsDPP8) {
7453 using namespace llvm::AMDGPU::DPP;
7454 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
7455 } else {
7456 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
7457 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
7458 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
7459 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
7460 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
7461 }
7462 }
7463 }
7464
7465 //===----------------------------------------------------------------------===//
7466 // sdwa
7467 //===----------------------------------------------------------------------===//
7468
7469 OperandMatchResultTy
7470 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
7471 AMDGPUOperand::ImmTy Type) {
7472 using namespace llvm::AMDGPU::SDWA;
7473
7474 SMLoc S = getLoc();
7475 StringRef Value;
7476 OperandMatchResultTy res;
7477
7478 SMLoc StringLoc;
7479 res = parseStringWithPrefix(Prefix, Value, StringLoc);
7480 if (res != MatchOperand_Success) {
7481 return res;
7482 }
7483
7484 int64_t Int;
7485 Int = StringSwitch<int64_t>(Value)
7486 .Case("BYTE_0", SdwaSel::BYTE_0)
7487 .Case("BYTE_1", SdwaSel::BYTE_1)
7488 .Case("BYTE_2", SdwaSel::BYTE_2)
7489 .Case("BYTE_3", SdwaSel::BYTE_3)
7490 .Case("WORD_0", SdwaSel::WORD_0)
7491 .Case("WORD_1", SdwaSel::WORD_1)
7492 .Case("DWORD", SdwaSel::DWORD)
7493 .Default(0xffffffff);
7494
7495 if (Int == 0xffffffff) {
7496 Error(StringLoc, "invalid " + Twine(Prefix) + " value");
7497 return MatchOperand_ParseFail;
7498 }
7499
7500 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
7501 return MatchOperand_Success;
7502 }
7503
7504 OperandMatchResultTy
7505 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
7506 using namespace llvm::AMDGPU::SDWA;
7507
7508 SMLoc S = getLoc();
7509 StringRef Value;
7510 OperandMatchResultTy res;
7511
7512 SMLoc StringLoc;
7513 res = parseStringWithPrefix("dst_unused", Value, StringLoc);
7514 if (res != MatchOperand_Success) {
7515 return res;
7516 }
7517
7518 int64_t Int;
7519 Int = StringSwitch<int64_t>(Value)
7520 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
7521 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
7522 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
7523 .Default(0xffffffff);
7524
7525 if (Int == 0xffffffff) {
7526 Error(StringLoc, "invalid dst_unused value");
7527 return MatchOperand_ParseFail;
7528 }
7529
7530 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
7531 return MatchOperand_Success;
7532 }
7533
7534 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
7535 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
7536 }
7537
7538 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
7539 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
7540 }
7541
7542 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
7543 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
7544 }
7545
7546 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
7547 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
7548 }
7549
7550 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
7551 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
7552 }
7553
7554 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
7555 uint64_t BasicInstType,
7556 bool SkipDstVcc,
7557 bool SkipSrcVcc) {
7558 using namespace llvm::AMDGPU::SDWA;
7559
7560 OptionalImmIndexMap OptionalIdx;
7561 bool SkipVcc = SkipDstVcc || SkipSrcVcc;
7562 bool SkippedVcc = false;
7563
7564 unsigned I = 1;
7565 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
7566 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
7567 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
7568 }
7569
7570 for (unsigned E = Operands.size(); I != E; ++I) {
7571 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
7572 if (SkipVcc && !SkippedVcc && Op.isReg() &&
7573 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
7574 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
7575 // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
7576 // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
7577 // Skip VCC only if we didn't skip it on the previous iteration.
7578 // Note that src0 and src1 occupy 2 slots each because of modifiers.
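// E.g. with SkipSrcVcc, the trailing vcc is reached when the MCInst
// already holds 5 operands: the dst plus two modifier+register pairs.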
7579 if (BasicInstType == SIInstrFlags::VOP2 &&
7580 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
7581 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
7582 SkippedVcc = true;
7583 continue;
7584 } else if (BasicInstType == SIInstrFlags::VOPC &&
7585 Inst.getNumOperands() == 0) {
7586 SkippedVcc = true;
7587 continue;
7588 }
7589 }
7590 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
7591 Op.addRegOrImmWithInputModsOperands(Inst, 2);
7592 } else if (Op.isImm()) {
7593 // Handle optional arguments
7594 OptionalIdx[Op.getImmTy()] = I;
7595 } else {
7596 llvm_unreachable("Invalid operand type");
7597 }
7598 SkippedVcc = false;
7599 }
7600
7601 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
7602 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
7603 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
7604 // The v_nop_sdwa variants have no optional sdwa arguments.
7605 switch (BasicInstType) {
7606 case SIInstrFlags::VOP1:
7607 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7608 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7609 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7610 }
7611 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7612 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7613 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7614 break;
7615
7616 case SIInstrFlags::VOP2:
7617 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7618 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
7619 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
7620 }
7621 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
7622 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
7623 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7624 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7625 break;
7626
7627 case SIInstrFlags::VOPC:
7628 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
7629 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
7630 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
7631 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
7632 break;
7633
7634 default:
7635 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
7636 }
7637 }
7638
7639 // Special case v_mac_{f16, f32}:
7640 // they have a src2 register operand that is tied to the dst operand.
7641 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
7642 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
7643 auto it = Inst.begin();
7644 std::advance(
7645 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
7646 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
7647 }
7648 }
7649
7650 //===----------------------------------------------------------------------===//
7651 // mAI
7652 //===----------------------------------------------------------------------===//
7653
7654 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
7655 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
7656 }
7657
7658 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
7659 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
7660 }
7661
7662 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
7663 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
7664 }
7665
7666 /// Force static initialization.
7667 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
7668 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
7669 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
7670 }
7671
7672 #define GET_REGISTER_MATCHER
7673 #define GET_MATCHER_IMPLEMENTATION
7674 #define GET_MNEMONIC_SPELL_CHECKER
7675 #define GET_MNEMONIC_CHECKER
7676 #include "AMDGPUGenAsmMatcher.inc"
7677
7678 // This function should be defined after the auto-generated include so that
7679 // we have the MatchClassKind enum defined.
7680 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
7681 unsigned Kind) {
7682 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
7683 // But MatchInstructionImpl() expects to meet a token and fails to validate
7684 // the operand. This method checks if we were given an immediate operand but
7685 // expected the corresponding token.
7686 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
7687 switch (Kind) {
7688 case MCK_addr64:
7689 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
7690 case MCK_gds:
7691 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
7692 case MCK_lds:
7693 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
7694 case MCK_glc:
7695 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
7696 case MCK_idxen:
7697 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
7698 case MCK_offen:
7699 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
7700 case MCK_SSrcB32:
7701 // When operands have expression values, they will return true for isToken,
7702 // because it is not possible to distinguish between a token and an
7703 // expression at parse time. MatchInstructionImpl() will always try to
7704 // match an operand as a token when isToken returns true; if the name
7705 // of the expression is not a valid token, the match fails, so we
7706 // need to handle it here.
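// E.g. a symbol operand (say, "s_mov_b32 s0, sym") reports isToken() as
// true here, so we accept it via isSSrcB32() instead.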
7707 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
7708 case MCK_SSrcF32:
7709 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
7710 case MCK_SoppBrTarget:
7711 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
7712 case MCK_VReg32OrOff:
7713 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
7714 case MCK_InterpSlot:
7715 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
7716 case MCK_Attr:
7717 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
7718 case MCK_AttrChan:
7719 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
7720 case MCK_ImmSMEMOffset:
7721 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
7722 case MCK_SReg_64:
7723 case MCK_SReg_64_XEXEC:
7724 // Null is defined as a 32-bit register but
7725 // it should also be enabled with 64-bit operands.
7726 // The following code enables it for SReg_64 operands
7727 // used as source and destination. Remaining source
7728 // operands are handled in isInlinableImm.
7729 return Operand.isNull() ? Match_Success : Match_InvalidOperand;
7730 default:
7731 return Match_InvalidOperand;
7732 }
7733 }
7734
7735 //===----------------------------------------------------------------------===//
7736 // endpgm
7737 //===----------------------------------------------------------------------===//
7738
7739 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
7740 SMLoc S = getLoc();
7741 int64_t Imm = 0;
7742
7743 if (!parseExpr(Imm)) {
7744 // The operand is optional; if not present, default to 0.
7745 Imm = 0;
7746 }
7747
7748 if (!isUInt<16>(Imm)) {
7749 Error(S, "expected a 16-bit value");
7750 return MatchOperand_ParseFail;
7751 }
7752
7753 Operands.push_back(
7754 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
7755 return MatchOperand_Success;
7756 }
7757
7758 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
7759