1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/CodeGen/MachineValueType.h" 25 #include "llvm/MC/MCAsmInfo.h" 26 #include "llvm/MC/MCContext.h" 27 #include "llvm/MC/MCExpr.h" 28 #include "llvm/MC/MCInst.h" 29 #include "llvm/MC/MCInstrDesc.h" 30 #include "llvm/MC/MCParser/MCAsmLexer.h" 31 #include "llvm/MC/MCParser/MCAsmParser.h" 32 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 33 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 34 #include "llvm/MC/MCSymbol.h" 35 #include "llvm/MC/TargetRegistry.h" 36 #include "llvm/Support/AMDGPUMetadata.h" 37 #include "llvm/Support/AMDHSAKernelDescriptor.h" 38 #include "llvm/Support/Casting.h" 39 #include "llvm/Support/MathExtras.h" 40 #include "llvm/TargetParser/TargetParser.h" 41 #include <optional> 42 43 using namespace llvm; 44 using namespace llvm::AMDGPU; 45 using namespace llvm::amdhsa; 46 47 namespace { 48 49 class AMDGPUAsmParser; 50 51 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 52 53 //===----------------------------------------------------------------------===// 54 // Operand 55 //===----------------------------------------------------------------------===// 56 57 class AMDGPUOperand : public MCParsedAsmOperand { 58 enum KindTy { 59 Token, 60 Immediate, 61 Register, 62 Expression 63 } Kind; 64 65 SMLoc StartLoc, EndLoc; 66 const AMDGPUAsmParser *AsmParser; 67 68 public: 69 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 70 : Kind(Kind_), AsmParser(AsmParser_) {} 71 72 using Ptr = std::unique_ptr<AMDGPUOperand>; 73 74 struct Modifiers { 75 bool Abs = false; 76 bool Neg = false; 77 bool Sext = false; 78 bool Lit = false; 79 80 bool hasFPModifiers() const { return Abs || Neg; } 81 bool hasIntModifiers() const { return Sext; } 82 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 83 84 int64_t getFPModifiersOperand() const { 85 int64_t Operand = 0; 86 Operand |= Abs ? SISrcMods::ABS : 0u; 87 Operand |= Neg ? SISrcMods::NEG : 0u; 88 return Operand; 89 } 90 91 int64_t getIntModifiersOperand() const { 92 int64_t Operand = 0; 93 Operand |= Sext ? SISrcMods::SEXT : 0u; 94 return Operand; 95 } 96 97 int64_t getModifiersOperand() const { 98 assert(!(hasFPModifiers() && hasIntModifiers()) 99 && "fp and int modifiers should not be used simultaneously"); 100 if (hasFPModifiers()) { 101 return getFPModifiersOperand(); 102 } else if (hasIntModifiers()) { 103 return getIntModifiersOperand(); 104 } else { 105 return 0; 106 } 107 } 108 109 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 110 }; 111 112 enum ImmTy { 113 ImmTyNone, 114 ImmTyGDS, 115 ImmTyLDS, 116 ImmTyOffen, 117 ImmTyIdxen, 118 ImmTyAddr64, 119 ImmTyOffset, 120 ImmTyInstOffset, 121 ImmTyOffset0, 122 ImmTyOffset1, 123 ImmTySMEMOffsetMod, 124 ImmTyCPol, 125 ImmTyTFE, 126 ImmTyD16, 127 ImmTyClampSI, 128 ImmTyOModSI, 129 ImmTySDWADstSel, 130 ImmTySDWASrc0Sel, 131 ImmTySDWASrc1Sel, 132 ImmTySDWADstUnused, 133 ImmTyDMask, 134 ImmTyDim, 135 ImmTyUNorm, 136 ImmTyDA, 137 ImmTyR128A16, 138 ImmTyA16, 139 ImmTyLWE, 140 ImmTyExpTgt, 141 ImmTyExpCompr, 142 ImmTyExpVM, 143 ImmTyFORMAT, 144 ImmTyHwreg, 145 ImmTyOff, 146 ImmTySendMsg, 147 ImmTyInterpSlot, 148 ImmTyInterpAttr, 149 ImmTyInterpAttrChan, 150 ImmTyOpSel, 151 ImmTyOpSelHi, 152 ImmTyNegLo, 153 ImmTyNegHi, 154 ImmTyDPP8, 155 ImmTyDppCtrl, 156 ImmTyDppRowMask, 157 ImmTyDppBankMask, 158 ImmTyDppBoundCtrl, 159 ImmTyDppFI, 160 ImmTySwizzle, 161 ImmTyGprIdxMode, 162 ImmTyHigh, 163 ImmTyBLGP, 164 ImmTyCBSZ, 165 ImmTyABID, 166 ImmTyEndpgm, 167 ImmTyWaitVDST, 168 ImmTyWaitEXP, 169 }; 170 171 // Immediate operand kind. 172 // It helps to identify the location of an offending operand after an error. 173 // Note that regular literals and mandatory literals (KImm) must be handled 174 // differently. When looking for an offending operand, we should usually 175 // ignore mandatory literals because they are part of the instruction and 176 // cannot be changed. Report location of mandatory operands only for VOPD, 177 // when both OpX and OpY have a KImm and there are no other literals. 178 enum ImmKindTy { 179 ImmKindTyNone, 180 ImmKindTyLiteral, 181 ImmKindTyMandatoryLiteral, 182 ImmKindTyConst, 183 }; 184 185 private: 186 struct TokOp { 187 const char *Data; 188 unsigned Length; 189 }; 190 191 struct ImmOp { 192 int64_t Val; 193 ImmTy Type; 194 bool IsFPImm; 195 mutable ImmKindTy Kind; 196 Modifiers Mods; 197 }; 198 199 struct RegOp { 200 unsigned RegNo; 201 Modifiers Mods; 202 }; 203 204 union { 205 TokOp Tok; 206 ImmOp Imm; 207 RegOp Reg; 208 const MCExpr *Expr; 209 }; 210 211 public: 212 bool isToken() const override { return Kind == Token; } 213 214 bool isSymbolRefExpr() const { 215 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 216 } 217 218 bool isImm() const override { 219 return Kind == Immediate; 220 } 221 222 void setImmKindNone() const { 223 assert(isImm()); 224 Imm.Kind = ImmKindTyNone; 225 } 226 227 void setImmKindLiteral() const { 228 assert(isImm()); 229 Imm.Kind = ImmKindTyLiteral; 230 } 231 232 void setImmKindMandatoryLiteral() const { 233 assert(isImm()); 234 Imm.Kind = ImmKindTyMandatoryLiteral; 235 } 236 237 void setImmKindConst() const { 238 assert(isImm()); 239 Imm.Kind = ImmKindTyConst; 240 } 241 242 bool IsImmKindLiteral() const { 243 return isImm() && Imm.Kind == ImmKindTyLiteral; 244 } 245 246 bool IsImmKindMandatoryLiteral() const { 247 return isImm() && Imm.Kind == ImmKindTyMandatoryLiteral; 248 } 249 250 bool isImmKindConst() const { 251 return isImm() && Imm.Kind == ImmKindTyConst; 252 } 253 254 bool isInlinableImm(MVT type) const; 255 bool isLiteralImm(MVT type) const; 256 257 bool isRegKind() const { 258 return Kind == Register; 259 } 260 261 bool isReg() const override { 262 return isRegKind() && !hasModifiers(); 263 } 264 265 bool isRegOrInline(unsigned RCID, MVT type) const { 266 return isRegClass(RCID) || isInlinableImm(type); 267 } 268 269 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 270 return isRegOrInline(RCID, type) || isLiteralImm(type); 271 } 272 273 bool isRegOrImmWithInt16InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 275 } 276 277 bool isRegOrImmWithIntT16InputMods() const { 278 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::i16); 279 } 280 281 bool isRegOrImmWithInt32InputMods() const { 282 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 283 } 284 285 bool isRegOrInlineImmWithInt16InputMods() const { 286 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16); 287 } 288 289 bool isRegOrInlineImmWithInt32InputMods() const { 290 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32); 291 } 292 293 bool isRegOrImmWithInt64InputMods() const { 294 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 295 } 296 297 bool isRegOrImmWithFP16InputMods() const { 298 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 299 } 300 301 bool isRegOrImmWithFPT16InputMods() const { 302 return isRegOrImmWithInputMods(AMDGPU::VS_16RegClassID, MVT::f16); 303 } 304 305 bool isRegOrImmWithFP32InputMods() const { 306 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 307 } 308 309 bool isRegOrImmWithFP64InputMods() const { 310 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 311 } 312 313 bool isRegOrInlineImmWithFP16InputMods() const { 314 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16); 315 } 316 317 bool isRegOrInlineImmWithFP32InputMods() const { 318 return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32); 319 } 320 321 322 bool isVReg() const { 323 return isRegClass(AMDGPU::VGPR_32RegClassID) || 324 isRegClass(AMDGPU::VReg_64RegClassID) || 325 isRegClass(AMDGPU::VReg_96RegClassID) || 326 isRegClass(AMDGPU::VReg_128RegClassID) || 327 isRegClass(AMDGPU::VReg_160RegClassID) || 328 isRegClass(AMDGPU::VReg_192RegClassID) || 329 isRegClass(AMDGPU::VReg_256RegClassID) || 330 isRegClass(AMDGPU::VReg_512RegClassID) || 331 isRegClass(AMDGPU::VReg_1024RegClassID); 332 } 333 334 bool isVReg32() const { 335 return isRegClass(AMDGPU::VGPR_32RegClassID); 336 } 337 338 bool isVReg32OrOff() const { 339 return isOff() || isVReg32(); 340 } 341 342 bool isNull() const { 343 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 344 } 345 346 bool isVRegWithInputMods() const; 347 bool isT16VRegWithInputMods() const; 348 349 bool isSDWAOperand(MVT type) const; 350 bool isSDWAFP16Operand() const; 351 bool isSDWAFP32Operand() const; 352 bool isSDWAInt16Operand() const; 353 bool isSDWAInt32Operand() const; 354 355 bool isImmTy(ImmTy ImmT) const { 356 return isImm() && Imm.Type == ImmT; 357 } 358 359 template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); } 360 361 bool isImmLiteral() const { return isImmTy(ImmTyNone); } 362 363 bool isImmModifier() const { 364 return isImm() && Imm.Type != ImmTyNone; 365 } 366 367 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 368 bool isDMask() const { return isImmTy(ImmTyDMask); } 369 bool isDim() const { return isImmTy(ImmTyDim); } 370 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 371 bool isOff() const { return isImmTy(ImmTyOff); } 372 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 373 bool isOffen() const { return isImmTy(ImmTyOffen); } 374 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 375 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 376 bool isOffset() const { return isImmTy(ImmTyOffset); } 377 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 378 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 379 bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); } 380 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 381 bool isGDS() const { return isImmTy(ImmTyGDS); } 382 bool isLDS() const { return isImmTy(ImmTyLDS); } 383 bool isCPol() const { return isImmTy(ImmTyCPol); } 384 bool isTFE() const { return isImmTy(ImmTyTFE); } 385 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 386 bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); } 387 bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); } 388 bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 389 bool isDppFI() const { return isImmTy(ImmTyDppFI); } 390 bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); } 391 bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); } 392 bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); } 393 bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); } 394 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 395 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 396 bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); } 397 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 398 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 399 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 400 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 401 402 bool isRegOrImm() const { 403 return isReg() || isImm(); 404 } 405 406 bool isRegClass(unsigned RCID) const; 407 408 bool isInlineValue() const; 409 410 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 411 return isRegOrInline(RCID, type) && !hasModifiers(); 412 } 413 414 bool isSCSrcB16() const { 415 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 416 } 417 418 bool isSCSrcV2B16() const { 419 return isSCSrcB16(); 420 } 421 422 bool isSCSrcB32() const { 423 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 424 } 425 426 bool isSCSrcB64() const { 427 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 428 } 429 430 bool isBoolReg() const; 431 432 bool isSCSrcF16() const { 433 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 434 } 435 436 bool isSCSrcV2F16() const { 437 return isSCSrcF16(); 438 } 439 440 bool isSCSrcF32() const { 441 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 442 } 443 444 bool isSCSrcF64() const { 445 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 446 } 447 448 bool isSSrcB32() const { 449 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 450 } 451 452 bool isSSrcB16() const { 453 return isSCSrcB16() || isLiteralImm(MVT::i16); 454 } 455 456 bool isSSrcV2B16() const { 457 llvm_unreachable("cannot happen"); 458 return isSSrcB16(); 459 } 460 461 bool isSSrcB64() const { 462 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 463 // See isVSrc64(). 464 return isSCSrcB64() || isLiteralImm(MVT::i64); 465 } 466 467 bool isSSrcF32() const { 468 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 469 } 470 471 bool isSSrcF64() const { 472 return isSCSrcB64() || isLiteralImm(MVT::f64); 473 } 474 475 bool isSSrcF16() const { 476 return isSCSrcB16() || isLiteralImm(MVT::f16); 477 } 478 479 bool isSSrcV2F16() const { 480 llvm_unreachable("cannot happen"); 481 return isSSrcF16(); 482 } 483 484 bool isSSrcV2FP32() const { 485 llvm_unreachable("cannot happen"); 486 return isSSrcF32(); 487 } 488 489 bool isSCSrcV2FP32() const { 490 llvm_unreachable("cannot happen"); 491 return isSCSrcF32(); 492 } 493 494 bool isSSrcV2INT32() const { 495 llvm_unreachable("cannot happen"); 496 return isSSrcB32(); 497 } 498 499 bool isSCSrcV2INT32() const { 500 llvm_unreachable("cannot happen"); 501 return isSCSrcB32(); 502 } 503 504 bool isSSrcOrLdsB32() const { 505 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 506 isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVCSrcB32() const { 510 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 511 } 512 513 bool isVCSrcB64() const { 514 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 515 } 516 517 bool isVCSrcTB16() const { 518 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16); 519 } 520 521 bool isVCSrcTB16_Lo128() const { 522 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16); 523 } 524 525 bool isVCSrcFake16B16_Lo128() const { 526 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16); 527 } 528 529 bool isVCSrcB16() const { 530 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 531 } 532 533 bool isVCSrcV2B16() const { 534 return isVCSrcB16(); 535 } 536 537 bool isVCSrcF32() const { 538 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 539 } 540 541 bool isVCSrcF64() const { 542 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 543 } 544 545 bool isVCSrcTF16() const { 546 return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16); 547 } 548 549 bool isVCSrcTF16_Lo128() const { 550 return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16); 551 } 552 553 bool isVCSrcFake16F16_Lo128() const { 554 return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16); 555 } 556 557 bool isVCSrcF16() const { 558 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 559 } 560 561 bool isVCSrcV2F16() const { 562 return isVCSrcF16(); 563 } 564 565 bool isVSrcB32() const { 566 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 567 } 568 569 bool isVSrcB64() const { 570 return isVCSrcF64() || isLiteralImm(MVT::i64); 571 } 572 573 bool isVSrcTB16() const { return isVCSrcTB16() || isLiteralImm(MVT::i16); } 574 575 bool isVSrcTB16_Lo128() const { 576 return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16); 577 } 578 579 bool isVSrcFake16B16_Lo128() const { 580 return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16); 581 } 582 583 bool isVSrcB16() const { 584 return isVCSrcB16() || isLiteralImm(MVT::i16); 585 } 586 587 bool isVSrcV2B16() const { 588 return isVSrcB16() || isLiteralImm(MVT::v2i16); 589 } 590 591 bool isVCSrcV2FP32() const { 592 return isVCSrcF64(); 593 } 594 595 bool isVSrcV2FP32() const { 596 return isVSrcF64() || isLiteralImm(MVT::v2f32); 597 } 598 599 bool isVCSrcV2INT32() const { 600 return isVCSrcB64(); 601 } 602 603 bool isVSrcV2INT32() const { 604 return isVSrcB64() || isLiteralImm(MVT::v2i32); 605 } 606 607 bool isVSrcF32() const { 608 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 609 } 610 611 bool isVSrcF64() const { 612 return isVCSrcF64() || isLiteralImm(MVT::f64); 613 } 614 615 bool isVSrcTF16() const { return isVCSrcTF16() || isLiteralImm(MVT::f16); } 616 617 bool isVSrcTF16_Lo128() const { 618 return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16); 619 } 620 621 bool isVSrcFake16F16_Lo128() const { 622 return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16); 623 } 624 625 bool isVSrcF16() const { 626 return isVCSrcF16() || isLiteralImm(MVT::f16); 627 } 628 629 bool isVSrcV2F16() const { 630 return isVSrcF16() || isLiteralImm(MVT::v2f16); 631 } 632 633 bool isVISrcB32() const { 634 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 635 } 636 637 bool isVISrcB16() const { 638 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 639 } 640 641 bool isVISrcV2B16() const { 642 return isVISrcB16(); 643 } 644 645 bool isVISrcF32() const { 646 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 647 } 648 649 bool isVISrcF16() const { 650 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 651 } 652 653 bool isVISrcV2F16() const { 654 return isVISrcF16() || isVISrcB32(); 655 } 656 657 bool isVISrc_64B64() const { 658 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 659 } 660 661 bool isVISrc_64F64() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 663 } 664 665 bool isVISrc_64V2FP32() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 667 } 668 669 bool isVISrc_64V2INT32() const { 670 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 671 } 672 673 bool isVISrc_256B64() const { 674 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 675 } 676 677 bool isVISrc_256F64() const { 678 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 679 } 680 681 bool isVISrc_128B16() const { 682 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 683 } 684 685 bool isVISrc_128V2B16() const { 686 return isVISrc_128B16(); 687 } 688 689 bool isVISrc_128B32() const { 690 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 691 } 692 693 bool isVISrc_128F32() const { 694 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 695 } 696 697 bool isVISrc_256V2FP32() const { 698 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 699 } 700 701 bool isVISrc_256V2INT32() const { 702 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 703 } 704 705 bool isVISrc_512B32() const { 706 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 707 } 708 709 bool isVISrc_512B16() const { 710 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 711 } 712 713 bool isVISrc_512V2B16() const { 714 return isVISrc_512B16(); 715 } 716 717 bool isVISrc_512F32() const { 718 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 719 } 720 721 bool isVISrc_512F16() const { 722 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 723 } 724 725 bool isVISrc_512V2F16() const { 726 return isVISrc_512F16() || isVISrc_512B32(); 727 } 728 729 bool isVISrc_1024B32() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 731 } 732 733 bool isVISrc_1024B16() const { 734 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 735 } 736 737 bool isVISrc_1024V2B16() const { 738 return isVISrc_1024B16(); 739 } 740 741 bool isVISrc_1024F32() const { 742 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 743 } 744 745 bool isVISrc_1024F16() const { 746 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 747 } 748 749 bool isVISrc_1024V2F16() const { 750 return isVISrc_1024F16() || isVISrc_1024B32(); 751 } 752 753 bool isAISrcB32() const { 754 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 755 } 756 757 bool isAISrcB16() const { 758 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 759 } 760 761 bool isAISrcV2B16() const { 762 return isAISrcB16(); 763 } 764 765 bool isAISrcF32() const { 766 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 767 } 768 769 bool isAISrcF16() const { 770 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 771 } 772 773 bool isAISrcV2F16() const { 774 return isAISrcF16() || isAISrcB32(); 775 } 776 777 bool isAISrc_64B64() const { 778 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 779 } 780 781 bool isAISrc_64F64() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 783 } 784 785 bool isAISrc_128B32() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 787 } 788 789 bool isAISrc_128B16() const { 790 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 791 } 792 793 bool isAISrc_128V2B16() const { 794 return isAISrc_128B16(); 795 } 796 797 bool isAISrc_128F32() const { 798 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 799 } 800 801 bool isAISrc_128F16() const { 802 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 803 } 804 805 bool isAISrc_128V2F16() const { 806 return isAISrc_128F16() || isAISrc_128B32(); 807 } 808 809 bool isVISrc_128F16() const { 810 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 811 } 812 813 bool isVISrc_128V2F16() const { 814 return isVISrc_128F16() || isVISrc_128B32(); 815 } 816 817 bool isAISrc_256B64() const { 818 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 819 } 820 821 bool isAISrc_256F64() const { 822 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 823 } 824 825 bool isAISrc_512B32() const { 826 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 827 } 828 829 bool isAISrc_512B16() const { 830 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 831 } 832 833 bool isAISrc_512V2B16() const { 834 return isAISrc_512B16(); 835 } 836 837 bool isAISrc_512F32() const { 838 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 839 } 840 841 bool isAISrc_512F16() const { 842 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 843 } 844 845 bool isAISrc_512V2F16() const { 846 return isAISrc_512F16() || isAISrc_512B32(); 847 } 848 849 bool isAISrc_1024B32() const { 850 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 851 } 852 853 bool isAISrc_1024B16() const { 854 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 855 } 856 857 bool isAISrc_1024V2B16() const { 858 return isAISrc_1024B16(); 859 } 860 861 bool isAISrc_1024F32() const { 862 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 863 } 864 865 bool isAISrc_1024F16() const { 866 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 867 } 868 869 bool isAISrc_1024V2F16() const { 870 return isAISrc_1024F16() || isAISrc_1024B32(); 871 } 872 873 bool isKImmFP32() const { 874 return isLiteralImm(MVT::f32); 875 } 876 877 bool isKImmFP16() const { 878 return isLiteralImm(MVT::f16); 879 } 880 881 bool isMem() const override { 882 return false; 883 } 884 885 bool isExpr() const { 886 return Kind == Expression; 887 } 888 889 bool isSOPPBrTarget() const { return isExpr() || isImm(); } 890 891 bool isSWaitCnt() const; 892 bool isDepCtr() const; 893 bool isSDelayALU() const; 894 bool isHwreg() const; 895 bool isSendMsg() const; 896 bool isSplitBarrier() const; 897 bool isSwizzle() const; 898 bool isSMRDOffset8() const; 899 bool isSMEMOffset() const; 900 bool isSMRDLiteralOffset() const; 901 bool isDPP8() const; 902 bool isDPPCtrl() const; 903 bool isBLGP() const; 904 bool isCBSZ() const; 905 bool isABID() const; 906 bool isGPRIdxMode() const; 907 bool isS16Imm() const; 908 bool isU16Imm() const; 909 bool isEndpgm() const; 910 bool isWaitVDST() const; 911 bool isWaitEXP() const; 912 913 auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const { 914 return std::bind(P, *this); 915 } 916 917 StringRef getToken() const { 918 assert(isToken()); 919 return StringRef(Tok.Data, Tok.Length); 920 } 921 922 int64_t getImm() const { 923 assert(isImm()); 924 return Imm.Val; 925 } 926 927 void setImm(int64_t Val) { 928 assert(isImm()); 929 Imm.Val = Val; 930 } 931 932 ImmTy getImmTy() const { 933 assert(isImm()); 934 return Imm.Type; 935 } 936 937 unsigned getReg() const override { 938 assert(isRegKind()); 939 return Reg.RegNo; 940 } 941 942 SMLoc getStartLoc() const override { 943 return StartLoc; 944 } 945 946 SMLoc getEndLoc() const override { 947 return EndLoc; 948 } 949 950 SMRange getLocRange() const { 951 return SMRange(StartLoc, EndLoc); 952 } 953 954 Modifiers getModifiers() const { 955 assert(isRegKind() || isImmTy(ImmTyNone)); 956 return isRegKind() ? Reg.Mods : Imm.Mods; 957 } 958 959 void setModifiers(Modifiers Mods) { 960 assert(isRegKind() || isImmTy(ImmTyNone)); 961 if (isRegKind()) 962 Reg.Mods = Mods; 963 else 964 Imm.Mods = Mods; 965 } 966 967 bool hasModifiers() const { 968 return getModifiers().hasModifiers(); 969 } 970 971 bool hasFPModifiers() const { 972 return getModifiers().hasFPModifiers(); 973 } 974 975 bool hasIntModifiers() const { 976 return getModifiers().hasIntModifiers(); 977 } 978 979 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 980 981 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 982 983 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 984 985 void addRegOperands(MCInst &Inst, unsigned N) const; 986 987 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 988 if (isRegKind()) 989 addRegOperands(Inst, N); 990 else 991 addImmOperands(Inst, N); 992 } 993 994 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 995 Modifiers Mods = getModifiers(); 996 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 997 if (isRegKind()) { 998 addRegOperands(Inst, N); 999 } else { 1000 addImmOperands(Inst, N, false); 1001 } 1002 } 1003 1004 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 1005 assert(!hasIntModifiers()); 1006 addRegOrImmWithInputModsOperands(Inst, N); 1007 } 1008 1009 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 1010 assert(!hasFPModifiers()); 1011 addRegOrImmWithInputModsOperands(Inst, N); 1012 } 1013 1014 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 1015 Modifiers Mods = getModifiers(); 1016 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 1017 assert(isRegKind()); 1018 addRegOperands(Inst, N); 1019 } 1020 1021 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 1022 assert(!hasIntModifiers()); 1023 addRegWithInputModsOperands(Inst, N); 1024 } 1025 1026 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 1027 assert(!hasFPModifiers()); 1028 addRegWithInputModsOperands(Inst, N); 1029 } 1030 1031 static void printImmTy(raw_ostream& OS, ImmTy Type) { 1032 switch (Type) { 1033 case ImmTyNone: OS << "None"; break; 1034 case ImmTyGDS: OS << "GDS"; break; 1035 case ImmTyLDS: OS << "LDS"; break; 1036 case ImmTyOffen: OS << "Offen"; break; 1037 case ImmTyIdxen: OS << "Idxen"; break; 1038 case ImmTyAddr64: OS << "Addr64"; break; 1039 case ImmTyOffset: OS << "Offset"; break; 1040 case ImmTyInstOffset: OS << "InstOffset"; break; 1041 case ImmTyOffset0: OS << "Offset0"; break; 1042 case ImmTyOffset1: OS << "Offset1"; break; 1043 case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break; 1044 case ImmTyCPol: OS << "CPol"; break; 1045 case ImmTyTFE: OS << "TFE"; break; 1046 case ImmTyD16: OS << "D16"; break; 1047 case ImmTyFORMAT: OS << "FORMAT"; break; 1048 case ImmTyClampSI: OS << "ClampSI"; break; 1049 case ImmTyOModSI: OS << "OModSI"; break; 1050 case ImmTyDPP8: OS << "DPP8"; break; 1051 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1052 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1053 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1054 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1055 case ImmTyDppFI: OS << "DppFI"; break; 1056 case ImmTySDWADstSel: OS << "SDWADstSel"; break; 1057 case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break; 1058 case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break; 1059 case ImmTySDWADstUnused: OS << "SDWADstUnused"; break; 1060 case ImmTyDMask: OS << "DMask"; break; 1061 case ImmTyDim: OS << "Dim"; break; 1062 case ImmTyUNorm: OS << "UNorm"; break; 1063 case ImmTyDA: OS << "DA"; break; 1064 case ImmTyR128A16: OS << "R128A16"; break; 1065 case ImmTyA16: OS << "A16"; break; 1066 case ImmTyLWE: OS << "LWE"; break; 1067 case ImmTyOff: OS << "Off"; break; 1068 case ImmTyExpTgt: OS << "ExpTgt"; break; 1069 case ImmTyExpCompr: OS << "ExpCompr"; break; 1070 case ImmTyExpVM: OS << "ExpVM"; break; 1071 case ImmTyHwreg: OS << "Hwreg"; break; 1072 case ImmTySendMsg: OS << "SendMsg"; break; 1073 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1074 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1075 case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break; 1076 case ImmTyOpSel: OS << "OpSel"; break; 1077 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1078 case ImmTyNegLo: OS << "NegLo"; break; 1079 case ImmTyNegHi: OS << "NegHi"; break; 1080 case ImmTySwizzle: OS << "Swizzle"; break; 1081 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1082 case ImmTyHigh: OS << "High"; break; 1083 case ImmTyBLGP: OS << "BLGP"; break; 1084 case ImmTyCBSZ: OS << "CBSZ"; break; 1085 case ImmTyABID: OS << "ABID"; break; 1086 case ImmTyEndpgm: OS << "Endpgm"; break; 1087 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1088 case ImmTyWaitEXP: OS << "WaitEXP"; break; 1089 } 1090 } 1091 1092 void print(raw_ostream &OS) const override { 1093 switch (Kind) { 1094 case Register: 1095 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1096 break; 1097 case Immediate: 1098 OS << '<' << getImm(); 1099 if (getImmTy() != ImmTyNone) { 1100 OS << " type: "; printImmTy(OS, getImmTy()); 1101 } 1102 OS << " mods: " << Imm.Mods << '>'; 1103 break; 1104 case Token: 1105 OS << '\'' << getToken() << '\''; 1106 break; 1107 case Expression: 1108 OS << "<expr " << *Expr << '>'; 1109 break; 1110 } 1111 } 1112 1113 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1114 int64_t Val, SMLoc Loc, 1115 ImmTy Type = ImmTyNone, 1116 bool IsFPImm = false) { 1117 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1118 Op->Imm.Val = Val; 1119 Op->Imm.IsFPImm = IsFPImm; 1120 Op->Imm.Kind = ImmKindTyNone; 1121 Op->Imm.Type = Type; 1122 Op->Imm.Mods = Modifiers(); 1123 Op->StartLoc = Loc; 1124 Op->EndLoc = Loc; 1125 return Op; 1126 } 1127 1128 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1129 StringRef Str, SMLoc Loc, 1130 bool HasExplicitEncodingSize = true) { 1131 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1132 Res->Tok.Data = Str.data(); 1133 Res->Tok.Length = Str.size(); 1134 Res->StartLoc = Loc; 1135 Res->EndLoc = Loc; 1136 return Res; 1137 } 1138 1139 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1140 unsigned RegNo, SMLoc S, 1141 SMLoc E) { 1142 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1143 Op->Reg.RegNo = RegNo; 1144 Op->Reg.Mods = Modifiers(); 1145 Op->StartLoc = S; 1146 Op->EndLoc = E; 1147 return Op; 1148 } 1149 1150 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1151 const class MCExpr *Expr, SMLoc S) { 1152 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1153 Op->Expr = Expr; 1154 Op->StartLoc = S; 1155 Op->EndLoc = S; 1156 return Op; 1157 } 1158 }; 1159 1160 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1161 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1162 return OS; 1163 } 1164 1165 //===----------------------------------------------------------------------===// 1166 // AsmParser 1167 //===----------------------------------------------------------------------===// 1168 1169 // Holds info related to the current kernel, e.g. count of SGPRs used. 1170 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1171 // .amdgpu_hsa_kernel or at EOF. 1172 class KernelScopeInfo { 1173 int SgprIndexUnusedMin = -1; 1174 int VgprIndexUnusedMin = -1; 1175 int AgprIndexUnusedMin = -1; 1176 MCContext *Ctx = nullptr; 1177 MCSubtargetInfo const *MSTI = nullptr; 1178 1179 void usesSgprAt(int i) { 1180 if (i >= SgprIndexUnusedMin) { 1181 SgprIndexUnusedMin = ++i; 1182 if (Ctx) { 1183 MCSymbol* const Sym = 1184 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1185 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1186 } 1187 } 1188 } 1189 1190 void usesVgprAt(int i) { 1191 if (i >= VgprIndexUnusedMin) { 1192 VgprIndexUnusedMin = ++i; 1193 if (Ctx) { 1194 MCSymbol* const Sym = 1195 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1196 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1197 VgprIndexUnusedMin); 1198 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1199 } 1200 } 1201 } 1202 1203 void usesAgprAt(int i) { 1204 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1205 if (!hasMAIInsts(*MSTI)) 1206 return; 1207 1208 if (i >= AgprIndexUnusedMin) { 1209 AgprIndexUnusedMin = ++i; 1210 if (Ctx) { 1211 MCSymbol* const Sym = 1212 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1213 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1214 1215 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1216 MCSymbol* const vSym = 1217 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1218 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1219 VgprIndexUnusedMin); 1220 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1221 } 1222 } 1223 } 1224 1225 public: 1226 KernelScopeInfo() = default; 1227 1228 void initialize(MCContext &Context) { 1229 Ctx = &Context; 1230 MSTI = Ctx->getSubtargetInfo(); 1231 1232 usesSgprAt(SgprIndexUnusedMin = -1); 1233 usesVgprAt(VgprIndexUnusedMin = -1); 1234 if (hasMAIInsts(*MSTI)) { 1235 usesAgprAt(AgprIndexUnusedMin = -1); 1236 } 1237 } 1238 1239 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1240 unsigned RegWidth) { 1241 switch (RegKind) { 1242 case IS_SGPR: 1243 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1244 break; 1245 case IS_AGPR: 1246 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1247 break; 1248 case IS_VGPR: 1249 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1250 break; 1251 default: 1252 break; 1253 } 1254 } 1255 }; 1256 1257 class AMDGPUAsmParser : public MCTargetAsmParser { 1258 MCAsmParser &Parser; 1259 1260 unsigned ForcedEncodingSize = 0; 1261 bool ForcedDPP = false; 1262 bool ForcedSDWA = false; 1263 KernelScopeInfo KernelScope; 1264 1265 /// @name Auto-generated Match Functions 1266 /// { 1267 1268 #define GET_ASSEMBLER_HEADER 1269 #include "AMDGPUGenAsmMatcher.inc" 1270 1271 /// } 1272 1273 private: 1274 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1275 bool OutOfRangeError(SMRange Range); 1276 /// Calculate VGPR/SGPR blocks required for given target, reserved 1277 /// registers, and user-specified NextFreeXGPR values. 1278 /// 1279 /// \param Features [in] Target features, used for bug corrections. 1280 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1281 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1282 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1283 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1284 /// descriptor field, if valid. 1285 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1286 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1287 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1288 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1289 /// \param VGPRBlocks [out] Result VGPR block count. 1290 /// \param SGPRBlocks [out] Result SGPR block count. 1291 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1292 bool FlatScrUsed, bool XNACKUsed, 1293 std::optional<bool> EnableWavefrontSize32, 1294 unsigned NextFreeVGPR, SMRange VGPRRange, 1295 unsigned NextFreeSGPR, SMRange SGPRRange, 1296 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 1297 bool ParseDirectiveAMDGCNTarget(); 1298 bool ParseDirectiveAMDHSAKernel(); 1299 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1300 bool ParseDirectiveHSACodeObjectVersion(); 1301 bool ParseDirectiveHSACodeObjectISA(); 1302 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1303 bool ParseDirectiveAMDKernelCodeT(); 1304 // TODO: Possibly make subtargetHasRegister const. 1305 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1306 bool ParseDirectiveAMDGPUHsaKernel(); 1307 1308 bool ParseDirectiveISAVersion(); 1309 bool ParseDirectiveHSAMetadata(); 1310 bool ParseDirectivePALMetadataBegin(); 1311 bool ParseDirectivePALMetadata(); 1312 bool ParseDirectiveAMDGPULDS(); 1313 1314 /// Common code to parse out a block of text (typically YAML) between start and 1315 /// end directives. 1316 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1317 const char *AssemblerDirectiveEnd, 1318 std::string &CollectString); 1319 1320 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1321 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1322 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1323 unsigned &RegNum, unsigned &RegWidth, 1324 bool RestoreOnFailure = false); 1325 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1326 unsigned &RegNum, unsigned &RegWidth, 1327 SmallVectorImpl<AsmToken> &Tokens); 1328 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1329 unsigned &RegWidth, 1330 SmallVectorImpl<AsmToken> &Tokens); 1331 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1332 unsigned &RegWidth, 1333 SmallVectorImpl<AsmToken> &Tokens); 1334 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1335 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1336 bool ParseRegRange(unsigned& Num, unsigned& Width); 1337 unsigned getRegularReg(RegisterKind RegKind, 1338 unsigned RegNum, 1339 unsigned RegWidth, 1340 SMLoc Loc); 1341 1342 bool isRegister(); 1343 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1344 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1345 void initializeGprCountSymbol(RegisterKind RegKind); 1346 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1347 unsigned RegWidth); 1348 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1349 bool IsAtomic); 1350 1351 public: 1352 enum AMDGPUMatchResultTy { 1353 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1354 }; 1355 enum OperandMode { 1356 OperandMode_Default, 1357 OperandMode_NSA, 1358 }; 1359 1360 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1361 1362 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1363 const MCInstrInfo &MII, 1364 const MCTargetOptions &Options) 1365 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1366 MCAsmParserExtension::Initialize(Parser); 1367 1368 if (getFeatureBits().none()) { 1369 // Set default features. 1370 copySTI().ToggleFeature("southern-islands"); 1371 } 1372 1373 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1374 1375 { 1376 // TODO: make those pre-defined variables read-only. 1377 // Currently there is none suitable machinery in the core llvm-mc for this. 1378 // MCSymbol::isRedefinable is intended for another purpose, and 1379 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1380 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1381 MCContext &Ctx = getContext(); 1382 if (ISA.Major >= 6 && isHsaAbi(getSTI())) { 1383 MCSymbol *Sym = 1384 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1385 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1386 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1387 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1388 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1389 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1390 } else { 1391 MCSymbol *Sym = 1392 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1393 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1394 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1395 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1396 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1397 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1398 } 1399 if (ISA.Major >= 6 && isHsaAbi(getSTI())) { 1400 initializeGprCountSymbol(IS_VGPR); 1401 initializeGprCountSymbol(IS_SGPR); 1402 } else 1403 KernelScope.initialize(getContext()); 1404 } 1405 } 1406 1407 bool hasMIMG_R128() const { 1408 return AMDGPU::hasMIMG_R128(getSTI()); 1409 } 1410 1411 bool hasPackedD16() const { 1412 return AMDGPU::hasPackedD16(getSTI()); 1413 } 1414 1415 bool hasA16() const { return AMDGPU::hasA16(getSTI()); } 1416 1417 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1418 1419 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); } 1420 1421 bool isSI() const { 1422 return AMDGPU::isSI(getSTI()); 1423 } 1424 1425 bool isCI() const { 1426 return AMDGPU::isCI(getSTI()); 1427 } 1428 1429 bool isVI() const { 1430 return AMDGPU::isVI(getSTI()); 1431 } 1432 1433 bool isGFX9() const { 1434 return AMDGPU::isGFX9(getSTI()); 1435 } 1436 1437 // TODO: isGFX90A is also true for GFX940. We need to clean it. 1438 bool isGFX90A() const { 1439 return AMDGPU::isGFX90A(getSTI()); 1440 } 1441 1442 bool isGFX940() const { 1443 return AMDGPU::isGFX940(getSTI()); 1444 } 1445 1446 bool isGFX9Plus() const { 1447 return AMDGPU::isGFX9Plus(getSTI()); 1448 } 1449 1450 bool isGFX10() const { 1451 return AMDGPU::isGFX10(getSTI()); 1452 } 1453 1454 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1455 1456 bool isGFX11() const { 1457 return AMDGPU::isGFX11(getSTI()); 1458 } 1459 1460 bool isGFX11Plus() const { 1461 return AMDGPU::isGFX11Plus(getSTI()); 1462 } 1463 1464 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); } 1465 1466 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); } 1467 1468 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); } 1469 1470 bool isGFX10_BEncoding() const { 1471 return AMDGPU::isGFX10_BEncoding(getSTI()); 1472 } 1473 1474 bool hasInv2PiInlineImm() const { 1475 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1476 } 1477 1478 bool hasFlatOffsets() const { 1479 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1480 } 1481 1482 bool hasArchitectedFlatScratch() const { 1483 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1484 } 1485 1486 bool hasSGPR102_SGPR103() const { 1487 return !isVI() && !isGFX9(); 1488 } 1489 1490 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1491 1492 bool hasIntClamp() const { 1493 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1494 } 1495 1496 bool hasPartialNSAEncoding() const { 1497 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding]; 1498 } 1499 1500 unsigned getNSAMaxSize(bool HasSampler = false) const { 1501 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler); 1502 } 1503 1504 unsigned getMaxNumUserSGPRs() const { 1505 return AMDGPU::getMaxNumUserSGPRs(getSTI()); 1506 } 1507 1508 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); } 1509 1510 AMDGPUTargetStreamer &getTargetStreamer() { 1511 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1512 return static_cast<AMDGPUTargetStreamer &>(TS); 1513 } 1514 1515 const MCRegisterInfo *getMRI() const { 1516 // We need this const_cast because for some reason getContext() is not const 1517 // in MCAsmParser. 1518 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1519 } 1520 1521 const MCInstrInfo *getMII() const { 1522 return &MII; 1523 } 1524 1525 const FeatureBitset &getFeatureBits() const { 1526 return getSTI().getFeatureBits(); 1527 } 1528 1529 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1530 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1531 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1532 1533 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1534 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1535 bool isForcedDPP() const { return ForcedDPP; } 1536 bool isForcedSDWA() const { return ForcedSDWA; } 1537 ArrayRef<unsigned> getMatchedVariants() const; 1538 StringRef getMatchedVariantName() const; 1539 1540 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1541 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1542 bool RestoreOnFailure); 1543 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override; 1544 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 1545 SMLoc &EndLoc) override; 1546 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1547 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1548 unsigned Kind) override; 1549 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1550 OperandVector &Operands, MCStreamer &Out, 1551 uint64_t &ErrorInfo, 1552 bool MatchingInlineAsm) override; 1553 bool ParseDirective(AsmToken DirectiveID) override; 1554 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic, 1555 OperandMode Mode = OperandMode_Default); 1556 StringRef parseMnemonicSuffix(StringRef Name); 1557 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1558 SMLoc NameLoc, OperandVector &Operands) override; 1559 //bool ProcessInstruction(MCInst &Inst); 1560 1561 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands); 1562 1563 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int); 1564 1565 ParseStatus 1566 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1567 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1568 std::function<bool(int64_t &)> ConvertResult = nullptr); 1569 1570 ParseStatus parseOperandArrayWithPrefix( 1571 const char *Prefix, OperandVector &Operands, 1572 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1573 bool (*ConvertResult)(int64_t &) = nullptr); 1574 1575 ParseStatus 1576 parseNamedBit(StringRef Name, OperandVector &Operands, 1577 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1578 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const; 1579 ParseStatus parseCPol(OperandVector &Operands); 1580 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope); 1581 ParseStatus parseTH(OperandVector &Operands, int64_t &TH); 1582 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value, 1583 SMLoc &StringLoc); 1584 1585 bool isModifier(); 1586 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1587 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1588 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1589 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1590 bool parseSP3NegModifier(); 1591 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false, 1592 bool HasLit = false); 1593 ParseStatus parseReg(OperandVector &Operands); 1594 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false, 1595 bool HasLit = false); 1596 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands, 1597 bool AllowImm = true); 1598 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands, 1599 bool AllowImm = true); 1600 ParseStatus parseRegWithFPInputMods(OperandVector &Operands); 1601 ParseStatus parseRegWithIntInputMods(OperandVector &Operands); 1602 ParseStatus parseVReg32OrOff(OperandVector &Operands); 1603 ParseStatus parseDfmtNfmt(int64_t &Format); 1604 ParseStatus parseUfmt(int64_t &Format); 1605 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, 1606 int64_t &Format); 1607 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, 1608 int64_t &Format); 1609 ParseStatus parseFORMAT(OperandVector &Operands); 1610 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format); 1611 ParseStatus parseNumericFormat(int64_t &Format); 1612 ParseStatus parseFlatOffset(OperandVector &Operands); 1613 ParseStatus parseR128A16(OperandVector &Operands); 1614 ParseStatus parseBLGP(OperandVector &Operands); 1615 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1616 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1617 1618 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1619 1620 bool parseCnt(int64_t &IntVal); 1621 ParseStatus parseSWaitCnt(OperandVector &Operands); 1622 1623 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1624 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1625 ParseStatus parseDepCtr(OperandVector &Operands); 1626 1627 bool parseDelay(int64_t &Delay); 1628 ParseStatus parseSDelayALU(OperandVector &Operands); 1629 1630 ParseStatus parseHwreg(OperandVector &Operands); 1631 1632 private: 1633 struct OperandInfoTy { 1634 SMLoc Loc; 1635 int64_t Id; 1636 bool IsSymbolic = false; 1637 bool IsDefined = false; 1638 1639 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1640 }; 1641 1642 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1643 bool validateSendMsg(const OperandInfoTy &Msg, 1644 const OperandInfoTy &Op, 1645 const OperandInfoTy &Stream); 1646 1647 bool parseHwregBody(OperandInfoTy &HwReg, 1648 OperandInfoTy &Offset, 1649 OperandInfoTy &Width); 1650 bool validateHwreg(const OperandInfoTy &HwReg, 1651 const OperandInfoTy &Offset, 1652 const OperandInfoTy &Width); 1653 1654 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1655 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1656 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1657 1658 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1659 const OperandVector &Operands) const; 1660 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1661 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1662 SMLoc getLitLoc(const OperandVector &Operands, 1663 bool SearchMandatoryLiterals = false) const; 1664 SMLoc getMandatoryLitLoc(const OperandVector &Operands) const; 1665 SMLoc getConstLoc(const OperandVector &Operands) const; 1666 SMLoc getInstLoc(const OperandVector &Operands) const; 1667 1668 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1669 bool validateOffset(const MCInst &Inst, const OperandVector &Operands); 1670 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1671 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1672 bool validateSOPLiteral(const MCInst &Inst) const; 1673 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1674 bool validateVOPDRegBankConstraints(const MCInst &Inst, 1675 const OperandVector &Operands); 1676 bool validateIntClampSupported(const MCInst &Inst); 1677 bool validateMIMGAtomicDMask(const MCInst &Inst); 1678 bool validateMIMGGatherDMask(const MCInst &Inst); 1679 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1680 bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc); 1681 bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc); 1682 bool validateMIMGD16(const MCInst &Inst); 1683 bool validateMIMGMSAA(const MCInst &Inst); 1684 bool validateOpSel(const MCInst &Inst); 1685 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1686 bool validateVccOperand(unsigned Reg) const; 1687 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1688 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1689 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands); 1690 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1691 bool validateAGPRLdSt(const MCInst &Inst) const; 1692 bool validateVGPRAlign(const MCInst &Inst) const; 1693 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1694 bool validateDS(const MCInst &Inst, const OperandVector &Operands); 1695 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1696 bool validateDivScale(const MCInst &Inst); 1697 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands); 1698 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1699 const SMLoc &IDLoc); 1700 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands, 1701 const unsigned CPol); 1702 bool validateExeczVcczOperands(const OperandVector &Operands); 1703 bool validateTFE(const MCInst &Inst, const OperandVector &Operands); 1704 std::optional<StringRef> validateLdsDirect(const MCInst &Inst); 1705 unsigned getConstantBusLimit(unsigned Opcode) const; 1706 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1707 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1708 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1709 1710 bool isSupportedMnemo(StringRef Mnemo, 1711 const FeatureBitset &FBS); 1712 bool isSupportedMnemo(StringRef Mnemo, 1713 const FeatureBitset &FBS, 1714 ArrayRef<unsigned> Variants); 1715 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1716 1717 bool isId(const StringRef Id) const; 1718 bool isId(const AsmToken &Token, const StringRef Id) const; 1719 bool isToken(const AsmToken::TokenKind Kind) const; 1720 StringRef getId() const; 1721 bool trySkipId(const StringRef Id); 1722 bool trySkipId(const StringRef Pref, const StringRef Id); 1723 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1724 bool trySkipToken(const AsmToken::TokenKind Kind); 1725 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1726 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1727 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1728 1729 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1730 AsmToken::TokenKind getTokenKind() const; 1731 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1732 bool parseExpr(OperandVector &Operands); 1733 StringRef getTokenStr() const; 1734 AsmToken peekToken(bool ShouldSkipSpace = true); 1735 AsmToken getToken() const; 1736 SMLoc getLoc() const; 1737 void lex(); 1738 1739 public: 1740 void onBeginOfFile() override; 1741 1742 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK); 1743 1744 ParseStatus parseExpTgt(OperandVector &Operands); 1745 ParseStatus parseSendMsg(OperandVector &Operands); 1746 ParseStatus parseInterpSlot(OperandVector &Operands); 1747 ParseStatus parseInterpAttr(OperandVector &Operands); 1748 ParseStatus parseSOPPBrTarget(OperandVector &Operands); 1749 ParseStatus parseBoolReg(OperandVector &Operands); 1750 1751 bool parseSwizzleOperand(int64_t &Op, 1752 const unsigned MinVal, 1753 const unsigned MaxVal, 1754 const StringRef ErrMsg, 1755 SMLoc &Loc); 1756 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1757 const unsigned MinVal, 1758 const unsigned MaxVal, 1759 const StringRef ErrMsg); 1760 ParseStatus parseSwizzle(OperandVector &Operands); 1761 bool parseSwizzleOffset(int64_t &Imm); 1762 bool parseSwizzleMacro(int64_t &Imm); 1763 bool parseSwizzleQuadPerm(int64_t &Imm); 1764 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1765 bool parseSwizzleBroadcast(int64_t &Imm); 1766 bool parseSwizzleSwap(int64_t &Imm); 1767 bool parseSwizzleReverse(int64_t &Imm); 1768 1769 ParseStatus parseGPRIdxMode(OperandVector &Operands); 1770 int64_t parseGPRIdxMacro(); 1771 1772 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1773 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1774 1775 ParseStatus parseOModSI(OperandVector &Operands); 1776 1777 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1778 OptionalImmIndexMap &OptionalIdx); 1779 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1780 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1781 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1782 void cvtVOPD(MCInst &Inst, const OperandVector &Operands); 1783 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 1784 OptionalImmIndexMap &OptionalIdx); 1785 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1786 OptionalImmIndexMap &OptionalIdx); 1787 1788 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1789 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands); 1790 1791 bool parseDimId(unsigned &Encoding); 1792 ParseStatus parseDim(OperandVector &Operands); 1793 bool convertDppBoundCtrl(int64_t &BoundCtrl); 1794 ParseStatus parseDPP8(OperandVector &Operands); 1795 ParseStatus parseDPPCtrl(OperandVector &Operands); 1796 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1797 int64_t parseDPPCtrlSel(StringRef Ctrl); 1798 int64_t parseDPPCtrlPerm(); 1799 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1800 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { 1801 cvtDPP(Inst, Operands, true); 1802 } 1803 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 1804 bool IsDPP8 = false); 1805 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) { 1806 cvtVOP3DPP(Inst, Operands, true); 1807 } 1808 1809 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix, 1810 AMDGPUOperand::ImmTy Type); 1811 ParseStatus parseSDWADstUnused(OperandVector &Operands); 1812 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1813 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1814 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1815 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1816 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1817 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1818 uint64_t BasicInstType, 1819 bool SkipDstVcc = false, 1820 bool SkipSrcVcc = false); 1821 1822 ParseStatus parseEndpgm(OperandVector &Operands); 1823 1824 ParseStatus parseVOPD(OperandVector &Operands); 1825 }; 1826 1827 } // end anonymous namespace 1828 1829 // May be called with integer type with equivalent bitwidth. 1830 static const fltSemantics *getFltSemantics(unsigned Size) { 1831 switch (Size) { 1832 case 4: 1833 return &APFloat::IEEEsingle(); 1834 case 8: 1835 return &APFloat::IEEEdouble(); 1836 case 2: 1837 return &APFloat::IEEEhalf(); 1838 default: 1839 llvm_unreachable("unsupported fp type"); 1840 } 1841 } 1842 1843 static const fltSemantics *getFltSemantics(MVT VT) { 1844 return getFltSemantics(VT.getSizeInBits() / 8); 1845 } 1846 1847 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1848 switch (OperandType) { 1849 case AMDGPU::OPERAND_REG_IMM_INT32: 1850 case AMDGPU::OPERAND_REG_IMM_FP32: 1851 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1852 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1853 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1854 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1855 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1856 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1857 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1858 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1859 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1860 case AMDGPU::OPERAND_KIMM32: 1861 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 1862 return &APFloat::IEEEsingle(); 1863 case AMDGPU::OPERAND_REG_IMM_INT64: 1864 case AMDGPU::OPERAND_REG_IMM_FP64: 1865 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1866 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1867 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1868 return &APFloat::IEEEdouble(); 1869 case AMDGPU::OPERAND_REG_IMM_INT16: 1870 case AMDGPU::OPERAND_REG_IMM_FP16: 1871 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1872 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1873 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1874 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1875 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1876 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1877 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1878 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1879 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1880 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1881 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1882 case AMDGPU::OPERAND_KIMM16: 1883 return &APFloat::IEEEhalf(); 1884 default: 1885 llvm_unreachable("unsupported fp type"); 1886 } 1887 } 1888 1889 //===----------------------------------------------------------------------===// 1890 // Operand 1891 //===----------------------------------------------------------------------===// 1892 1893 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1894 bool Lost; 1895 1896 // Convert literal to single precision 1897 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1898 APFloat::rmNearestTiesToEven, 1899 &Lost); 1900 // We allow precision lost but not overflow or underflow 1901 if (Status != APFloat::opOK && 1902 Lost && 1903 ((Status & APFloat::opOverflow) != 0 || 1904 (Status & APFloat::opUnderflow) != 0)) { 1905 return false; 1906 } 1907 1908 return true; 1909 } 1910 1911 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1912 return isUIntN(Size, Val) || isIntN(Size, Val); 1913 } 1914 1915 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1916 if (VT.getScalarType() == MVT::i16) { 1917 // FP immediate values are broken. 1918 return isInlinableIntLiteral(Val); 1919 } 1920 1921 // f16/v2f16 operands work correctly for all values. 1922 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1923 } 1924 1925 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1926 1927 // This is a hack to enable named inline values like 1928 // shared_base with both 32-bit and 64-bit operands. 1929 // Note that these values are defined as 1930 // 32-bit operands only. 1931 if (isInlineValue()) { 1932 return true; 1933 } 1934 1935 if (!isImmTy(ImmTyNone)) { 1936 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1937 return false; 1938 } 1939 // TODO: We should avoid using host float here. It would be better to 1940 // check the float bit values which is what a few other places do. 1941 // We've had bot failures before due to weird NaN support on mips hosts. 1942 1943 APInt Literal(64, Imm.Val); 1944 1945 if (Imm.IsFPImm) { // We got fp literal token 1946 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1947 return AMDGPU::isInlinableLiteral64(Imm.Val, 1948 AsmParser->hasInv2PiInlineImm()); 1949 } 1950 1951 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1952 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1953 return false; 1954 1955 if (type.getScalarSizeInBits() == 16) { 1956 return isInlineableLiteralOp16( 1957 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1958 type, AsmParser->hasInv2PiInlineImm()); 1959 } 1960 1961 // Check if single precision literal is inlinable 1962 return AMDGPU::isInlinableLiteral32( 1963 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1964 AsmParser->hasInv2PiInlineImm()); 1965 } 1966 1967 // We got int literal token. 1968 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1969 return AMDGPU::isInlinableLiteral64(Imm.Val, 1970 AsmParser->hasInv2PiInlineImm()); 1971 } 1972 1973 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1974 return false; 1975 } 1976 1977 if (type.getScalarSizeInBits() == 16) { 1978 return isInlineableLiteralOp16( 1979 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1980 type, AsmParser->hasInv2PiInlineImm()); 1981 } 1982 1983 return AMDGPU::isInlinableLiteral32( 1984 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1985 AsmParser->hasInv2PiInlineImm()); 1986 } 1987 1988 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1989 // Check that this immediate can be added as literal 1990 if (!isImmTy(ImmTyNone)) { 1991 return false; 1992 } 1993 1994 if (!Imm.IsFPImm) { 1995 // We got int literal token. 1996 1997 if (type == MVT::f64 && hasFPModifiers()) { 1998 // Cannot apply fp modifiers to int literals preserving the same semantics 1999 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 2000 // disable these cases. 2001 return false; 2002 } 2003 2004 unsigned Size = type.getSizeInBits(); 2005 if (Size == 64) 2006 Size = 32; 2007 2008 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 2009 // types. 2010 return isSafeTruncation(Imm.Val, Size); 2011 } 2012 2013 // We got fp literal token 2014 if (type == MVT::f64) { // Expected 64-bit fp operand 2015 // We would set low 64-bits of literal to zeroes but we accept this literals 2016 return true; 2017 } 2018 2019 if (type == MVT::i64) { // Expected 64-bit int operand 2020 // We don't allow fp literals in 64-bit integer instructions. It is 2021 // unclear how we should encode them. 2022 return false; 2023 } 2024 2025 // We allow fp literals with f16x2 operands assuming that the specified 2026 // literal goes into the lower half and the upper half is zero. We also 2027 // require that the literal may be losslessly converted to f16. 2028 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 2029 (type == MVT::v2i16)? MVT::i16 : 2030 (type == MVT::v2f32)? MVT::f32 : type; 2031 2032 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2033 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2034 } 2035 2036 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2037 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2038 } 2039 2040 bool AMDGPUOperand::isVRegWithInputMods() const { 2041 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2042 // GFX90A allows DPP on 64-bit operands. 2043 (isRegClass(AMDGPU::VReg_64RegClassID) && 2044 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]); 2045 } 2046 2047 bool AMDGPUOperand::isT16VRegWithInputMods() const { 2048 return isRegClass(AMDGPU::VGPR_32_Lo128RegClassID); 2049 } 2050 2051 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2052 if (AsmParser->isVI()) 2053 return isVReg32(); 2054 else if (AsmParser->isGFX9Plus()) 2055 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2056 else 2057 return false; 2058 } 2059 2060 bool AMDGPUOperand::isSDWAFP16Operand() const { 2061 return isSDWAOperand(MVT::f16); 2062 } 2063 2064 bool AMDGPUOperand::isSDWAFP32Operand() const { 2065 return isSDWAOperand(MVT::f32); 2066 } 2067 2068 bool AMDGPUOperand::isSDWAInt16Operand() const { 2069 return isSDWAOperand(MVT::i16); 2070 } 2071 2072 bool AMDGPUOperand::isSDWAInt32Operand() const { 2073 return isSDWAOperand(MVT::i32); 2074 } 2075 2076 bool AMDGPUOperand::isBoolReg() const { 2077 auto FB = AsmParser->getFeatureBits(); 2078 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2079 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2080 } 2081 2082 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2083 { 2084 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2085 assert(Size == 2 || Size == 4 || Size == 8); 2086 2087 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2088 2089 if (Imm.Mods.Abs) { 2090 Val &= ~FpSignMask; 2091 } 2092 if (Imm.Mods.Neg) { 2093 Val ^= FpSignMask; 2094 } 2095 2096 return Val; 2097 } 2098 2099 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2100 if (isExpr()) { 2101 Inst.addOperand(MCOperand::createExpr(Expr)); 2102 return; 2103 } 2104 2105 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2106 Inst.getNumOperands())) { 2107 addLiteralImmOperand(Inst, Imm.Val, 2108 ApplyModifiers & 2109 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2110 } else { 2111 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2112 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2113 setImmKindNone(); 2114 } 2115 } 2116 2117 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2118 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2119 auto OpNum = Inst.getNumOperands(); 2120 // Check that this operand accepts literals 2121 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2122 2123 if (ApplyModifiers) { 2124 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2125 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 2126 Val = applyInputFPModifiers(Val, Size); 2127 } 2128 2129 APInt Literal(64, Val); 2130 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType; 2131 2132 if (Imm.IsFPImm) { // We got fp literal token 2133 switch (OpTy) { 2134 case AMDGPU::OPERAND_REG_IMM_INT64: 2135 case AMDGPU::OPERAND_REG_IMM_FP64: 2136 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2137 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2138 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2139 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2140 AsmParser->hasInv2PiInlineImm())) { 2141 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2142 setImmKindConst(); 2143 return; 2144 } 2145 2146 // Non-inlineable 2147 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2148 // For fp operands we check if low 32 bits are zeros 2149 if (Literal.getLoBits(32) != 0) { 2150 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2151 "Can't encode literal as exact 64-bit floating-point operand. " 2152 "Low 32-bits will be set to zero"); 2153 Val &= 0xffffffff00000000u; 2154 } 2155 2156 Inst.addOperand(MCOperand::createImm(Val)); 2157 setImmKindLiteral(); 2158 return; 2159 } 2160 2161 // We don't allow fp literals in 64-bit integer instructions. It is 2162 // unclear how we should encode them. This case should be checked earlier 2163 // in predicate methods (isLiteralImm()) 2164 llvm_unreachable("fp literal in 64-bit integer instruction."); 2165 2166 case AMDGPU::OPERAND_REG_IMM_INT32: 2167 case AMDGPU::OPERAND_REG_IMM_FP32: 2168 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2169 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2170 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2171 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2172 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2173 case AMDGPU::OPERAND_REG_IMM_INT16: 2174 case AMDGPU::OPERAND_REG_IMM_FP16: 2175 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2176 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2177 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2178 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2179 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2180 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2181 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2182 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2183 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2184 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2185 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2186 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2187 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2188 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2189 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2190 case AMDGPU::OPERAND_KIMM32: 2191 case AMDGPU::OPERAND_KIMM16: 2192 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: { 2193 bool lost; 2194 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2195 // Convert literal to single precision 2196 FPLiteral.convert(*getOpFltSemantics(OpTy), 2197 APFloat::rmNearestTiesToEven, &lost); 2198 // We allow precision lost but not overflow or underflow. This should be 2199 // checked earlier in isLiteralImm() 2200 2201 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2202 Inst.addOperand(MCOperand::createImm(ImmVal)); 2203 if (OpTy == AMDGPU::OPERAND_KIMM32 || OpTy == AMDGPU::OPERAND_KIMM16) { 2204 setImmKindMandatoryLiteral(); 2205 } else { 2206 setImmKindLiteral(); 2207 } 2208 return; 2209 } 2210 default: 2211 llvm_unreachable("invalid operand size"); 2212 } 2213 2214 return; 2215 } 2216 2217 // We got int literal token. 2218 // Only sign extend inline immediates. 2219 switch (OpTy) { 2220 case AMDGPU::OPERAND_REG_IMM_INT32: 2221 case AMDGPU::OPERAND_REG_IMM_FP32: 2222 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2223 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2224 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2225 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2226 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2227 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2228 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2229 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2230 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2231 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2232 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2233 case AMDGPU::OPERAND_INLINE_SPLIT_BARRIER_INT32: 2234 if (isSafeTruncation(Val, 32) && 2235 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2236 AsmParser->hasInv2PiInlineImm())) { 2237 Inst.addOperand(MCOperand::createImm(Val)); 2238 setImmKindConst(); 2239 return; 2240 } 2241 2242 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2243 setImmKindLiteral(); 2244 return; 2245 2246 case AMDGPU::OPERAND_REG_IMM_INT64: 2247 case AMDGPU::OPERAND_REG_IMM_FP64: 2248 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2249 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2250 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2251 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2252 Inst.addOperand(MCOperand::createImm(Val)); 2253 setImmKindConst(); 2254 return; 2255 } 2256 2257 Val = AMDGPU::isSISrcFPOperand(InstDesc, OpNum) ? (uint64_t)Val << 32 2258 : Lo_32(Val); 2259 2260 Inst.addOperand(MCOperand::createImm(Val)); 2261 setImmKindLiteral(); 2262 return; 2263 2264 case AMDGPU::OPERAND_REG_IMM_INT16: 2265 case AMDGPU::OPERAND_REG_IMM_FP16: 2266 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2267 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2268 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2269 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2270 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2271 if (isSafeTruncation(Val, 16) && 2272 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2273 AsmParser->hasInv2PiInlineImm())) { 2274 Inst.addOperand(MCOperand::createImm(Val)); 2275 setImmKindConst(); 2276 return; 2277 } 2278 2279 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2280 setImmKindLiteral(); 2281 return; 2282 2283 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2284 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2285 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2286 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2287 assert(isSafeTruncation(Val, 16)); 2288 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2289 AsmParser->hasInv2PiInlineImm())); 2290 2291 Inst.addOperand(MCOperand::createImm(Val)); 2292 return; 2293 } 2294 case AMDGPU::OPERAND_KIMM32: 2295 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2296 setImmKindMandatoryLiteral(); 2297 return; 2298 case AMDGPU::OPERAND_KIMM16: 2299 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2300 setImmKindMandatoryLiteral(); 2301 return; 2302 default: 2303 llvm_unreachable("invalid operand size"); 2304 } 2305 } 2306 2307 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2308 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2309 } 2310 2311 bool AMDGPUOperand::isInlineValue() const { 2312 return isRegKind() && ::isInlineValue(getReg()); 2313 } 2314 2315 //===----------------------------------------------------------------------===// 2316 // AsmParser 2317 //===----------------------------------------------------------------------===// 2318 2319 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2320 if (Is == IS_VGPR) { 2321 switch (RegWidth) { 2322 default: return -1; 2323 case 32: 2324 return AMDGPU::VGPR_32RegClassID; 2325 case 64: 2326 return AMDGPU::VReg_64RegClassID; 2327 case 96: 2328 return AMDGPU::VReg_96RegClassID; 2329 case 128: 2330 return AMDGPU::VReg_128RegClassID; 2331 case 160: 2332 return AMDGPU::VReg_160RegClassID; 2333 case 192: 2334 return AMDGPU::VReg_192RegClassID; 2335 case 224: 2336 return AMDGPU::VReg_224RegClassID; 2337 case 256: 2338 return AMDGPU::VReg_256RegClassID; 2339 case 288: 2340 return AMDGPU::VReg_288RegClassID; 2341 case 320: 2342 return AMDGPU::VReg_320RegClassID; 2343 case 352: 2344 return AMDGPU::VReg_352RegClassID; 2345 case 384: 2346 return AMDGPU::VReg_384RegClassID; 2347 case 512: 2348 return AMDGPU::VReg_512RegClassID; 2349 case 1024: 2350 return AMDGPU::VReg_1024RegClassID; 2351 } 2352 } else if (Is == IS_TTMP) { 2353 switch (RegWidth) { 2354 default: return -1; 2355 case 32: 2356 return AMDGPU::TTMP_32RegClassID; 2357 case 64: 2358 return AMDGPU::TTMP_64RegClassID; 2359 case 128: 2360 return AMDGPU::TTMP_128RegClassID; 2361 case 256: 2362 return AMDGPU::TTMP_256RegClassID; 2363 case 512: 2364 return AMDGPU::TTMP_512RegClassID; 2365 } 2366 } else if (Is == IS_SGPR) { 2367 switch (RegWidth) { 2368 default: return -1; 2369 case 32: 2370 return AMDGPU::SGPR_32RegClassID; 2371 case 64: 2372 return AMDGPU::SGPR_64RegClassID; 2373 case 96: 2374 return AMDGPU::SGPR_96RegClassID; 2375 case 128: 2376 return AMDGPU::SGPR_128RegClassID; 2377 case 160: 2378 return AMDGPU::SGPR_160RegClassID; 2379 case 192: 2380 return AMDGPU::SGPR_192RegClassID; 2381 case 224: 2382 return AMDGPU::SGPR_224RegClassID; 2383 case 256: 2384 return AMDGPU::SGPR_256RegClassID; 2385 case 288: 2386 return AMDGPU::SGPR_288RegClassID; 2387 case 320: 2388 return AMDGPU::SGPR_320RegClassID; 2389 case 352: 2390 return AMDGPU::SGPR_352RegClassID; 2391 case 384: 2392 return AMDGPU::SGPR_384RegClassID; 2393 case 512: 2394 return AMDGPU::SGPR_512RegClassID; 2395 } 2396 } else if (Is == IS_AGPR) { 2397 switch (RegWidth) { 2398 default: return -1; 2399 case 32: 2400 return AMDGPU::AGPR_32RegClassID; 2401 case 64: 2402 return AMDGPU::AReg_64RegClassID; 2403 case 96: 2404 return AMDGPU::AReg_96RegClassID; 2405 case 128: 2406 return AMDGPU::AReg_128RegClassID; 2407 case 160: 2408 return AMDGPU::AReg_160RegClassID; 2409 case 192: 2410 return AMDGPU::AReg_192RegClassID; 2411 case 224: 2412 return AMDGPU::AReg_224RegClassID; 2413 case 256: 2414 return AMDGPU::AReg_256RegClassID; 2415 case 288: 2416 return AMDGPU::AReg_288RegClassID; 2417 case 320: 2418 return AMDGPU::AReg_320RegClassID; 2419 case 352: 2420 return AMDGPU::AReg_352RegClassID; 2421 case 384: 2422 return AMDGPU::AReg_384RegClassID; 2423 case 512: 2424 return AMDGPU::AReg_512RegClassID; 2425 case 1024: 2426 return AMDGPU::AReg_1024RegClassID; 2427 } 2428 } 2429 return -1; 2430 } 2431 2432 static unsigned getSpecialRegForName(StringRef RegName) { 2433 return StringSwitch<unsigned>(RegName) 2434 .Case("exec", AMDGPU::EXEC) 2435 .Case("vcc", AMDGPU::VCC) 2436 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2437 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2438 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2439 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2440 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2441 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2442 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2443 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2444 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2445 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2446 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2447 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2448 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2449 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2450 .Case("m0", AMDGPU::M0) 2451 .Case("vccz", AMDGPU::SRC_VCCZ) 2452 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2453 .Case("execz", AMDGPU::SRC_EXECZ) 2454 .Case("src_execz", AMDGPU::SRC_EXECZ) 2455 .Case("scc", AMDGPU::SRC_SCC) 2456 .Case("src_scc", AMDGPU::SRC_SCC) 2457 .Case("tba", AMDGPU::TBA) 2458 .Case("tma", AMDGPU::TMA) 2459 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2460 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2461 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2462 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2463 .Case("vcc_lo", AMDGPU::VCC_LO) 2464 .Case("vcc_hi", AMDGPU::VCC_HI) 2465 .Case("exec_lo", AMDGPU::EXEC_LO) 2466 .Case("exec_hi", AMDGPU::EXEC_HI) 2467 .Case("tma_lo", AMDGPU::TMA_LO) 2468 .Case("tma_hi", AMDGPU::TMA_HI) 2469 .Case("tba_lo", AMDGPU::TBA_LO) 2470 .Case("tba_hi", AMDGPU::TBA_HI) 2471 .Case("pc", AMDGPU::PC_REG) 2472 .Case("null", AMDGPU::SGPR_NULL) 2473 .Default(AMDGPU::NoRegister); 2474 } 2475 2476 bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, 2477 SMLoc &EndLoc, bool RestoreOnFailure) { 2478 auto R = parseRegister(); 2479 if (!R) return true; 2480 assert(R->isReg()); 2481 RegNo = R->getReg(); 2482 StartLoc = R->getStartLoc(); 2483 EndLoc = R->getEndLoc(); 2484 return false; 2485 } 2486 2487 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc, 2488 SMLoc &EndLoc) { 2489 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2490 } 2491 2492 ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, 2493 SMLoc &EndLoc) { 2494 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2495 bool PendingErrors = getParser().hasPendingError(); 2496 getParser().clearPendingErrors(); 2497 if (PendingErrors) 2498 return ParseStatus::Failure; 2499 if (Result) 2500 return ParseStatus::NoMatch; 2501 return ParseStatus::Success; 2502 } 2503 2504 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2505 RegisterKind RegKind, unsigned Reg1, 2506 SMLoc Loc) { 2507 switch (RegKind) { 2508 case IS_SPECIAL: 2509 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2510 Reg = AMDGPU::EXEC; 2511 RegWidth = 64; 2512 return true; 2513 } 2514 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2515 Reg = AMDGPU::FLAT_SCR; 2516 RegWidth = 64; 2517 return true; 2518 } 2519 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2520 Reg = AMDGPU::XNACK_MASK; 2521 RegWidth = 64; 2522 return true; 2523 } 2524 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2525 Reg = AMDGPU::VCC; 2526 RegWidth = 64; 2527 return true; 2528 } 2529 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2530 Reg = AMDGPU::TBA; 2531 RegWidth = 64; 2532 return true; 2533 } 2534 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2535 Reg = AMDGPU::TMA; 2536 RegWidth = 64; 2537 return true; 2538 } 2539 Error(Loc, "register does not fit in the list"); 2540 return false; 2541 case IS_VGPR: 2542 case IS_SGPR: 2543 case IS_AGPR: 2544 case IS_TTMP: 2545 if (Reg1 != Reg + RegWidth / 32) { 2546 Error(Loc, "registers in a list must have consecutive indices"); 2547 return false; 2548 } 2549 RegWidth += 32; 2550 return true; 2551 default: 2552 llvm_unreachable("unexpected register kind"); 2553 } 2554 } 2555 2556 struct RegInfo { 2557 StringLiteral Name; 2558 RegisterKind Kind; 2559 }; 2560 2561 static constexpr RegInfo RegularRegisters[] = { 2562 {{"v"}, IS_VGPR}, 2563 {{"s"}, IS_SGPR}, 2564 {{"ttmp"}, IS_TTMP}, 2565 {{"acc"}, IS_AGPR}, 2566 {{"a"}, IS_AGPR}, 2567 }; 2568 2569 static bool isRegularReg(RegisterKind Kind) { 2570 return Kind == IS_VGPR || 2571 Kind == IS_SGPR || 2572 Kind == IS_TTMP || 2573 Kind == IS_AGPR; 2574 } 2575 2576 static const RegInfo* getRegularRegInfo(StringRef Str) { 2577 for (const RegInfo &Reg : RegularRegisters) 2578 if (Str.starts_with(Reg.Name)) 2579 return &Reg; 2580 return nullptr; 2581 } 2582 2583 static bool getRegNum(StringRef Str, unsigned& Num) { 2584 return !Str.getAsInteger(10, Num); 2585 } 2586 2587 bool 2588 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2589 const AsmToken &NextToken) const { 2590 2591 // A list of consecutive registers: [s0,s1,s2,s3] 2592 if (Token.is(AsmToken::LBrac)) 2593 return true; 2594 2595 if (!Token.is(AsmToken::Identifier)) 2596 return false; 2597 2598 // A single register like s0 or a range of registers like s[0:1] 2599 2600 StringRef Str = Token.getString(); 2601 const RegInfo *Reg = getRegularRegInfo(Str); 2602 if (Reg) { 2603 StringRef RegName = Reg->Name; 2604 StringRef RegSuffix = Str.substr(RegName.size()); 2605 if (!RegSuffix.empty()) { 2606 unsigned Num; 2607 // A single register with an index: rXX 2608 if (getRegNum(RegSuffix, Num)) 2609 return true; 2610 } else { 2611 // A range of registers: r[XX:YY]. 2612 if (NextToken.is(AsmToken::LBrac)) 2613 return true; 2614 } 2615 } 2616 2617 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2618 } 2619 2620 bool 2621 AMDGPUAsmParser::isRegister() 2622 { 2623 return isRegister(getToken(), peekToken()); 2624 } 2625 2626 unsigned 2627 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2628 unsigned RegNum, 2629 unsigned RegWidth, 2630 SMLoc Loc) { 2631 2632 assert(isRegularReg(RegKind)); 2633 2634 unsigned AlignSize = 1; 2635 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2636 // SGPR and TTMP registers must be aligned. 2637 // Max required alignment is 4 dwords. 2638 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u); 2639 } 2640 2641 if (RegNum % AlignSize != 0) { 2642 Error(Loc, "invalid register alignment"); 2643 return AMDGPU::NoRegister; 2644 } 2645 2646 unsigned RegIdx = RegNum / AlignSize; 2647 int RCID = getRegClass(RegKind, RegWidth); 2648 if (RCID == -1) { 2649 Error(Loc, "invalid or unsupported register size"); 2650 return AMDGPU::NoRegister; 2651 } 2652 2653 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2654 const MCRegisterClass RC = TRI->getRegClass(RCID); 2655 if (RegIdx >= RC.getNumRegs()) { 2656 Error(Loc, "register index is out of range"); 2657 return AMDGPU::NoRegister; 2658 } 2659 2660 return RC.getRegister(RegIdx); 2661 } 2662 2663 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2664 int64_t RegLo, RegHi; 2665 if (!skipToken(AsmToken::LBrac, "missing register index")) 2666 return false; 2667 2668 SMLoc FirstIdxLoc = getLoc(); 2669 SMLoc SecondIdxLoc; 2670 2671 if (!parseExpr(RegLo)) 2672 return false; 2673 2674 if (trySkipToken(AsmToken::Colon)) { 2675 SecondIdxLoc = getLoc(); 2676 if (!parseExpr(RegHi)) 2677 return false; 2678 } else { 2679 RegHi = RegLo; 2680 } 2681 2682 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2683 return false; 2684 2685 if (!isUInt<32>(RegLo)) { 2686 Error(FirstIdxLoc, "invalid register index"); 2687 return false; 2688 } 2689 2690 if (!isUInt<32>(RegHi)) { 2691 Error(SecondIdxLoc, "invalid register index"); 2692 return false; 2693 } 2694 2695 if (RegLo > RegHi) { 2696 Error(FirstIdxLoc, "first register index should not exceed second index"); 2697 return false; 2698 } 2699 2700 Num = static_cast<unsigned>(RegLo); 2701 RegWidth = 32 * ((RegHi - RegLo) + 1); 2702 return true; 2703 } 2704 2705 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2706 unsigned &RegNum, unsigned &RegWidth, 2707 SmallVectorImpl<AsmToken> &Tokens) { 2708 assert(isToken(AsmToken::Identifier)); 2709 unsigned Reg = getSpecialRegForName(getTokenStr()); 2710 if (Reg) { 2711 RegNum = 0; 2712 RegWidth = 32; 2713 RegKind = IS_SPECIAL; 2714 Tokens.push_back(getToken()); 2715 lex(); // skip register name 2716 } 2717 return Reg; 2718 } 2719 2720 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2721 unsigned &RegNum, unsigned &RegWidth, 2722 SmallVectorImpl<AsmToken> &Tokens) { 2723 assert(isToken(AsmToken::Identifier)); 2724 StringRef RegName = getTokenStr(); 2725 auto Loc = getLoc(); 2726 2727 const RegInfo *RI = getRegularRegInfo(RegName); 2728 if (!RI) { 2729 Error(Loc, "invalid register name"); 2730 return AMDGPU::NoRegister; 2731 } 2732 2733 Tokens.push_back(getToken()); 2734 lex(); // skip register name 2735 2736 RegKind = RI->Kind; 2737 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2738 if (!RegSuffix.empty()) { 2739 // Single 32-bit register: vXX. 2740 if (!getRegNum(RegSuffix, RegNum)) { 2741 Error(Loc, "invalid register index"); 2742 return AMDGPU::NoRegister; 2743 } 2744 RegWidth = 32; 2745 } else { 2746 // Range of registers: v[XX:YY]. ":YY" is optional. 2747 if (!ParseRegRange(RegNum, RegWidth)) 2748 return AMDGPU::NoRegister; 2749 } 2750 2751 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2752 } 2753 2754 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2755 unsigned &RegWidth, 2756 SmallVectorImpl<AsmToken> &Tokens) { 2757 unsigned Reg = AMDGPU::NoRegister; 2758 auto ListLoc = getLoc(); 2759 2760 if (!skipToken(AsmToken::LBrac, 2761 "expected a register or a list of registers")) { 2762 return AMDGPU::NoRegister; 2763 } 2764 2765 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2766 2767 auto Loc = getLoc(); 2768 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2769 return AMDGPU::NoRegister; 2770 if (RegWidth != 32) { 2771 Error(Loc, "expected a single 32-bit register"); 2772 return AMDGPU::NoRegister; 2773 } 2774 2775 for (; trySkipToken(AsmToken::Comma); ) { 2776 RegisterKind NextRegKind; 2777 unsigned NextReg, NextRegNum, NextRegWidth; 2778 Loc = getLoc(); 2779 2780 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2781 NextRegNum, NextRegWidth, 2782 Tokens)) { 2783 return AMDGPU::NoRegister; 2784 } 2785 if (NextRegWidth != 32) { 2786 Error(Loc, "expected a single 32-bit register"); 2787 return AMDGPU::NoRegister; 2788 } 2789 if (NextRegKind != RegKind) { 2790 Error(Loc, "registers in a list must be of the same kind"); 2791 return AMDGPU::NoRegister; 2792 } 2793 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2794 return AMDGPU::NoRegister; 2795 } 2796 2797 if (!skipToken(AsmToken::RBrac, 2798 "expected a comma or a closing square bracket")) { 2799 return AMDGPU::NoRegister; 2800 } 2801 2802 if (isRegularReg(RegKind)) 2803 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2804 2805 return Reg; 2806 } 2807 2808 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2809 unsigned &RegNum, unsigned &RegWidth, 2810 SmallVectorImpl<AsmToken> &Tokens) { 2811 auto Loc = getLoc(); 2812 Reg = AMDGPU::NoRegister; 2813 2814 if (isToken(AsmToken::Identifier)) { 2815 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2816 if (Reg == AMDGPU::NoRegister) 2817 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2818 } else { 2819 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2820 } 2821 2822 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2823 if (Reg == AMDGPU::NoRegister) { 2824 assert(Parser.hasPendingError()); 2825 return false; 2826 } 2827 2828 if (!subtargetHasRegister(*TRI, Reg)) { 2829 if (Reg == AMDGPU::SGPR_NULL) { 2830 Error(Loc, "'null' operand is not supported on this GPU"); 2831 } else { 2832 Error(Loc, "register not available on this GPU"); 2833 } 2834 return false; 2835 } 2836 2837 return true; 2838 } 2839 2840 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2841 unsigned &RegNum, unsigned &RegWidth, 2842 bool RestoreOnFailure /*=false*/) { 2843 Reg = AMDGPU::NoRegister; 2844 2845 SmallVector<AsmToken, 1> Tokens; 2846 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2847 if (RestoreOnFailure) { 2848 while (!Tokens.empty()) { 2849 getLexer().UnLex(Tokens.pop_back_val()); 2850 } 2851 } 2852 return true; 2853 } 2854 return false; 2855 } 2856 2857 std::optional<StringRef> 2858 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2859 switch (RegKind) { 2860 case IS_VGPR: 2861 return StringRef(".amdgcn.next_free_vgpr"); 2862 case IS_SGPR: 2863 return StringRef(".amdgcn.next_free_sgpr"); 2864 default: 2865 return std::nullopt; 2866 } 2867 } 2868 2869 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2870 auto SymbolName = getGprCountSymbolName(RegKind); 2871 assert(SymbolName && "initializing invalid register kind"); 2872 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2873 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2874 } 2875 2876 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2877 unsigned DwordRegIndex, 2878 unsigned RegWidth) { 2879 // Symbols are only defined for GCN targets 2880 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2881 return true; 2882 2883 auto SymbolName = getGprCountSymbolName(RegKind); 2884 if (!SymbolName) 2885 return true; 2886 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2887 2888 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2889 int64_t OldCount; 2890 2891 if (!Sym->isVariable()) 2892 return !Error(getLoc(), 2893 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2894 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2895 return !Error( 2896 getLoc(), 2897 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2898 2899 if (OldCount <= NewMax) 2900 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2901 2902 return true; 2903 } 2904 2905 std::unique_ptr<AMDGPUOperand> 2906 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2907 const auto &Tok = getToken(); 2908 SMLoc StartLoc = Tok.getLoc(); 2909 SMLoc EndLoc = Tok.getEndLoc(); 2910 RegisterKind RegKind; 2911 unsigned Reg, RegNum, RegWidth; 2912 2913 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2914 return nullptr; 2915 } 2916 if (isHsaAbi(getSTI())) { 2917 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2918 return nullptr; 2919 } else 2920 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2921 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2922 } 2923 2924 ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands, 2925 bool HasSP3AbsModifier, bool HasLit) { 2926 // TODO: add syntactic sugar for 1/(2*PI) 2927 2928 if (isRegister()) 2929 return ParseStatus::NoMatch; 2930 assert(!isModifier()); 2931 2932 if (!HasLit) { 2933 HasLit = trySkipId("lit"); 2934 if (HasLit) { 2935 if (!skipToken(AsmToken::LParen, "expected left paren after lit")) 2936 return ParseStatus::Failure; 2937 ParseStatus S = parseImm(Operands, HasSP3AbsModifier, HasLit); 2938 if (S.isSuccess() && 2939 !skipToken(AsmToken::RParen, "expected closing parentheses")) 2940 return ParseStatus::Failure; 2941 return S; 2942 } 2943 } 2944 2945 const auto& Tok = getToken(); 2946 const auto& NextTok = peekToken(); 2947 bool IsReal = Tok.is(AsmToken::Real); 2948 SMLoc S = getLoc(); 2949 bool Negate = false; 2950 2951 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2952 lex(); 2953 IsReal = true; 2954 Negate = true; 2955 } 2956 2957 AMDGPUOperand::Modifiers Mods; 2958 Mods.Lit = HasLit; 2959 2960 if (IsReal) { 2961 // Floating-point expressions are not supported. 2962 // Can only allow floating-point literals with an 2963 // optional sign. 2964 2965 StringRef Num = getTokenStr(); 2966 lex(); 2967 2968 APFloat RealVal(APFloat::IEEEdouble()); 2969 auto roundMode = APFloat::rmNearestTiesToEven; 2970 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) 2971 return ParseStatus::Failure; 2972 if (Negate) 2973 RealVal.changeSign(); 2974 2975 Operands.push_back( 2976 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2977 AMDGPUOperand::ImmTyNone, true)); 2978 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2979 Op.setModifiers(Mods); 2980 2981 return ParseStatus::Success; 2982 2983 } else { 2984 int64_t IntVal; 2985 const MCExpr *Expr; 2986 SMLoc S = getLoc(); 2987 2988 if (HasSP3AbsModifier) { 2989 // This is a workaround for handling expressions 2990 // as arguments of SP3 'abs' modifier, for example: 2991 // |1.0| 2992 // |-1| 2993 // |1+x| 2994 // This syntax is not compatible with syntax of standard 2995 // MC expressions (due to the trailing '|'). 2996 SMLoc EndLoc; 2997 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2998 return ParseStatus::Failure; 2999 } else { 3000 if (Parser.parseExpression(Expr)) 3001 return ParseStatus::Failure; 3002 } 3003 3004 if (Expr->evaluateAsAbsolute(IntVal)) { 3005 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 3006 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3007 Op.setModifiers(Mods); 3008 } else { 3009 if (HasLit) 3010 return ParseStatus::NoMatch; 3011 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3012 } 3013 3014 return ParseStatus::Success; 3015 } 3016 3017 return ParseStatus::NoMatch; 3018 } 3019 3020 ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) { 3021 if (!isRegister()) 3022 return ParseStatus::NoMatch; 3023 3024 if (auto R = parseRegister()) { 3025 assert(R->isReg()); 3026 Operands.push_back(std::move(R)); 3027 return ParseStatus::Success; 3028 } 3029 return ParseStatus::Failure; 3030 } 3031 3032 ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, 3033 bool HasSP3AbsMod, bool HasLit) { 3034 ParseStatus Res = parseReg(Operands); 3035 if (!Res.isNoMatch()) 3036 return Res; 3037 if (isModifier()) 3038 return ParseStatus::NoMatch; 3039 return parseImm(Operands, HasSP3AbsMod, HasLit); 3040 } 3041 3042 bool 3043 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3044 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3045 const auto &str = Token.getString(); 3046 return str == "abs" || str == "neg" || str == "sext"; 3047 } 3048 return false; 3049 } 3050 3051 bool 3052 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3053 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3054 } 3055 3056 bool 3057 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3058 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3059 } 3060 3061 bool 3062 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3063 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3064 } 3065 3066 // Check if this is an operand modifier or an opcode modifier 3067 // which may look like an expression but it is not. We should 3068 // avoid parsing these modifiers as expressions. Currently 3069 // recognized sequences are: 3070 // |...| 3071 // abs(...) 3072 // neg(...) 3073 // sext(...) 3074 // -reg 3075 // -|...| 3076 // -abs(...) 3077 // name:... 3078 // 3079 bool 3080 AMDGPUAsmParser::isModifier() { 3081 3082 AsmToken Tok = getToken(); 3083 AsmToken NextToken[2]; 3084 peekTokens(NextToken); 3085 3086 return isOperandModifier(Tok, NextToken[0]) || 3087 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3088 isOpcodeModifierWithVal(Tok, NextToken[0]); 3089 } 3090 3091 // Check if the current token is an SP3 'neg' modifier. 3092 // Currently this modifier is allowed in the following context: 3093 // 3094 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3095 // 2. Before an 'abs' modifier: -abs(...) 3096 // 3. Before an SP3 'abs' modifier: -|...| 3097 // 3098 // In all other cases "-" is handled as a part 3099 // of an expression that follows the sign. 3100 // 3101 // Note: When "-" is followed by an integer literal, 3102 // this is interpreted as integer negation rather 3103 // than a floating-point NEG modifier applied to N. 3104 // Beside being contr-intuitive, such use of floating-point 3105 // NEG modifier would have resulted in different meaning 3106 // of integer literals used with VOP1/2/C and VOP3, 3107 // for example: 3108 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3109 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3110 // Negative fp literals with preceding "-" are 3111 // handled likewise for uniformity 3112 // 3113 bool 3114 AMDGPUAsmParser::parseSP3NegModifier() { 3115 3116 AsmToken NextToken[2]; 3117 peekTokens(NextToken); 3118 3119 if (isToken(AsmToken::Minus) && 3120 (isRegister(NextToken[0], NextToken[1]) || 3121 NextToken[0].is(AsmToken::Pipe) || 3122 isId(NextToken[0], "abs"))) { 3123 lex(); 3124 return true; 3125 } 3126 3127 return false; 3128 } 3129 3130 ParseStatus 3131 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3132 bool AllowImm) { 3133 bool Neg, SP3Neg; 3134 bool Abs, SP3Abs; 3135 bool Lit; 3136 SMLoc Loc; 3137 3138 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3139 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) 3140 return Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3141 3142 SP3Neg = parseSP3NegModifier(); 3143 3144 Loc = getLoc(); 3145 Neg = trySkipId("neg"); 3146 if (Neg && SP3Neg) 3147 return Error(Loc, "expected register or immediate"); 3148 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3149 return ParseStatus::Failure; 3150 3151 Abs = trySkipId("abs"); 3152 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3153 return ParseStatus::Failure; 3154 3155 Lit = trySkipId("lit"); 3156 if (Lit && !skipToken(AsmToken::LParen, "expected left paren after lit")) 3157 return ParseStatus::Failure; 3158 3159 Loc = getLoc(); 3160 SP3Abs = trySkipToken(AsmToken::Pipe); 3161 if (Abs && SP3Abs) 3162 return Error(Loc, "expected register or immediate"); 3163 3164 ParseStatus Res; 3165 if (AllowImm) { 3166 Res = parseRegOrImm(Operands, SP3Abs, Lit); 3167 } else { 3168 Res = parseReg(Operands); 3169 } 3170 if (!Res.isSuccess()) 3171 return (SP3Neg || Neg || SP3Abs || Abs || Lit) ? ParseStatus::Failure : Res; 3172 3173 if (Lit && !Operands.back()->isImm()) 3174 Error(Loc, "expected immediate with lit modifier"); 3175 3176 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3177 return ParseStatus::Failure; 3178 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3179 return ParseStatus::Failure; 3180 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3181 return ParseStatus::Failure; 3182 if (Lit && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3183 return ParseStatus::Failure; 3184 3185 AMDGPUOperand::Modifiers Mods; 3186 Mods.Abs = Abs || SP3Abs; 3187 Mods.Neg = Neg || SP3Neg; 3188 Mods.Lit = Lit; 3189 3190 if (Mods.hasFPModifiers() || Lit) { 3191 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3192 if (Op.isExpr()) 3193 return Error(Op.getStartLoc(), "expected an absolute expression"); 3194 Op.setModifiers(Mods); 3195 } 3196 return ParseStatus::Success; 3197 } 3198 3199 ParseStatus 3200 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3201 bool AllowImm) { 3202 bool Sext = trySkipId("sext"); 3203 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3204 return ParseStatus::Failure; 3205 3206 ParseStatus Res; 3207 if (AllowImm) { 3208 Res = parseRegOrImm(Operands); 3209 } else { 3210 Res = parseReg(Operands); 3211 } 3212 if (!Res.isSuccess()) 3213 return Sext ? ParseStatus::Failure : Res; 3214 3215 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3216 return ParseStatus::Failure; 3217 3218 AMDGPUOperand::Modifiers Mods; 3219 Mods.Sext = Sext; 3220 3221 if (Mods.hasIntModifiers()) { 3222 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3223 if (Op.isExpr()) 3224 return Error(Op.getStartLoc(), "expected an absolute expression"); 3225 Op.setModifiers(Mods); 3226 } 3227 3228 return ParseStatus::Success; 3229 } 3230 3231 ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3232 return parseRegOrImmWithFPInputMods(Operands, false); 3233 } 3234 3235 ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3236 return parseRegOrImmWithIntInputMods(Operands, false); 3237 } 3238 3239 ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3240 auto Loc = getLoc(); 3241 if (trySkipId("off")) { 3242 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3243 AMDGPUOperand::ImmTyOff, false)); 3244 return ParseStatus::Success; 3245 } 3246 3247 if (!isRegister()) 3248 return ParseStatus::NoMatch; 3249 3250 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3251 if (Reg) { 3252 Operands.push_back(std::move(Reg)); 3253 return ParseStatus::Success; 3254 } 3255 3256 return ParseStatus::Failure; 3257 } 3258 3259 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3260 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3261 3262 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3263 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3264 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3265 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3266 return Match_InvalidOperand; 3267 3268 if ((TSFlags & SIInstrFlags::VOP3) && 3269 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3270 getForcedEncodingSize() != 64) 3271 return Match_PreferE32; 3272 3273 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3274 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3275 // v_mac_f32/16 allow only dst_sel == DWORD; 3276 auto OpNum = 3277 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3278 const auto &Op = Inst.getOperand(OpNum); 3279 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3280 return Match_InvalidOperand; 3281 } 3282 } 3283 3284 return Match_Success; 3285 } 3286 3287 static ArrayRef<unsigned> getAllVariants() { 3288 static const unsigned Variants[] = { 3289 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3290 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, 3291 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP 3292 }; 3293 3294 return ArrayRef(Variants); 3295 } 3296 3297 // What asm variants we should check 3298 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3299 if (isForcedDPP() && isForcedVOP3()) { 3300 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP}; 3301 return ArrayRef(Variants); 3302 } 3303 if (getForcedEncodingSize() == 32) { 3304 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3305 return ArrayRef(Variants); 3306 } 3307 3308 if (isForcedVOP3()) { 3309 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3310 return ArrayRef(Variants); 3311 } 3312 3313 if (isForcedSDWA()) { 3314 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3315 AMDGPUAsmVariants::SDWA9}; 3316 return ArrayRef(Variants); 3317 } 3318 3319 if (isForcedDPP()) { 3320 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3321 return ArrayRef(Variants); 3322 } 3323 3324 return getAllVariants(); 3325 } 3326 3327 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3328 if (isForcedDPP() && isForcedVOP3()) 3329 return "e64_dpp"; 3330 3331 if (getForcedEncodingSize() == 32) 3332 return "e32"; 3333 3334 if (isForcedVOP3()) 3335 return "e64"; 3336 3337 if (isForcedSDWA()) 3338 return "sdwa"; 3339 3340 if (isForcedDPP()) 3341 return "dpp"; 3342 3343 return ""; 3344 } 3345 3346 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3347 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3348 for (MCPhysReg Reg : Desc.implicit_uses()) { 3349 switch (Reg) { 3350 case AMDGPU::FLAT_SCR: 3351 case AMDGPU::VCC: 3352 case AMDGPU::VCC_LO: 3353 case AMDGPU::VCC_HI: 3354 case AMDGPU::M0: 3355 return Reg; 3356 default: 3357 break; 3358 } 3359 } 3360 return AMDGPU::NoRegister; 3361 } 3362 3363 // NB: This code is correct only when used to check constant 3364 // bus limitations because GFX7 support no f16 inline constants. 3365 // Note that there are no cases when a GFX7 opcode violates 3366 // constant bus limitations due to the use of an f16 constant. 3367 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3368 unsigned OpIdx) const { 3369 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3370 3371 if (!AMDGPU::isSISrcOperand(Desc, OpIdx) || 3372 AMDGPU::isKImmOperand(Desc, OpIdx)) { 3373 return false; 3374 } 3375 3376 const MCOperand &MO = Inst.getOperand(OpIdx); 3377 3378 int64_t Val = MO.getImm(); 3379 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3380 3381 switch (OpSize) { // expected operand size 3382 case 8: 3383 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3384 case 4: 3385 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3386 case 2: { 3387 const unsigned OperandType = Desc.operands()[OpIdx].OperandType; 3388 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3389 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3390 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3391 return AMDGPU::isInlinableIntLiteral(Val); 3392 3393 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3394 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3395 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3396 return AMDGPU::isInlinableIntLiteralV216(Val); 3397 3398 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3399 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3400 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3401 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3402 3403 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3404 } 3405 default: 3406 llvm_unreachable("invalid operand size"); 3407 } 3408 } 3409 3410 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3411 if (!isGFX10Plus()) 3412 return 1; 3413 3414 switch (Opcode) { 3415 // 64-bit shift instructions can use only one scalar value input 3416 case AMDGPU::V_LSHLREV_B64_e64: 3417 case AMDGPU::V_LSHLREV_B64_gfx10: 3418 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3419 case AMDGPU::V_LSHLREV_B64_e32_gfx12: 3420 case AMDGPU::V_LSHLREV_B64_e64_gfx12: 3421 case AMDGPU::V_LSHRREV_B64_e64: 3422 case AMDGPU::V_LSHRREV_B64_gfx10: 3423 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3424 case AMDGPU::V_LSHRREV_B64_e64_gfx12: 3425 case AMDGPU::V_ASHRREV_I64_e64: 3426 case AMDGPU::V_ASHRREV_I64_gfx10: 3427 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3428 case AMDGPU::V_ASHRREV_I64_e64_gfx12: 3429 case AMDGPU::V_LSHL_B64_e64: 3430 case AMDGPU::V_LSHR_B64_e64: 3431 case AMDGPU::V_ASHR_I64_e64: 3432 return 1; 3433 default: 3434 return 2; 3435 } 3436 } 3437 3438 constexpr unsigned MAX_SRC_OPERANDS_NUM = 6; 3439 using OperandIndices = SmallVector<int16_t, MAX_SRC_OPERANDS_NUM>; 3440 3441 // Get regular operand indices in the same order as specified 3442 // in the instruction (but append mandatory literals to the end). 3443 static OperandIndices getSrcOperandIndices(unsigned Opcode, 3444 bool AddMandatoryLiterals = false) { 3445 3446 int16_t ImmIdx = 3447 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1; 3448 3449 if (isVOPD(Opcode)) { 3450 int16_t ImmDeferredIdx = 3451 AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immDeferred) 3452 : -1; 3453 3454 return {getNamedOperandIdx(Opcode, OpName::src0X), 3455 getNamedOperandIdx(Opcode, OpName::vsrc1X), 3456 getNamedOperandIdx(Opcode, OpName::src0Y), 3457 getNamedOperandIdx(Opcode, OpName::vsrc1Y), 3458 ImmDeferredIdx, 3459 ImmIdx}; 3460 } 3461 3462 return {getNamedOperandIdx(Opcode, OpName::src0), 3463 getNamedOperandIdx(Opcode, OpName::src1), 3464 getNamedOperandIdx(Opcode, OpName::src2), ImmIdx}; 3465 } 3466 3467 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3468 const MCOperand &MO = Inst.getOperand(OpIdx); 3469 if (MO.isImm()) { 3470 return !isInlineConstant(Inst, OpIdx); 3471 } else if (MO.isReg()) { 3472 auto Reg = MO.getReg(); 3473 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3474 auto PReg = mc2PseudoReg(Reg); 3475 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3476 } else { 3477 return true; 3478 } 3479 } 3480 3481 bool AMDGPUAsmParser::validateConstantBusLimitations( 3482 const MCInst &Inst, const OperandVector &Operands) { 3483 const unsigned Opcode = Inst.getOpcode(); 3484 const MCInstrDesc &Desc = MII.get(Opcode); 3485 unsigned LastSGPR = AMDGPU::NoRegister; 3486 unsigned ConstantBusUseCount = 0; 3487 unsigned NumLiterals = 0; 3488 unsigned LiteralSize; 3489 3490 if (!(Desc.TSFlags & 3491 (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3492 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | SIInstrFlags::SDWA)) && 3493 !isVOPD(Opcode)) 3494 return true; 3495 3496 // Check special imm operands (used by madmk, etc) 3497 if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) { 3498 ++NumLiterals; 3499 LiteralSize = 4; 3500 } 3501 3502 SmallDenseSet<unsigned> SGPRsUsed; 3503 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3504 if (SGPRUsed != AMDGPU::NoRegister) { 3505 SGPRsUsed.insert(SGPRUsed); 3506 ++ConstantBusUseCount; 3507 } 3508 3509 OperandIndices OpIndices = getSrcOperandIndices(Opcode); 3510 3511 for (int OpIdx : OpIndices) { 3512 if (OpIdx == -1) 3513 continue; 3514 3515 const MCOperand &MO = Inst.getOperand(OpIdx); 3516 if (usesConstantBus(Inst, OpIdx)) { 3517 if (MO.isReg()) { 3518 LastSGPR = mc2PseudoReg(MO.getReg()); 3519 // Pairs of registers with a partial intersections like these 3520 // s0, s[0:1] 3521 // flat_scratch_lo, flat_scratch 3522 // flat_scratch_lo, flat_scratch_hi 3523 // are theoretically valid but they are disabled anyway. 3524 // Note that this code mimics SIInstrInfo::verifyInstruction 3525 if (SGPRsUsed.insert(LastSGPR).second) { 3526 ++ConstantBusUseCount; 3527 } 3528 } else { // Expression or a literal 3529 3530 if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3531 continue; // special operand like VINTERP attr_chan 3532 3533 // An instruction may use only one literal. 3534 // This has been validated on the previous step. 3535 // See validateVOPLiteral. 3536 // This literal may be used as more than one operand. 3537 // If all these operands are of the same size, 3538 // this literal counts as one scalar value. 3539 // Otherwise it counts as 2 scalar values. 3540 // See "GFX10 Shader Programming", section 3.6.2.3. 3541 3542 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3543 if (Size < 4) 3544 Size = 4; 3545 3546 if (NumLiterals == 0) { 3547 NumLiterals = 1; 3548 LiteralSize = Size; 3549 } else if (LiteralSize != Size) { 3550 NumLiterals = 2; 3551 } 3552 } 3553 } 3554 } 3555 ConstantBusUseCount += NumLiterals; 3556 3557 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3558 return true; 3559 3560 SMLoc LitLoc = getLitLoc(Operands); 3561 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3562 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; 3563 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3564 return false; 3565 } 3566 3567 bool AMDGPUAsmParser::validateVOPDRegBankConstraints( 3568 const MCInst &Inst, const OperandVector &Operands) { 3569 3570 const unsigned Opcode = Inst.getOpcode(); 3571 if (!isVOPD(Opcode)) 3572 return true; 3573 3574 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3575 3576 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) { 3577 const MCOperand &Opr = Inst.getOperand(OperandIdx); 3578 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI)) 3579 ? Opr.getReg() 3580 : MCRegister::NoRegister; 3581 }; 3582 3583 // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache. 3584 bool SkipSrc = Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12; 3585 3586 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII); 3587 auto InvalidCompOprIdx = 3588 InstInfo.getInvalidCompOperandIndex(getVRegIdx, SkipSrc); 3589 if (!InvalidCompOprIdx) 3590 return true; 3591 3592 auto CompOprIdx = *InvalidCompOprIdx; 3593 auto ParsedIdx = 3594 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx), 3595 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx)); 3596 assert(ParsedIdx > 0 && ParsedIdx < Operands.size()); 3597 3598 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc(); 3599 if (CompOprIdx == VOPD::Component::DST) { 3600 Error(Loc, "one dst register must be even and the other odd"); 3601 } else { 3602 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM; 3603 Error(Loc, Twine("src") + Twine(CompSrcIdx) + 3604 " operands must use different VGPR banks"); 3605 } 3606 3607 return false; 3608 } 3609 3610 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3611 3612 const unsigned Opc = Inst.getOpcode(); 3613 const MCInstrDesc &Desc = MII.get(Opc); 3614 3615 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3616 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3617 assert(ClampIdx != -1); 3618 return Inst.getOperand(ClampIdx).getImm() == 0; 3619 } 3620 3621 return true; 3622 } 3623 3624 constexpr uint64_t MIMGFlags = 3625 SIInstrFlags::MIMG | SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE; 3626 3627 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, 3628 const SMLoc &IDLoc) { 3629 3630 const unsigned Opc = Inst.getOpcode(); 3631 const MCInstrDesc &Desc = MII.get(Opc); 3632 3633 if ((Desc.TSFlags & MIMGFlags) == 0) 3634 return true; 3635 3636 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3637 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3638 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3639 3640 assert(VDataIdx != -1); 3641 3642 if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray 3643 return true; 3644 3645 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3646 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3647 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3648 if (DMask == 0) 3649 DMask = 1; 3650 3651 bool IsPackedD16 = false; 3652 unsigned DataSize = 3653 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask); 3654 if (hasPackedD16()) { 3655 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3656 IsPackedD16 = D16Idx >= 0; 3657 if (IsPackedD16 && Inst.getOperand(D16Idx).getImm()) 3658 DataSize = (DataSize + 1) / 2; 3659 } 3660 3661 if ((VDataSize / 4) == DataSize + TFESize) 3662 return true; 3663 3664 StringRef Modifiers; 3665 if (isGFX90A()) 3666 Modifiers = IsPackedD16 ? "dmask and d16" : "dmask"; 3667 else 3668 Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe"; 3669 3670 Error(IDLoc, Twine("image data size does not match ") + Modifiers); 3671 return false; 3672 } 3673 3674 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, 3675 const SMLoc &IDLoc) { 3676 const unsigned Opc = Inst.getOpcode(); 3677 const MCInstrDesc &Desc = MII.get(Opc); 3678 3679 if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus()) 3680 return true; 3681 3682 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3683 3684 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3685 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3686 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3687 int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc 3688 : AMDGPU::OpName::rsrc; 3689 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName); 3690 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3691 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3692 3693 assert(VAddr0Idx != -1); 3694 assert(SrsrcIdx != -1); 3695 assert(SrsrcIdx > VAddr0Idx); 3696 3697 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3698 if (BaseOpcode->BVH) { 3699 if (IsA16 == BaseOpcode->A16) 3700 return true; 3701 Error(IDLoc, "image address size does not match a16"); 3702 return false; 3703 } 3704 3705 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3706 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3707 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3708 unsigned ActualAddrSize = 3709 IsNSA ? SrsrcIdx - VAddr0Idx 3710 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3711 3712 unsigned ExpectedAddrSize = 3713 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3714 3715 if (IsNSA) { 3716 if (hasPartialNSAEncoding() && 3717 ExpectedAddrSize > 3718 getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) { 3719 int VAddrLastIdx = SrsrcIdx - 1; 3720 unsigned VAddrLastSize = 3721 AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4; 3722 3723 ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize; 3724 } 3725 } else { 3726 if (ExpectedAddrSize > 12) 3727 ExpectedAddrSize = 16; 3728 3729 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3730 // This provides backward compatibility for assembly created 3731 // before 160b/192b/224b types were directly supported. 3732 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3733 return true; 3734 } 3735 3736 if (ActualAddrSize == ExpectedAddrSize) 3737 return true; 3738 3739 Error(IDLoc, "image address size does not match dim and a16"); 3740 return false; 3741 } 3742 3743 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3744 3745 const unsigned Opc = Inst.getOpcode(); 3746 const MCInstrDesc &Desc = MII.get(Opc); 3747 3748 if ((Desc.TSFlags & MIMGFlags) == 0) 3749 return true; 3750 if (!Desc.mayLoad() || !Desc.mayStore()) 3751 return true; // Not atomic 3752 3753 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3754 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3755 3756 // This is an incomplete check because image_atomic_cmpswap 3757 // may only use 0x3 and 0xf while other atomic operations 3758 // may use 0x1 and 0x3. However these limitations are 3759 // verified when we check that dmask matches dst size. 3760 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3761 } 3762 3763 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3764 3765 const unsigned Opc = Inst.getOpcode(); 3766 const MCInstrDesc &Desc = MII.get(Opc); 3767 3768 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3769 return true; 3770 3771 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3772 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3773 3774 // GATHER4 instructions use dmask in a different fashion compared to 3775 // other MIMG instructions. The only useful DMASK values are 3776 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3777 // (red,red,red,red) etc.) The ISA document doesn't mention 3778 // this. 3779 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3780 } 3781 3782 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3783 const unsigned Opc = Inst.getOpcode(); 3784 const MCInstrDesc &Desc = MII.get(Opc); 3785 3786 if ((Desc.TSFlags & MIMGFlags) == 0) 3787 return true; 3788 3789 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3790 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3791 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3792 3793 if (!BaseOpcode->MSAA) 3794 return true; 3795 3796 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3797 assert(DimIdx != -1); 3798 3799 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3800 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3801 3802 return DimInfo->MSAA; 3803 } 3804 3805 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3806 { 3807 switch (Opcode) { 3808 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3809 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3810 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3811 return true; 3812 default: 3813 return false; 3814 } 3815 } 3816 3817 // movrels* opcodes should only allow VGPRS as src0. 3818 // This is specified in .td description for vop1/vop3, 3819 // but sdwa is handled differently. See isSDWAOperand. 3820 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3821 const OperandVector &Operands) { 3822 3823 const unsigned Opc = Inst.getOpcode(); 3824 const MCInstrDesc &Desc = MII.get(Opc); 3825 3826 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3827 return true; 3828 3829 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3830 assert(Src0Idx != -1); 3831 3832 SMLoc ErrLoc; 3833 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3834 if (Src0.isReg()) { 3835 auto Reg = mc2PseudoReg(Src0.getReg()); 3836 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3837 if (!isSGPR(Reg, TRI)) 3838 return true; 3839 ErrLoc = getRegLoc(Reg, Operands); 3840 } else { 3841 ErrLoc = getConstLoc(Operands); 3842 } 3843 3844 Error(ErrLoc, "source operand must be a VGPR"); 3845 return false; 3846 } 3847 3848 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3849 const OperandVector &Operands) { 3850 3851 const unsigned Opc = Inst.getOpcode(); 3852 3853 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3854 return true; 3855 3856 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3857 assert(Src0Idx != -1); 3858 3859 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3860 if (!Src0.isReg()) 3861 return true; 3862 3863 auto Reg = mc2PseudoReg(Src0.getReg()); 3864 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3865 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3866 Error(getRegLoc(Reg, Operands), 3867 "source operand must be either a VGPR or an inline constant"); 3868 return false; 3869 } 3870 3871 return true; 3872 } 3873 3874 bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst, 3875 const OperandVector &Operands) { 3876 unsigned Opcode = Inst.getOpcode(); 3877 const MCInstrDesc &Desc = MII.get(Opcode); 3878 3879 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) || 3880 !getFeatureBits()[FeatureMFMAInlineLiteralBug]) 3881 return true; 3882 3883 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2); 3884 if (Src2Idx == -1) 3885 return true; 3886 3887 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) { 3888 Error(getConstLoc(Operands), 3889 "inline constants are not allowed for this operand"); 3890 return false; 3891 } 3892 3893 return true; 3894 } 3895 3896 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3897 const OperandVector &Operands) { 3898 const unsigned Opc = Inst.getOpcode(); 3899 const MCInstrDesc &Desc = MII.get(Opc); 3900 3901 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3902 return true; 3903 3904 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3905 if (Src2Idx == -1) 3906 return true; 3907 3908 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3909 if (!Src2.isReg()) 3910 return true; 3911 3912 MCRegister Src2Reg = Src2.getReg(); 3913 MCRegister DstReg = Inst.getOperand(0).getReg(); 3914 if (Src2Reg == DstReg) 3915 return true; 3916 3917 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3918 if (TRI->getRegClass(Desc.operands()[0].RegClass).getSizeInBits() <= 128) 3919 return true; 3920 3921 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3922 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3923 "source 2 operand must not partially overlap with dst"); 3924 return false; 3925 } 3926 3927 return true; 3928 } 3929 3930 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3931 switch (Inst.getOpcode()) { 3932 default: 3933 return true; 3934 case V_DIV_SCALE_F32_gfx6_gfx7: 3935 case V_DIV_SCALE_F32_vi: 3936 case V_DIV_SCALE_F32_gfx10: 3937 case V_DIV_SCALE_F64_gfx6_gfx7: 3938 case V_DIV_SCALE_F64_vi: 3939 case V_DIV_SCALE_F64_gfx10: 3940 break; 3941 } 3942 3943 // TODO: Check that src0 = src1 or src2. 3944 3945 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3946 AMDGPU::OpName::src2_modifiers, 3947 AMDGPU::OpName::src2_modifiers}) { 3948 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3949 .getImm() & 3950 SISrcMods::ABS) { 3951 return false; 3952 } 3953 } 3954 3955 return true; 3956 } 3957 3958 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3959 3960 const unsigned Opc = Inst.getOpcode(); 3961 const MCInstrDesc &Desc = MII.get(Opc); 3962 3963 if ((Desc.TSFlags & MIMGFlags) == 0) 3964 return true; 3965 3966 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3967 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3968 if (isCI() || isSI()) 3969 return false; 3970 } 3971 3972 return true; 3973 } 3974 3975 static bool IsRevOpcode(const unsigned Opcode) 3976 { 3977 switch (Opcode) { 3978 case AMDGPU::V_SUBREV_F32_e32: 3979 case AMDGPU::V_SUBREV_F32_e64: 3980 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3981 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3982 case AMDGPU::V_SUBREV_F32_e32_vi: 3983 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3984 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3985 case AMDGPU::V_SUBREV_F32_e64_vi: 3986 3987 case AMDGPU::V_SUBREV_CO_U32_e32: 3988 case AMDGPU::V_SUBREV_CO_U32_e64: 3989 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3990 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3991 3992 case AMDGPU::V_SUBBREV_U32_e32: 3993 case AMDGPU::V_SUBBREV_U32_e64: 3994 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3995 case AMDGPU::V_SUBBREV_U32_e32_vi: 3996 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3997 case AMDGPU::V_SUBBREV_U32_e64_vi: 3998 3999 case AMDGPU::V_SUBREV_U32_e32: 4000 case AMDGPU::V_SUBREV_U32_e64: 4001 case AMDGPU::V_SUBREV_U32_e32_gfx9: 4002 case AMDGPU::V_SUBREV_U32_e32_vi: 4003 case AMDGPU::V_SUBREV_U32_e64_gfx9: 4004 case AMDGPU::V_SUBREV_U32_e64_vi: 4005 4006 case AMDGPU::V_SUBREV_F16_e32: 4007 case AMDGPU::V_SUBREV_F16_e64: 4008 case AMDGPU::V_SUBREV_F16_e32_gfx10: 4009 case AMDGPU::V_SUBREV_F16_e32_vi: 4010 case AMDGPU::V_SUBREV_F16_e64_gfx10: 4011 case AMDGPU::V_SUBREV_F16_e64_vi: 4012 4013 case AMDGPU::V_SUBREV_U16_e32: 4014 case AMDGPU::V_SUBREV_U16_e64: 4015 case AMDGPU::V_SUBREV_U16_e32_vi: 4016 case AMDGPU::V_SUBREV_U16_e64_vi: 4017 4018 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 4019 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 4020 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 4021 4022 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 4023 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 4024 4025 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 4026 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 4027 4028 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 4029 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 4030 4031 case AMDGPU::V_LSHRREV_B32_e32: 4032 case AMDGPU::V_LSHRREV_B32_e64: 4033 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 4034 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 4035 case AMDGPU::V_LSHRREV_B32_e32_vi: 4036 case AMDGPU::V_LSHRREV_B32_e64_vi: 4037 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 4038 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 4039 4040 case AMDGPU::V_ASHRREV_I32_e32: 4041 case AMDGPU::V_ASHRREV_I32_e64: 4042 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 4043 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 4044 case AMDGPU::V_ASHRREV_I32_e32_vi: 4045 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 4046 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 4047 case AMDGPU::V_ASHRREV_I32_e64_vi: 4048 4049 case AMDGPU::V_LSHLREV_B32_e32: 4050 case AMDGPU::V_LSHLREV_B32_e64: 4051 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 4052 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 4053 case AMDGPU::V_LSHLREV_B32_e32_vi: 4054 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 4055 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 4056 case AMDGPU::V_LSHLREV_B32_e64_vi: 4057 4058 case AMDGPU::V_LSHLREV_B16_e32: 4059 case AMDGPU::V_LSHLREV_B16_e64: 4060 case AMDGPU::V_LSHLREV_B16_e32_vi: 4061 case AMDGPU::V_LSHLREV_B16_e64_vi: 4062 case AMDGPU::V_LSHLREV_B16_gfx10: 4063 4064 case AMDGPU::V_LSHRREV_B16_e32: 4065 case AMDGPU::V_LSHRREV_B16_e64: 4066 case AMDGPU::V_LSHRREV_B16_e32_vi: 4067 case AMDGPU::V_LSHRREV_B16_e64_vi: 4068 case AMDGPU::V_LSHRREV_B16_gfx10: 4069 4070 case AMDGPU::V_ASHRREV_I16_e32: 4071 case AMDGPU::V_ASHRREV_I16_e64: 4072 case AMDGPU::V_ASHRREV_I16_e32_vi: 4073 case AMDGPU::V_ASHRREV_I16_e64_vi: 4074 case AMDGPU::V_ASHRREV_I16_gfx10: 4075 4076 case AMDGPU::V_LSHLREV_B64_e64: 4077 case AMDGPU::V_LSHLREV_B64_gfx10: 4078 case AMDGPU::V_LSHLREV_B64_vi: 4079 4080 case AMDGPU::V_LSHRREV_B64_e64: 4081 case AMDGPU::V_LSHRREV_B64_gfx10: 4082 case AMDGPU::V_LSHRREV_B64_vi: 4083 4084 case AMDGPU::V_ASHRREV_I64_e64: 4085 case AMDGPU::V_ASHRREV_I64_gfx10: 4086 case AMDGPU::V_ASHRREV_I64_vi: 4087 4088 case AMDGPU::V_PK_LSHLREV_B16: 4089 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 4090 case AMDGPU::V_PK_LSHLREV_B16_vi: 4091 4092 case AMDGPU::V_PK_LSHRREV_B16: 4093 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4094 case AMDGPU::V_PK_LSHRREV_B16_vi: 4095 case AMDGPU::V_PK_ASHRREV_I16: 4096 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4097 case AMDGPU::V_PK_ASHRREV_I16_vi: 4098 return true; 4099 default: 4100 return false; 4101 } 4102 } 4103 4104 std::optional<StringRef> 4105 AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4106 4107 using namespace SIInstrFlags; 4108 const unsigned Opcode = Inst.getOpcode(); 4109 const MCInstrDesc &Desc = MII.get(Opcode); 4110 4111 // lds_direct register is defined so that it can be used 4112 // with 9-bit operands only. Ignore encodings which do not accept these. 4113 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4114 if ((Desc.TSFlags & Enc) == 0) 4115 return std::nullopt; 4116 4117 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4118 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4119 if (SrcIdx == -1) 4120 break; 4121 const auto &Src = Inst.getOperand(SrcIdx); 4122 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4123 4124 if (isGFX90A() || isGFX11Plus()) 4125 return StringRef("lds_direct is not supported on this GPU"); 4126 4127 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4128 return StringRef("lds_direct cannot be used with this instruction"); 4129 4130 if (SrcName != OpName::src0) 4131 return StringRef("lds_direct may be used as src0 only"); 4132 } 4133 } 4134 4135 return std::nullopt; 4136 } 4137 4138 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4139 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4140 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4141 if (Op.isFlatOffset()) 4142 return Op.getStartLoc(); 4143 } 4144 return getLoc(); 4145 } 4146 4147 bool AMDGPUAsmParser::validateOffset(const MCInst &Inst, 4148 const OperandVector &Operands) { 4149 auto Opcode = Inst.getOpcode(); 4150 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4151 if (OpNum == -1) 4152 return true; 4153 4154 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4155 if ((TSFlags & SIInstrFlags::FLAT)) 4156 return validateFlatOffset(Inst, Operands); 4157 4158 if ((TSFlags & SIInstrFlags::SMRD)) 4159 return validateSMEMOffset(Inst, Operands); 4160 4161 const auto &Op = Inst.getOperand(OpNum); 4162 if (isGFX12Plus() && 4163 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4164 const unsigned OffsetSize = 24; 4165 if (!isIntN(OffsetSize, Op.getImm())) { 4166 Error(getFlatOffsetLoc(Operands), 4167 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4168 return false; 4169 } 4170 } else { 4171 const unsigned OffsetSize = 16; 4172 if (!isUIntN(OffsetSize, Op.getImm())) { 4173 Error(getFlatOffsetLoc(Operands), 4174 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4175 return false; 4176 } 4177 } 4178 return true; 4179 } 4180 4181 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4182 const OperandVector &Operands) { 4183 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4184 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4185 return true; 4186 4187 auto Opcode = Inst.getOpcode(); 4188 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4189 assert(OpNum != -1); 4190 4191 const auto &Op = Inst.getOperand(OpNum); 4192 if (!hasFlatOffsets() && Op.getImm() != 0) { 4193 Error(getFlatOffsetLoc(Operands), 4194 "flat offset modifier is not supported on this GPU"); 4195 return false; 4196 } 4197 4198 // For pre-GFX12 FLAT instructions the offset must be positive; 4199 // MSB is ignored and forced to zero. 4200 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI()); 4201 bool AllowNegative = 4202 (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) || 4203 isGFX12Plus(); 4204 if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) { 4205 Error(getFlatOffsetLoc(Operands), 4206 Twine("expected a ") + 4207 (AllowNegative ? Twine(OffsetSize) + "-bit signed offset" 4208 : Twine(OffsetSize - 1) + "-bit unsigned offset")); 4209 return false; 4210 } 4211 4212 return true; 4213 } 4214 4215 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4216 // Start with second operand because SMEM Offset cannot be dst or src0. 4217 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4218 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4219 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod()) 4220 return Op.getStartLoc(); 4221 } 4222 return getLoc(); 4223 } 4224 4225 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4226 const OperandVector &Operands) { 4227 if (isCI() || isSI()) 4228 return true; 4229 4230 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4231 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4232 return true; 4233 4234 auto Opcode = Inst.getOpcode(); 4235 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4236 if (OpNum == -1) 4237 return true; 4238 4239 const auto &Op = Inst.getOperand(OpNum); 4240 if (!Op.isImm()) 4241 return true; 4242 4243 uint64_t Offset = Op.getImm(); 4244 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4245 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4246 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4247 return true; 4248 4249 Error(getSMEMOffsetLoc(Operands), 4250 isGFX12Plus() ? "expected a 24-bit signed offset" 4251 : (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" 4252 : "expected a 21-bit signed offset"); 4253 4254 return false; 4255 } 4256 4257 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4258 unsigned Opcode = Inst.getOpcode(); 4259 const MCInstrDesc &Desc = MII.get(Opcode); 4260 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4261 return true; 4262 4263 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4264 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4265 4266 const int OpIndices[] = { Src0Idx, Src1Idx }; 4267 4268 unsigned NumExprs = 0; 4269 unsigned NumLiterals = 0; 4270 uint32_t LiteralValue; 4271 4272 for (int OpIdx : OpIndices) { 4273 if (OpIdx == -1) break; 4274 4275 const MCOperand &MO = Inst.getOperand(OpIdx); 4276 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4277 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4278 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4279 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4280 if (NumLiterals == 0 || LiteralValue != Value) { 4281 LiteralValue = Value; 4282 ++NumLiterals; 4283 } 4284 } else if (MO.isExpr()) { 4285 ++NumExprs; 4286 } 4287 } 4288 } 4289 4290 return NumLiterals + NumExprs <= 1; 4291 } 4292 4293 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4294 const unsigned Opc = Inst.getOpcode(); 4295 if (isPermlane16(Opc)) { 4296 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4297 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4298 4299 if (OpSel & ~3) 4300 return false; 4301 } 4302 4303 uint64_t TSFlags = MII.get(Opc).TSFlags; 4304 4305 if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) { 4306 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4307 if (OpSelIdx != -1) { 4308 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4309 return false; 4310 } 4311 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4312 if (OpSelHiIdx != -1) { 4313 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4314 return false; 4315 } 4316 } 4317 4318 // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot). 4319 if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) && 4320 (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) { 4321 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4322 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4323 if (OpSel & 3) 4324 return false; 4325 } 4326 4327 return true; 4328 } 4329 4330 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4331 const OperandVector &Operands) { 4332 const unsigned Opc = Inst.getOpcode(); 4333 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4334 if (DppCtrlIdx >= 0) { 4335 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 4336 4337 if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) && 4338 AMDGPU::isDPALU_DPP(MII.get(Opc))) { 4339 // DP ALU DPP is supported for row_newbcast only on GFX9* 4340 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4341 Error(S, "DP ALU dpp only supports row_newbcast"); 4342 return false; 4343 } 4344 } 4345 4346 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8); 4347 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0; 4348 4349 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) { 4350 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); 4351 if (Src1Idx >= 0) { 4352 const MCOperand &Src1 = Inst.getOperand(Src1Idx); 4353 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 4354 if (Src1.isImm() || 4355 (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) { 4356 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]); 4357 Error(Op.getStartLoc(), "invalid operand for instruction"); 4358 return false; 4359 } 4360 } 4361 } 4362 4363 return true; 4364 } 4365 4366 // Check if VCC register matches wavefront size 4367 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4368 auto FB = getFeatureBits(); 4369 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4370 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4371 } 4372 4373 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4374 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4375 const OperandVector &Operands) { 4376 unsigned Opcode = Inst.getOpcode(); 4377 const MCInstrDesc &Desc = MII.get(Opcode); 4378 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1; 4379 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4380 !HasMandatoryLiteral && !isVOPD(Opcode)) 4381 return true; 4382 4383 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral); 4384 4385 unsigned NumExprs = 0; 4386 unsigned NumLiterals = 0; 4387 uint32_t LiteralValue; 4388 4389 for (int OpIdx : OpIndices) { 4390 if (OpIdx == -1) 4391 continue; 4392 4393 const MCOperand &MO = Inst.getOperand(OpIdx); 4394 if (!MO.isImm() && !MO.isExpr()) 4395 continue; 4396 if (!isSISrcOperand(Desc, OpIdx)) 4397 continue; 4398 4399 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4400 uint64_t Value = static_cast<uint64_t>(MO.getImm()); 4401 bool IsFP64 = AMDGPU::isSISrcFPOperand(Desc, OpIdx) && 4402 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8; 4403 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64); 4404 4405 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value)) { 4406 Error(getLitLoc(Operands), "invalid operand for instruction"); 4407 return false; 4408 } 4409 4410 if (IsFP64 && IsValid32Op) 4411 Value = Hi_32(Value); 4412 4413 if (NumLiterals == 0 || LiteralValue != Value) { 4414 LiteralValue = Value; 4415 ++NumLiterals; 4416 } 4417 } else if (MO.isExpr()) { 4418 ++NumExprs; 4419 } 4420 } 4421 NumLiterals += NumExprs; 4422 4423 if (!NumLiterals) 4424 return true; 4425 4426 if (!HasMandatoryLiteral && !getFeatureBits()[FeatureVOP3Literal]) { 4427 Error(getLitLoc(Operands), "literal operands are not supported"); 4428 return false; 4429 } 4430 4431 if (NumLiterals > 1) { 4432 Error(getLitLoc(Operands, true), "only one unique literal operand is allowed"); 4433 return false; 4434 } 4435 4436 return true; 4437 } 4438 4439 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4440 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4441 const MCRegisterInfo *MRI) { 4442 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4443 if (OpIdx < 0) 4444 return -1; 4445 4446 const MCOperand &Op = Inst.getOperand(OpIdx); 4447 if (!Op.isReg()) 4448 return -1; 4449 4450 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4451 auto Reg = Sub ? Sub : Op.getReg(); 4452 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4453 return AGPR32.contains(Reg) ? 1 : 0; 4454 } 4455 4456 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4457 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4458 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4459 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4460 SIInstrFlags::DS)) == 0) 4461 return true; 4462 4463 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4464 : AMDGPU::OpName::vdata; 4465 4466 const MCRegisterInfo *MRI = getMRI(); 4467 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4468 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4469 4470 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4471 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4472 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4473 return false; 4474 } 4475 4476 auto FB = getFeatureBits(); 4477 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4478 if (DataAreg < 0 || DstAreg < 0) 4479 return true; 4480 return DstAreg == DataAreg; 4481 } 4482 4483 return DstAreg < 1 && DataAreg < 1; 4484 } 4485 4486 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4487 auto FB = getFeatureBits(); 4488 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4489 return true; 4490 4491 const MCRegisterInfo *MRI = getMRI(); 4492 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4493 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4494 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4495 const MCOperand &Op = Inst.getOperand(I); 4496 if (!Op.isReg()) 4497 continue; 4498 4499 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4500 if (!Sub) 4501 continue; 4502 4503 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4504 return false; 4505 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4506 return false; 4507 } 4508 4509 return true; 4510 } 4511 4512 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4513 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4514 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4515 if (Op.isBLGP()) 4516 return Op.getStartLoc(); 4517 } 4518 return SMLoc(); 4519 } 4520 4521 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4522 const OperandVector &Operands) { 4523 unsigned Opc = Inst.getOpcode(); 4524 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4525 if (BlgpIdx == -1) 4526 return true; 4527 SMLoc BLGPLoc = getBLGPLoc(Operands); 4528 if (!BLGPLoc.isValid()) 4529 return true; 4530 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:"); 4531 auto FB = getFeatureBits(); 4532 bool UsesNeg = false; 4533 if (FB[AMDGPU::FeatureGFX940Insts]) { 4534 switch (Opc) { 4535 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4536 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4537 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4538 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4539 UsesNeg = true; 4540 } 4541 } 4542 4543 if (IsNeg == UsesNeg) 4544 return true; 4545 4546 Error(BLGPLoc, 4547 UsesNeg ? "invalid modifier: blgp is not supported" 4548 : "invalid modifier: neg is not supported"); 4549 4550 return false; 4551 } 4552 4553 bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst, 4554 const OperandVector &Operands) { 4555 if (!isGFX11Plus()) 4556 return true; 4557 4558 unsigned Opc = Inst.getOpcode(); 4559 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 && 4560 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 && 4561 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 && 4562 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11) 4563 return true; 4564 4565 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst); 4566 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg()); 4567 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg()); 4568 if (Reg == AMDGPU::SGPR_NULL) 4569 return true; 4570 4571 SMLoc RegLoc = getRegLoc(Reg, Operands); 4572 Error(RegLoc, "src0 must be null"); 4573 return false; 4574 } 4575 4576 bool AMDGPUAsmParser::validateDS(const MCInst &Inst, 4577 const OperandVector &Operands) { 4578 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4579 if ((TSFlags & SIInstrFlags::DS) == 0) 4580 return true; 4581 if (TSFlags & SIInstrFlags::GWS) 4582 return validateGWS(Inst, Operands); 4583 // Only validate GDS for non-GWS instructions. 4584 if (hasGDS()) 4585 return true; 4586 int GDSIdx = 4587 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds); 4588 if (GDSIdx < 0) 4589 return true; 4590 unsigned GDS = Inst.getOperand(GDSIdx).getImm(); 4591 if (GDS) { 4592 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands); 4593 Error(S, "gds modifier is not supported on this GPU"); 4594 return false; 4595 } 4596 return true; 4597 } 4598 4599 // gfx90a has an undocumented limitation: 4600 // DS_GWS opcodes must use even aligned registers. 4601 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4602 const OperandVector &Operands) { 4603 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4604 return true; 4605 4606 int Opc = Inst.getOpcode(); 4607 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4608 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4609 return true; 4610 4611 const MCRegisterInfo *MRI = getMRI(); 4612 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4613 int Data0Pos = 4614 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4615 assert(Data0Pos != -1); 4616 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4617 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4618 if (RegIdx & 1) { 4619 SMLoc RegLoc = getRegLoc(Reg, Operands); 4620 Error(RegLoc, "vgpr must be even aligned"); 4621 return false; 4622 } 4623 4624 return true; 4625 } 4626 4627 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4628 const OperandVector &Operands, 4629 const SMLoc &IDLoc) { 4630 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4631 AMDGPU::OpName::cpol); 4632 if (CPolPos == -1) 4633 return true; 4634 4635 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4636 4637 if (isGFX12Plus()) 4638 return validateTHAndScopeBits(Inst, Operands, CPol); 4639 4640 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4641 if (TSFlags & SIInstrFlags::SMRD) { 4642 if (CPol && (isSI() || isCI())) { 4643 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4644 Error(S, "cache policy is not supported for SMRD instructions"); 4645 return false; 4646 } 4647 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4648 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4649 return false; 4650 } 4651 } 4652 4653 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4654 const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF | 4655 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4656 SIInstrFlags::FLAT; 4657 if (!(TSFlags & AllowSCCModifier)) { 4658 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4659 StringRef CStr(S.getPointer()); 4660 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4661 Error(S, 4662 "scc modifier is not supported for this instruction on this GPU"); 4663 return false; 4664 } 4665 } 4666 4667 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4668 return true; 4669 4670 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4671 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4672 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4673 : "instruction must use glc"); 4674 return false; 4675 } 4676 } else { 4677 if (CPol & CPol::GLC) { 4678 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4679 StringRef CStr(S.getPointer()); 4680 S = SMLoc::getFromPointer( 4681 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4682 Error(S, isGFX940() ? "instruction must not use sc0" 4683 : "instruction must not use glc"); 4684 return false; 4685 } 4686 } 4687 4688 return true; 4689 } 4690 4691 bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst, 4692 const OperandVector &Operands, 4693 const unsigned CPol) { 4694 const unsigned TH = CPol & AMDGPU::CPol::TH; 4695 const unsigned Scope = CPol & AMDGPU::CPol::SCOPE; 4696 4697 const unsigned Opcode = Inst.getOpcode(); 4698 const MCInstrDesc &TID = MII.get(Opcode); 4699 4700 auto PrintError = [&](StringRef Msg) { 4701 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4702 Error(S, Msg); 4703 return false; 4704 }; 4705 4706 if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) && 4707 (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) && 4708 (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN))) 4709 return PrintError("instruction must use th:TH_ATOMIC_RETURN"); 4710 4711 if (TH == 0) 4712 return true; 4713 4714 if ((TID.TSFlags & SIInstrFlags::SMRD) && 4715 ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) || 4716 (TH == AMDGPU::CPol::TH_NT_HT))) 4717 return PrintError("invalid th value for SMEM instruction"); 4718 4719 if (TH == AMDGPU::CPol::TH_BYPASS) { 4720 if ((Scope != AMDGPU::CPol::SCOPE_SYS && 4721 CPol & AMDGPU::CPol::TH_REAL_BYPASS) || 4722 (Scope == AMDGPU::CPol::SCOPE_SYS && 4723 !(CPol & AMDGPU::CPol::TH_REAL_BYPASS))) 4724 return PrintError("scope and th combination is not valid"); 4725 } 4726 4727 bool IsStore = TID.mayStore(); 4728 bool IsAtomic = 4729 TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet); 4730 4731 if (IsAtomic) { 4732 if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC)) 4733 return PrintError("invalid th value for atomic instructions"); 4734 } else if (IsStore) { 4735 if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE)) 4736 return PrintError("invalid th value for store instructions"); 4737 } else { 4738 if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD)) 4739 return PrintError("invalid th value for load instructions"); 4740 } 4741 4742 return true; 4743 } 4744 4745 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4746 if (!isGFX11Plus()) 4747 return true; 4748 for (auto &Operand : Operands) { 4749 if (!Operand->isReg()) 4750 continue; 4751 unsigned Reg = Operand->getReg(); 4752 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4753 Error(getRegLoc(Reg, Operands), 4754 "execz and vccz are not supported on this GPU"); 4755 return false; 4756 } 4757 } 4758 return true; 4759 } 4760 4761 bool AMDGPUAsmParser::validateTFE(const MCInst &Inst, 4762 const OperandVector &Operands) { 4763 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 4764 if (Desc.mayStore() && 4765 (Desc.TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) { 4766 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands); 4767 if (Loc != getInstLoc(Operands)) { 4768 Error(Loc, "TFE modifier has no meaning for store instructions"); 4769 return false; 4770 } 4771 } 4772 4773 return true; 4774 } 4775 4776 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4777 const SMLoc &IDLoc, 4778 const OperandVector &Operands) { 4779 if (auto ErrMsg = validateLdsDirect(Inst)) { 4780 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4781 return false; 4782 } 4783 if (!validateSOPLiteral(Inst)) { 4784 Error(getLitLoc(Operands), 4785 "only one unique literal operand is allowed"); 4786 return false; 4787 } 4788 if (!validateVOPLiteral(Inst, Operands)) { 4789 return false; 4790 } 4791 if (!validateConstantBusLimitations(Inst, Operands)) { 4792 return false; 4793 } 4794 if (!validateVOPDRegBankConstraints(Inst, Operands)) { 4795 return false; 4796 } 4797 if (!validateIntClampSupported(Inst)) { 4798 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4799 "integer clamping is not supported on this GPU"); 4800 return false; 4801 } 4802 if (!validateOpSel(Inst)) { 4803 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4804 "invalid op_sel operand"); 4805 return false; 4806 } 4807 if (!validateDPP(Inst, Operands)) { 4808 return false; 4809 } 4810 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4811 if (!validateMIMGD16(Inst)) { 4812 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4813 "d16 modifier is not supported on this GPU"); 4814 return false; 4815 } 4816 if (!validateMIMGMSAA(Inst)) { 4817 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4818 "invalid dim; must be MSAA type"); 4819 return false; 4820 } 4821 if (!validateMIMGDataSize(Inst, IDLoc)) { 4822 return false; 4823 } 4824 if (!validateMIMGAddrSize(Inst, IDLoc)) 4825 return false; 4826 if (!validateMIMGAtomicDMask(Inst)) { 4827 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4828 "invalid atomic image dmask"); 4829 return false; 4830 } 4831 if (!validateMIMGGatherDMask(Inst)) { 4832 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4833 "invalid image_gather dmask: only one bit must be set"); 4834 return false; 4835 } 4836 if (!validateMovrels(Inst, Operands)) { 4837 return false; 4838 } 4839 if (!validateOffset(Inst, Operands)) { 4840 return false; 4841 } 4842 if (!validateMAIAccWrite(Inst, Operands)) { 4843 return false; 4844 } 4845 if (!validateMAISrc2(Inst, Operands)) { 4846 return false; 4847 } 4848 if (!validateMFMA(Inst, Operands)) { 4849 return false; 4850 } 4851 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4852 return false; 4853 } 4854 4855 if (!validateAGPRLdSt(Inst)) { 4856 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4857 ? "invalid register class: data and dst should be all VGPR or AGPR" 4858 : "invalid register class: agpr loads and stores not supported on this GPU" 4859 ); 4860 return false; 4861 } 4862 if (!validateVGPRAlign(Inst)) { 4863 Error(IDLoc, 4864 "invalid register class: vgpr tuples must be 64 bit aligned"); 4865 return false; 4866 } 4867 if (!validateDS(Inst, Operands)) { 4868 return false; 4869 } 4870 4871 if (!validateBLGP(Inst, Operands)) { 4872 return false; 4873 } 4874 4875 if (!validateDivScale(Inst)) { 4876 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4877 return false; 4878 } 4879 if (!validateWaitCnt(Inst, Operands)) { 4880 return false; 4881 } 4882 if (!validateExeczVcczOperands(Operands)) { 4883 return false; 4884 } 4885 if (!validateTFE(Inst, Operands)) { 4886 return false; 4887 } 4888 4889 return true; 4890 } 4891 4892 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4893 const FeatureBitset &FBS, 4894 unsigned VariantID = 0); 4895 4896 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4897 const FeatureBitset &AvailableFeatures, 4898 unsigned VariantID); 4899 4900 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4901 const FeatureBitset &FBS) { 4902 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4903 } 4904 4905 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4906 const FeatureBitset &FBS, 4907 ArrayRef<unsigned> Variants) { 4908 for (auto Variant : Variants) { 4909 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4910 return true; 4911 } 4912 4913 return false; 4914 } 4915 4916 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4917 const SMLoc &IDLoc) { 4918 FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits()); 4919 4920 // Check if requested instruction variant is supported. 4921 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4922 return false; 4923 4924 // This instruction is not supported. 4925 // Clear any other pending errors because they are no longer relevant. 4926 getParser().clearPendingErrors(); 4927 4928 // Requested instruction variant is not supported. 4929 // Check if any other variants are supported. 4930 StringRef VariantName = getMatchedVariantName(); 4931 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4932 return Error(IDLoc, 4933 Twine(VariantName, 4934 " variant of this instruction is not supported")); 4935 } 4936 4937 // Check if this instruction may be used with a different wavesize. 4938 if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && 4939 !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) { 4940 4941 FeatureBitset FeaturesWS32 = getFeatureBits(); 4942 FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64) 4943 .flip(AMDGPU::FeatureWavefrontSize32); 4944 FeatureBitset AvailableFeaturesWS32 = 4945 ComputeAvailableFeatures(FeaturesWS32); 4946 4947 if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants())) 4948 return Error(IDLoc, "instruction requires wavesize=32"); 4949 } 4950 4951 // Finally check if this instruction is supported on any other GPU. 4952 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4953 return Error(IDLoc, "instruction not supported on this GPU"); 4954 } 4955 4956 // Instruction not supported on any GPU. Probably a typo. 4957 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4958 return Error(IDLoc, "invalid instruction" + Suggestion); 4959 } 4960 4961 static bool isInvalidVOPDY(const OperandVector &Operands, 4962 uint64_t InvalidOprIdx) { 4963 assert(InvalidOprIdx < Operands.size()); 4964 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]); 4965 if (Op.isToken() && InvalidOprIdx > 1) { 4966 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]); 4967 return PrevOp.isToken() && PrevOp.getToken() == "::"; 4968 } 4969 return false; 4970 } 4971 4972 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4973 OperandVector &Operands, 4974 MCStreamer &Out, 4975 uint64_t &ErrorInfo, 4976 bool MatchingInlineAsm) { 4977 MCInst Inst; 4978 unsigned Result = Match_Success; 4979 for (auto Variant : getMatchedVariants()) { 4980 uint64_t EI; 4981 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4982 Variant); 4983 // We order match statuses from least to most specific. We use most specific 4984 // status as resulting 4985 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4986 if ((R == Match_Success) || 4987 (R == Match_PreferE32) || 4988 (R == Match_MissingFeature && Result != Match_PreferE32) || 4989 (R == Match_InvalidOperand && Result != Match_MissingFeature 4990 && Result != Match_PreferE32) || 4991 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4992 && Result != Match_MissingFeature 4993 && Result != Match_PreferE32)) { 4994 Result = R; 4995 ErrorInfo = EI; 4996 } 4997 if (R == Match_Success) 4998 break; 4999 } 5000 5001 if (Result == Match_Success) { 5002 if (!validateInstruction(Inst, IDLoc, Operands)) { 5003 return true; 5004 } 5005 Inst.setLoc(IDLoc); 5006 Out.emitInstruction(Inst, getSTI()); 5007 return false; 5008 } 5009 5010 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5011 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 5012 return true; 5013 } 5014 5015 switch (Result) { 5016 default: break; 5017 case Match_MissingFeature: 5018 // It has been verified that the specified instruction 5019 // mnemonic is valid. A match was found but it requires 5020 // features which are not supported on this GPU. 5021 return Error(IDLoc, "operands are not valid for this GPU or mode"); 5022 5023 case Match_InvalidOperand: { 5024 SMLoc ErrorLoc = IDLoc; 5025 if (ErrorInfo != ~0ULL) { 5026 if (ErrorInfo >= Operands.size()) { 5027 return Error(IDLoc, "too few operands for instruction"); 5028 } 5029 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 5030 if (ErrorLoc == SMLoc()) 5031 ErrorLoc = IDLoc; 5032 5033 if (isInvalidVOPDY(Operands, ErrorInfo)) 5034 return Error(ErrorLoc, "invalid VOPDY instruction"); 5035 } 5036 return Error(ErrorLoc, "invalid operand for instruction"); 5037 } 5038 5039 case Match_PreferE32: 5040 return Error(IDLoc, "internal error: instruction without _e64 suffix " 5041 "should be encoded as e32"); 5042 case Match_MnemonicFail: 5043 llvm_unreachable("Invalid instructions should have been handled already"); 5044 } 5045 llvm_unreachable("Implement any new match types added!"); 5046 } 5047 5048 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 5049 int64_t Tmp = -1; 5050 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 5051 return true; 5052 } 5053 if (getParser().parseAbsoluteExpression(Tmp)) { 5054 return true; 5055 } 5056 Ret = static_cast<uint32_t>(Tmp); 5057 return false; 5058 } 5059 5060 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 5061 uint32_t &Minor) { 5062 if (ParseAsAbsoluteExpression(Major)) 5063 return TokError("invalid major version"); 5064 5065 if (!trySkipToken(AsmToken::Comma)) 5066 return TokError("minor version number required, comma expected"); 5067 5068 if (ParseAsAbsoluteExpression(Minor)) 5069 return TokError("invalid minor version"); 5070 5071 return false; 5072 } 5073 5074 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 5075 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5076 return TokError("directive only supported for amdgcn architecture"); 5077 5078 std::string TargetIDDirective; 5079 SMLoc TargetStart = getTok().getLoc(); 5080 if (getParser().parseEscapedString(TargetIDDirective)) 5081 return true; 5082 5083 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 5084 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5085 return getParser().Error(TargetRange.Start, 5086 (Twine(".amdgcn_target directive's target id ") + 5087 Twine(TargetIDDirective) + 5088 Twine(" does not match the specified target id ") + 5089 Twine(getTargetStreamer().getTargetID()->toString())).str()); 5090 5091 return false; 5092 } 5093 5094 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 5095 return Error(Range.Start, "value out of range", Range); 5096 } 5097 5098 bool AMDGPUAsmParser::calculateGPRBlocks( 5099 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 5100 bool XNACKUsed, std::optional<bool> EnableWavefrontSize32, 5101 unsigned NextFreeVGPR, SMRange VGPRRange, unsigned NextFreeSGPR, 5102 SMRange SGPRRange, unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 5103 // TODO(scott.linder): These calculations are duplicated from 5104 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 5105 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 5106 5107 unsigned NumVGPRs = NextFreeVGPR; 5108 unsigned NumSGPRs = NextFreeSGPR; 5109 5110 if (Version.Major >= 10) 5111 NumSGPRs = 0; 5112 else { 5113 unsigned MaxAddressableNumSGPRs = 5114 IsaInfo::getAddressableNumSGPRs(&getSTI()); 5115 5116 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 5117 NumSGPRs > MaxAddressableNumSGPRs) 5118 return OutOfRangeError(SGPRRange); 5119 5120 NumSGPRs += 5121 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 5122 5123 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 5124 NumSGPRs > MaxAddressableNumSGPRs) 5125 return OutOfRangeError(SGPRRange); 5126 5127 if (Features.test(FeatureSGPRInitBug)) 5128 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 5129 } 5130 5131 VGPRBlocks = 5132 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 5133 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 5134 5135 return false; 5136 } 5137 5138 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 5139 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 5140 return TokError("directive only supported for amdgcn architecture"); 5141 5142 if (!isHsaAbi(getSTI())) 5143 return TokError("directive only supported for amdhsa OS"); 5144 5145 StringRef KernelName; 5146 if (getParser().parseIdentifier(KernelName)) 5147 return true; 5148 5149 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 5150 5151 StringSet<> Seen; 5152 5153 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 5154 5155 SMRange VGPRRange; 5156 uint64_t NextFreeVGPR = 0; 5157 uint64_t AccumOffset = 0; 5158 uint64_t SharedVGPRCount = 0; 5159 uint64_t PreloadLength = 0; 5160 uint64_t PreloadOffset = 0; 5161 SMRange SGPRRange; 5162 uint64_t NextFreeSGPR = 0; 5163 5164 // Count the number of user SGPRs implied from the enabled feature bits. 5165 unsigned ImpliedUserSGPRCount = 0; 5166 5167 // Track if the asm explicitly contains the directive for the user SGPR 5168 // count. 5169 std::optional<unsigned> ExplicitUserSGPRCount; 5170 bool ReserveVCC = true; 5171 bool ReserveFlatScr = true; 5172 std::optional<bool> EnableWavefrontSize32; 5173 5174 while (true) { 5175 while (trySkipToken(AsmToken::EndOfStatement)); 5176 5177 StringRef ID; 5178 SMRange IDRange = getTok().getLocRange(); 5179 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 5180 return true; 5181 5182 if (ID == ".end_amdhsa_kernel") 5183 break; 5184 5185 if (!Seen.insert(ID).second) 5186 return TokError(".amdhsa_ directives cannot be repeated"); 5187 5188 SMLoc ValStart = getLoc(); 5189 int64_t IVal; 5190 if (getParser().parseAbsoluteExpression(IVal)) 5191 return true; 5192 SMLoc ValEnd = getLoc(); 5193 SMRange ValRange = SMRange(ValStart, ValEnd); 5194 5195 if (IVal < 0) 5196 return OutOfRangeError(ValRange); 5197 5198 uint64_t Val = IVal; 5199 5200 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 5201 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 5202 return OutOfRangeError(RANGE); \ 5203 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 5204 5205 if (ID == ".amdhsa_group_segment_fixed_size") { 5206 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 5207 return OutOfRangeError(ValRange); 5208 KD.group_segment_fixed_size = Val; 5209 } else if (ID == ".amdhsa_private_segment_fixed_size") { 5210 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 5211 return OutOfRangeError(ValRange); 5212 KD.private_segment_fixed_size = Val; 5213 } else if (ID == ".amdhsa_kernarg_size") { 5214 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 5215 return OutOfRangeError(ValRange); 5216 KD.kernarg_size = Val; 5217 } else if (ID == ".amdhsa_user_sgpr_count") { 5218 ExplicitUserSGPRCount = Val; 5219 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 5220 if (hasArchitectedFlatScratch()) 5221 return Error(IDRange.Start, 5222 "directive is not supported with architected flat scratch", 5223 IDRange); 5224 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5225 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 5226 Val, ValRange); 5227 if (Val) 5228 ImpliedUserSGPRCount += 4; 5229 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") { 5230 if (!hasKernargPreload()) 5231 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5232 5233 if (Val > getMaxNumUserSGPRs()) 5234 return OutOfRangeError(ValRange); 5235 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, Val, 5236 ValRange); 5237 if (Val) { 5238 ImpliedUserSGPRCount += Val; 5239 PreloadLength = Val; 5240 } 5241 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") { 5242 if (!hasKernargPreload()) 5243 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5244 5245 if (Val >= 1024) 5246 return OutOfRangeError(ValRange); 5247 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, Val, 5248 ValRange); 5249 if (Val) 5250 PreloadOffset = Val; 5251 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 5252 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5253 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 5254 ValRange); 5255 if (Val) 5256 ImpliedUserSGPRCount += 2; 5257 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 5258 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5259 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 5260 ValRange); 5261 if (Val) 5262 ImpliedUserSGPRCount += 2; 5263 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 5264 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5265 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 5266 Val, ValRange); 5267 if (Val) 5268 ImpliedUserSGPRCount += 2; 5269 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 5270 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5271 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 5272 ValRange); 5273 if (Val) 5274 ImpliedUserSGPRCount += 2; 5275 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 5276 if (hasArchitectedFlatScratch()) 5277 return Error(IDRange.Start, 5278 "directive is not supported with architected flat scratch", 5279 IDRange); 5280 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5281 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 5282 ValRange); 5283 if (Val) 5284 ImpliedUserSGPRCount += 2; 5285 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 5286 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5287 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 5288 Val, ValRange); 5289 if (Val) 5290 ImpliedUserSGPRCount += 1; 5291 } else if (ID == ".amdhsa_wavefront_size32") { 5292 if (IVersion.Major < 10) 5293 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5294 EnableWavefrontSize32 = Val; 5295 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5296 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5297 Val, ValRange); 5298 } else if (ID == ".amdhsa_uses_dynamic_stack") { 5299 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5300 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, Val, ValRange); 5301 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5302 if (hasArchitectedFlatScratch()) 5303 return Error(IDRange.Start, 5304 "directive is not supported with architected flat scratch", 5305 IDRange); 5306 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5307 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5308 } else if (ID == ".amdhsa_enable_private_segment") { 5309 if (!hasArchitectedFlatScratch()) 5310 return Error( 5311 IDRange.Start, 5312 "directive is not supported without architected flat scratch", 5313 IDRange); 5314 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5315 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5316 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5317 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5318 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5319 ValRange); 5320 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5321 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5322 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5323 ValRange); 5324 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5325 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5326 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5327 ValRange); 5328 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5329 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5330 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5331 ValRange); 5332 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5333 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5334 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5335 ValRange); 5336 } else if (ID == ".amdhsa_next_free_vgpr") { 5337 VGPRRange = ValRange; 5338 NextFreeVGPR = Val; 5339 } else if (ID == ".amdhsa_next_free_sgpr") { 5340 SGPRRange = ValRange; 5341 NextFreeSGPR = Val; 5342 } else if (ID == ".amdhsa_accum_offset") { 5343 if (!isGFX90A()) 5344 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5345 AccumOffset = Val; 5346 } else if (ID == ".amdhsa_reserve_vcc") { 5347 if (!isUInt<1>(Val)) 5348 return OutOfRangeError(ValRange); 5349 ReserveVCC = Val; 5350 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5351 if (IVersion.Major < 7) 5352 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5353 if (hasArchitectedFlatScratch()) 5354 return Error(IDRange.Start, 5355 "directive is not supported with architected flat scratch", 5356 IDRange); 5357 if (!isUInt<1>(Val)) 5358 return OutOfRangeError(ValRange); 5359 ReserveFlatScr = Val; 5360 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5361 if (IVersion.Major < 8) 5362 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5363 if (!isUInt<1>(Val)) 5364 return OutOfRangeError(ValRange); 5365 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5366 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5367 IDRange); 5368 } else if (ID == ".amdhsa_float_round_mode_32") { 5369 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5370 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5371 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5372 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5373 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5374 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5375 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5376 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5377 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5378 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5379 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5380 ValRange); 5381 } else if (ID == ".amdhsa_dx10_clamp") { 5382 if (IVersion.Major >= 12) 5383 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5384 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5385 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Val, 5386 ValRange); 5387 } else if (ID == ".amdhsa_ieee_mode") { 5388 if (IVersion.Major >= 12) 5389 return Error(IDRange.Start, "directive unsupported on gfx12+", IDRange); 5390 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5391 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Val, 5392 ValRange); 5393 } else if (ID == ".amdhsa_fp16_overflow") { 5394 if (IVersion.Major < 9) 5395 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5396 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val, 5397 ValRange); 5398 } else if (ID == ".amdhsa_tg_split") { 5399 if (!isGFX90A()) 5400 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5401 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5402 ValRange); 5403 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5404 if (IVersion.Major < 10) 5405 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5406 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val, 5407 ValRange); 5408 } else if (ID == ".amdhsa_memory_ordered") { 5409 if (IVersion.Major < 10) 5410 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5411 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val, 5412 ValRange); 5413 } else if (ID == ".amdhsa_forward_progress") { 5414 if (IVersion.Major < 10) 5415 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5416 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val, 5417 ValRange); 5418 } else if (ID == ".amdhsa_shared_vgpr_count") { 5419 if (IVersion.Major < 10) 5420 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5421 SharedVGPRCount = Val; 5422 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5423 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5424 ValRange); 5425 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5426 PARSE_BITS_ENTRY( 5427 KD.compute_pgm_rsrc2, 5428 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5429 ValRange); 5430 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5431 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5432 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5433 Val, ValRange); 5434 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5435 PARSE_BITS_ENTRY( 5436 KD.compute_pgm_rsrc2, 5437 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5438 ValRange); 5439 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5440 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5441 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5442 Val, ValRange); 5443 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5444 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5445 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5446 Val, ValRange); 5447 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5448 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5449 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5450 Val, ValRange); 5451 } else if (ID == ".amdhsa_exception_int_div_zero") { 5452 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5453 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5454 Val, ValRange); 5455 } else if (ID == ".amdhsa_round_robin_scheduling") { 5456 if (IVersion.Major < 12) 5457 return Error(IDRange.Start, "directive requires gfx12+", IDRange); 5458 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5459 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, Val, 5460 ValRange); 5461 } else { 5462 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5463 } 5464 5465 #undef PARSE_BITS_ENTRY 5466 } 5467 5468 if (!Seen.contains(".amdhsa_next_free_vgpr")) 5469 return TokError(".amdhsa_next_free_vgpr directive is required"); 5470 5471 if (!Seen.contains(".amdhsa_next_free_sgpr")) 5472 return TokError(".amdhsa_next_free_sgpr directive is required"); 5473 5474 unsigned VGPRBlocks; 5475 unsigned SGPRBlocks; 5476 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5477 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5478 EnableWavefrontSize32, NextFreeVGPR, 5479 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5480 SGPRBlocks)) 5481 return true; 5482 5483 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5484 VGPRBlocks)) 5485 return OutOfRangeError(VGPRRange); 5486 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5487 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5488 5489 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5490 SGPRBlocks)) 5491 return OutOfRangeError(SGPRRange); 5492 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5493 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5494 SGPRBlocks); 5495 5496 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5497 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 5498 "enabled user SGPRs"); 5499 5500 unsigned UserSGPRCount = 5501 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5502 5503 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5504 return TokError("too many user SGPRs enabled"); 5505 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5506 UserSGPRCount); 5507 5508 if (PreloadLength && KD.kernarg_size && 5509 (PreloadLength * 4 + PreloadOffset * 4 > KD.kernarg_size)) 5510 return TokError("Kernarg preload length + offset is larger than the " 5511 "kernarg segment size"); 5512 5513 if (isGFX90A()) { 5514 if (!Seen.contains(".amdhsa_accum_offset")) 5515 return TokError(".amdhsa_accum_offset directive is required"); 5516 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5517 return TokError("accum_offset should be in range [4..256] in " 5518 "increments of 4"); 5519 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5520 return TokError("accum_offset exceeds total VGPR allocation"); 5521 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5522 (AccumOffset / 4 - 1)); 5523 } 5524 5525 if (IVersion.Major >= 10) { 5526 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 5527 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) { 5528 return TokError("shared_vgpr_count directive not valid on " 5529 "wavefront size 32"); 5530 } 5531 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5532 return TokError("shared_vgpr_count*2 + " 5533 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5534 "exceed 63\n"); 5535 } 5536 } 5537 5538 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5539 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5540 ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion()); 5541 return false; 5542 } 5543 5544 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5545 uint32_t Major; 5546 uint32_t Minor; 5547 5548 if (ParseDirectiveMajorMinor(Major, Minor)) 5549 return true; 5550 5551 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5552 return false; 5553 } 5554 5555 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5556 uint32_t Major; 5557 uint32_t Minor; 5558 uint32_t Stepping; 5559 StringRef VendorName; 5560 StringRef ArchName; 5561 5562 // If this directive has no arguments, then use the ISA version for the 5563 // targeted GPU. 5564 if (isToken(AsmToken::EndOfStatement)) { 5565 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5566 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5567 ISA.Stepping, 5568 "AMD", "AMDGPU"); 5569 return false; 5570 } 5571 5572 if (ParseDirectiveMajorMinor(Major, Minor)) 5573 return true; 5574 5575 if (!trySkipToken(AsmToken::Comma)) 5576 return TokError("stepping version number required, comma expected"); 5577 5578 if (ParseAsAbsoluteExpression(Stepping)) 5579 return TokError("invalid stepping version"); 5580 5581 if (!trySkipToken(AsmToken::Comma)) 5582 return TokError("vendor name required, comma expected"); 5583 5584 if (!parseString(VendorName, "invalid vendor name")) 5585 return true; 5586 5587 if (!trySkipToken(AsmToken::Comma)) 5588 return TokError("arch name required, comma expected"); 5589 5590 if (!parseString(ArchName, "invalid arch name")) 5591 return true; 5592 5593 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5594 VendorName, ArchName); 5595 return false; 5596 } 5597 5598 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5599 amd_kernel_code_t &Header) { 5600 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5601 // assembly for backwards compatibility. 5602 if (ID == "max_scratch_backing_memory_byte_size") { 5603 Parser.eatToEndOfStatement(); 5604 return false; 5605 } 5606 5607 SmallString<40> ErrStr; 5608 raw_svector_ostream Err(ErrStr); 5609 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5610 return TokError(Err.str()); 5611 } 5612 Lex(); 5613 5614 if (ID == "enable_dx10_clamp") { 5615 if (G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) && 5616 isGFX12Plus()) 5617 return TokError("enable_dx10_clamp=1 is not allowed on GFX12+"); 5618 } 5619 5620 if (ID == "enable_ieee_mode") { 5621 if (G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) && 5622 isGFX12Plus()) 5623 return TokError("enable_ieee_mode=1 is not allowed on GFX12+"); 5624 } 5625 5626 if (ID == "enable_wavefront_size32") { 5627 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5628 if (!isGFX10Plus()) 5629 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5630 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5631 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5632 } else { 5633 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5634 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5635 } 5636 } 5637 5638 if (ID == "wavefront_size") { 5639 if (Header.wavefront_size == 5) { 5640 if (!isGFX10Plus()) 5641 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5642 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5643 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5644 } else if (Header.wavefront_size == 6) { 5645 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5646 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5647 } 5648 } 5649 5650 if (ID == "enable_wgp_mode") { 5651 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5652 !isGFX10Plus()) 5653 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5654 } 5655 5656 if (ID == "enable_mem_ordered") { 5657 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5658 !isGFX10Plus()) 5659 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5660 } 5661 5662 if (ID == "enable_fwd_progress") { 5663 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5664 !isGFX10Plus()) 5665 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5666 } 5667 5668 return false; 5669 } 5670 5671 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5672 amd_kernel_code_t Header; 5673 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5674 5675 while (true) { 5676 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5677 // will set the current token to EndOfStatement. 5678 while(trySkipToken(AsmToken::EndOfStatement)); 5679 5680 StringRef ID; 5681 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5682 return true; 5683 5684 if (ID == ".end_amd_kernel_code_t") 5685 break; 5686 5687 if (ParseAMDKernelCodeTValue(ID, Header)) 5688 return true; 5689 } 5690 5691 getTargetStreamer().EmitAMDKernelCodeT(Header); 5692 5693 return false; 5694 } 5695 5696 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5697 StringRef KernelName; 5698 if (!parseId(KernelName, "expected symbol name")) 5699 return true; 5700 5701 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5702 ELF::STT_AMDGPU_HSA_KERNEL); 5703 5704 KernelScope.initialize(getContext()); 5705 return false; 5706 } 5707 5708 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5709 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5710 return Error(getLoc(), 5711 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5712 "architectures"); 5713 } 5714 5715 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5716 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5717 return Error(getParser().getTok().getLoc(), "target id must match options"); 5718 5719 getTargetStreamer().EmitISAVersion(); 5720 Lex(); 5721 5722 return false; 5723 } 5724 5725 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5726 assert(isHsaAbi(getSTI())); 5727 5728 std::string HSAMetadataString; 5729 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin, 5730 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString)) 5731 return true; 5732 5733 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5734 return Error(getLoc(), "invalid HSA metadata"); 5735 5736 return false; 5737 } 5738 5739 /// Common code to parse out a block of text (typically YAML) between start and 5740 /// end directives. 5741 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5742 const char *AssemblerDirectiveEnd, 5743 std::string &CollectString) { 5744 5745 raw_string_ostream CollectStream(CollectString); 5746 5747 getLexer().setSkipSpace(false); 5748 5749 bool FoundEnd = false; 5750 while (!isToken(AsmToken::Eof)) { 5751 while (isToken(AsmToken::Space)) { 5752 CollectStream << getTokenStr(); 5753 Lex(); 5754 } 5755 5756 if (trySkipId(AssemblerDirectiveEnd)) { 5757 FoundEnd = true; 5758 break; 5759 } 5760 5761 CollectStream << Parser.parseStringToEndOfStatement() 5762 << getContext().getAsmInfo()->getSeparatorString(); 5763 5764 Parser.eatToEndOfStatement(); 5765 } 5766 5767 getLexer().setSkipSpace(true); 5768 5769 if (isToken(AsmToken::Eof) && !FoundEnd) { 5770 return TokError(Twine("expected directive ") + 5771 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5772 } 5773 5774 CollectStream.flush(); 5775 return false; 5776 } 5777 5778 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5779 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5780 std::string String; 5781 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5782 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5783 return true; 5784 5785 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5786 if (!PALMetadata->setFromString(String)) 5787 return Error(getLoc(), "invalid PAL metadata"); 5788 return false; 5789 } 5790 5791 /// Parse the assembler directive for old linear-format PAL metadata. 5792 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5793 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5794 return Error(getLoc(), 5795 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5796 "not available on non-amdpal OSes")).str()); 5797 } 5798 5799 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5800 PALMetadata->setLegacy(); 5801 for (;;) { 5802 uint32_t Key, Value; 5803 if (ParseAsAbsoluteExpression(Key)) { 5804 return TokError(Twine("invalid value in ") + 5805 Twine(PALMD::AssemblerDirective)); 5806 } 5807 if (!trySkipToken(AsmToken::Comma)) { 5808 return TokError(Twine("expected an even number of values in ") + 5809 Twine(PALMD::AssemblerDirective)); 5810 } 5811 if (ParseAsAbsoluteExpression(Value)) { 5812 return TokError(Twine("invalid value in ") + 5813 Twine(PALMD::AssemblerDirective)); 5814 } 5815 PALMetadata->setRegister(Key, Value); 5816 if (!trySkipToken(AsmToken::Comma)) 5817 break; 5818 } 5819 return false; 5820 } 5821 5822 /// ParseDirectiveAMDGPULDS 5823 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5824 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5825 if (getParser().checkForValidSection()) 5826 return true; 5827 5828 StringRef Name; 5829 SMLoc NameLoc = getLoc(); 5830 if (getParser().parseIdentifier(Name)) 5831 return TokError("expected identifier in directive"); 5832 5833 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5834 if (getParser().parseComma()) 5835 return true; 5836 5837 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5838 5839 int64_t Size; 5840 SMLoc SizeLoc = getLoc(); 5841 if (getParser().parseAbsoluteExpression(Size)) 5842 return true; 5843 if (Size < 0) 5844 return Error(SizeLoc, "size must be non-negative"); 5845 if (Size > LocalMemorySize) 5846 return Error(SizeLoc, "size is too large"); 5847 5848 int64_t Alignment = 4; 5849 if (trySkipToken(AsmToken::Comma)) { 5850 SMLoc AlignLoc = getLoc(); 5851 if (getParser().parseAbsoluteExpression(Alignment)) 5852 return true; 5853 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5854 return Error(AlignLoc, "alignment must be a power of two"); 5855 5856 // Alignment larger than the size of LDS is possible in theory, as long 5857 // as the linker manages to place to symbol at address 0, but we do want 5858 // to make sure the alignment fits nicely into a 32-bit integer. 5859 if (Alignment >= 1u << 31) 5860 return Error(AlignLoc, "alignment is too large"); 5861 } 5862 5863 if (parseEOL()) 5864 return true; 5865 5866 Symbol->redefineIfPossible(); 5867 if (!Symbol->isUndefined()) 5868 return Error(NameLoc, "invalid symbol redefinition"); 5869 5870 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5871 return false; 5872 } 5873 5874 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5875 StringRef IDVal = DirectiveID.getString(); 5876 5877 if (isHsaAbi(getSTI())) { 5878 if (IDVal == ".amdhsa_kernel") 5879 return ParseDirectiveAMDHSAKernel(); 5880 5881 // TODO: Restructure/combine with PAL metadata directive. 5882 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5883 return ParseDirectiveHSAMetadata(); 5884 } else { 5885 if (IDVal == ".hsa_code_object_version") 5886 return ParseDirectiveHSACodeObjectVersion(); 5887 5888 if (IDVal == ".hsa_code_object_isa") 5889 return ParseDirectiveHSACodeObjectISA(); 5890 5891 if (IDVal == ".amd_kernel_code_t") 5892 return ParseDirectiveAMDKernelCodeT(); 5893 5894 if (IDVal == ".amdgpu_hsa_kernel") 5895 return ParseDirectiveAMDGPUHsaKernel(); 5896 5897 if (IDVal == ".amd_amdgpu_isa") 5898 return ParseDirectiveISAVersion(); 5899 5900 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) { 5901 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) + 5902 Twine(" directive is " 5903 "not available on non-amdhsa OSes")) 5904 .str()); 5905 } 5906 } 5907 5908 if (IDVal == ".amdgcn_target") 5909 return ParseDirectiveAMDGCNTarget(); 5910 5911 if (IDVal == ".amdgpu_lds") 5912 return ParseDirectiveAMDGPULDS(); 5913 5914 if (IDVal == PALMD::AssemblerDirectiveBegin) 5915 return ParseDirectivePALMetadataBegin(); 5916 5917 if (IDVal == PALMD::AssemblerDirective) 5918 return ParseDirectivePALMetadata(); 5919 5920 return true; 5921 } 5922 5923 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5924 unsigned RegNo) { 5925 5926 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5927 return isGFX9Plus(); 5928 5929 // GFX10+ has 2 more SGPRs 104 and 105. 5930 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5931 return hasSGPR104_SGPR105(); 5932 5933 switch (RegNo) { 5934 case AMDGPU::SRC_SHARED_BASE_LO: 5935 case AMDGPU::SRC_SHARED_BASE: 5936 case AMDGPU::SRC_SHARED_LIMIT_LO: 5937 case AMDGPU::SRC_SHARED_LIMIT: 5938 case AMDGPU::SRC_PRIVATE_BASE_LO: 5939 case AMDGPU::SRC_PRIVATE_BASE: 5940 case AMDGPU::SRC_PRIVATE_LIMIT_LO: 5941 case AMDGPU::SRC_PRIVATE_LIMIT: 5942 return isGFX9Plus(); 5943 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5944 return isGFX9Plus() && !isGFX11Plus(); 5945 case AMDGPU::TBA: 5946 case AMDGPU::TBA_LO: 5947 case AMDGPU::TBA_HI: 5948 case AMDGPU::TMA: 5949 case AMDGPU::TMA_LO: 5950 case AMDGPU::TMA_HI: 5951 return !isGFX9Plus(); 5952 case AMDGPU::XNACK_MASK: 5953 case AMDGPU::XNACK_MASK_LO: 5954 case AMDGPU::XNACK_MASK_HI: 5955 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5956 case AMDGPU::SGPR_NULL: 5957 return isGFX10Plus(); 5958 default: 5959 break; 5960 } 5961 5962 if (isCI()) 5963 return true; 5964 5965 if (isSI() || isGFX10Plus()) { 5966 // No flat_scr on SI. 5967 // On GFX10Plus flat scratch is not a valid register operand and can only be 5968 // accessed with s_setreg/s_getreg. 5969 switch (RegNo) { 5970 case AMDGPU::FLAT_SCR: 5971 case AMDGPU::FLAT_SCR_LO: 5972 case AMDGPU::FLAT_SCR_HI: 5973 return false; 5974 default: 5975 return true; 5976 } 5977 } 5978 5979 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5980 // SI/CI have. 5981 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5982 return hasSGPR102_SGPR103(); 5983 5984 return true; 5985 } 5986 5987 ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands, 5988 StringRef Mnemonic, 5989 OperandMode Mode) { 5990 ParseStatus Res = parseVOPD(Operands); 5991 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 5992 return Res; 5993 5994 // Try to parse with a custom parser 5995 Res = MatchOperandParserImpl(Operands, Mnemonic); 5996 5997 // If we successfully parsed the operand or if there as an error parsing, 5998 // we are done. 5999 // 6000 // If we are parsing after we reach EndOfStatement then this means we 6001 // are appending default values to the Operands list. This is only done 6002 // by custom parser, so we shouldn't continue on to the generic parsing. 6003 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement)) 6004 return Res; 6005 6006 SMLoc RBraceLoc; 6007 SMLoc LBraceLoc = getLoc(); 6008 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 6009 unsigned Prefix = Operands.size(); 6010 6011 for (;;) { 6012 auto Loc = getLoc(); 6013 Res = parseReg(Operands); 6014 if (Res.isNoMatch()) 6015 Error(Loc, "expected a register"); 6016 if (!Res.isSuccess()) 6017 return ParseStatus::Failure; 6018 6019 RBraceLoc = getLoc(); 6020 if (trySkipToken(AsmToken::RBrac)) 6021 break; 6022 6023 if (!skipToken(AsmToken::Comma, 6024 "expected a comma or a closing square bracket")) 6025 return ParseStatus::Failure; 6026 } 6027 6028 if (Operands.size() - Prefix > 1) { 6029 Operands.insert(Operands.begin() + Prefix, 6030 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 6031 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 6032 } 6033 6034 return ParseStatus::Success; 6035 } 6036 6037 return parseRegOrImm(Operands); 6038 } 6039 6040 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 6041 // Clear any forced encodings from the previous instruction. 6042 setForcedEncodingSize(0); 6043 setForcedDPP(false); 6044 setForcedSDWA(false); 6045 6046 if (Name.ends_with("_e64_dpp")) { 6047 setForcedDPP(true); 6048 setForcedEncodingSize(64); 6049 return Name.substr(0, Name.size() - 8); 6050 } else if (Name.ends_with("_e64")) { 6051 setForcedEncodingSize(64); 6052 return Name.substr(0, Name.size() - 4); 6053 } else if (Name.ends_with("_e32")) { 6054 setForcedEncodingSize(32); 6055 return Name.substr(0, Name.size() - 4); 6056 } else if (Name.ends_with("_dpp")) { 6057 setForcedDPP(true); 6058 return Name.substr(0, Name.size() - 4); 6059 } else if (Name.ends_with("_sdwa")) { 6060 setForcedSDWA(true); 6061 return Name.substr(0, Name.size() - 5); 6062 } 6063 return Name; 6064 } 6065 6066 static void applyMnemonicAliases(StringRef &Mnemonic, 6067 const FeatureBitset &Features, 6068 unsigned VariantID); 6069 6070 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 6071 StringRef Name, 6072 SMLoc NameLoc, OperandVector &Operands) { 6073 // Add the instruction mnemonic 6074 Name = parseMnemonicSuffix(Name); 6075 6076 // If the target architecture uses MnemonicAlias, call it here to parse 6077 // operands correctly. 6078 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 6079 6080 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 6081 6082 bool IsMIMG = Name.starts_with("image_"); 6083 6084 while (!trySkipToken(AsmToken::EndOfStatement)) { 6085 OperandMode Mode = OperandMode_Default; 6086 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 6087 Mode = OperandMode_NSA; 6088 ParseStatus Res = parseOperand(Operands, Name, Mode); 6089 6090 if (!Res.isSuccess()) { 6091 checkUnsupportedInstruction(Name, NameLoc); 6092 if (!Parser.hasPendingError()) { 6093 // FIXME: use real operand location rather than the current location. 6094 StringRef Msg = Res.isFailure() ? "failed parsing operand." 6095 : "not a valid operand."; 6096 Error(getLoc(), Msg); 6097 } 6098 while (!trySkipToken(AsmToken::EndOfStatement)) { 6099 lex(); 6100 } 6101 return true; 6102 } 6103 6104 // Eat the comma or space if there is one. 6105 trySkipToken(AsmToken::Comma); 6106 } 6107 6108 return false; 6109 } 6110 6111 //===----------------------------------------------------------------------===// 6112 // Utility functions 6113 //===----------------------------------------------------------------------===// 6114 6115 ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name, 6116 OperandVector &Operands) { 6117 SMLoc S = getLoc(); 6118 if (!trySkipId(Name)) 6119 return ParseStatus::NoMatch; 6120 6121 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S)); 6122 return ParseStatus::Success; 6123 } 6124 6125 ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, 6126 int64_t &IntVal) { 6127 6128 if (!trySkipId(Prefix, AsmToken::Colon)) 6129 return ParseStatus::NoMatch; 6130 6131 return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure; 6132 } 6133 6134 ParseStatus AMDGPUAsmParser::parseIntWithPrefix( 6135 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6136 std::function<bool(int64_t &)> ConvertResult) { 6137 SMLoc S = getLoc(); 6138 int64_t Value = 0; 6139 6140 ParseStatus Res = parseIntWithPrefix(Prefix, Value); 6141 if (!Res.isSuccess()) 6142 return Res; 6143 6144 if (ConvertResult && !ConvertResult(Value)) { 6145 Error(S, "invalid " + StringRef(Prefix) + " value."); 6146 } 6147 6148 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 6149 return ParseStatus::Success; 6150 } 6151 6152 ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix( 6153 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy, 6154 bool (*ConvertResult)(int64_t &)) { 6155 SMLoc S = getLoc(); 6156 if (!trySkipId(Prefix, AsmToken::Colon)) 6157 return ParseStatus::NoMatch; 6158 6159 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 6160 return ParseStatus::Failure; 6161 6162 unsigned Val = 0; 6163 const unsigned MaxSize = 4; 6164 6165 // FIXME: How to verify the number of elements matches the number of src 6166 // operands? 6167 for (int I = 0; ; ++I) { 6168 int64_t Op; 6169 SMLoc Loc = getLoc(); 6170 if (!parseExpr(Op)) 6171 return ParseStatus::Failure; 6172 6173 if (Op != 0 && Op != 1) 6174 return Error(Loc, "invalid " + StringRef(Prefix) + " value."); 6175 6176 Val |= (Op << I); 6177 6178 if (trySkipToken(AsmToken::RBrac)) 6179 break; 6180 6181 if (I + 1 == MaxSize) 6182 return Error(getLoc(), "expected a closing square bracket"); 6183 6184 if (!skipToken(AsmToken::Comma, "expected a comma")) 6185 return ParseStatus::Failure; 6186 } 6187 6188 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 6189 return ParseStatus::Success; 6190 } 6191 6192 ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name, 6193 OperandVector &Operands, 6194 AMDGPUOperand::ImmTy ImmTy) { 6195 int64_t Bit; 6196 SMLoc S = getLoc(); 6197 6198 if (trySkipId(Name)) { 6199 Bit = 1; 6200 } else if (trySkipId("no", Name)) { 6201 Bit = 0; 6202 } else { 6203 return ParseStatus::NoMatch; 6204 } 6205 6206 if (Name == "r128" && !hasMIMG_R128()) 6207 return Error(S, "r128 modifier is not supported on this GPU"); 6208 if (Name == "a16" && !hasA16()) 6209 return Error(S, "a16 modifier is not supported on this GPU"); 6210 6211 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 6212 ImmTy = AMDGPUOperand::ImmTyR128A16; 6213 6214 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 6215 return ParseStatus::Success; 6216 } 6217 6218 unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo, 6219 bool &Disabling) const { 6220 Disabling = Id.consume_front("no"); 6221 6222 if (isGFX940() && !Mnemo.starts_with("s_")) { 6223 return StringSwitch<unsigned>(Id) 6224 .Case("nt", AMDGPU::CPol::NT) 6225 .Case("sc0", AMDGPU::CPol::SC0) 6226 .Case("sc1", AMDGPU::CPol::SC1) 6227 .Default(0); 6228 } 6229 6230 return StringSwitch<unsigned>(Id) 6231 .Case("dlc", AMDGPU::CPol::DLC) 6232 .Case("glc", AMDGPU::CPol::GLC) 6233 .Case("scc", AMDGPU::CPol::SCC) 6234 .Case("slc", AMDGPU::CPol::SLC) 6235 .Default(0); 6236 } 6237 6238 ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 6239 if (isGFX12Plus()) { 6240 SMLoc StringLoc = getLoc(); 6241 6242 int64_t CPolVal = 0; 6243 ParseStatus ResTH = ParseStatus::NoMatch; 6244 ParseStatus ResScope = ParseStatus::NoMatch; 6245 6246 for (;;) { 6247 if (ResTH.isNoMatch()) { 6248 int64_t TH; 6249 ResTH = parseTH(Operands, TH); 6250 if (ResTH.isFailure()) 6251 return ResTH; 6252 if (ResTH.isSuccess()) { 6253 CPolVal |= TH; 6254 continue; 6255 } 6256 } 6257 6258 if (ResScope.isNoMatch()) { 6259 int64_t Scope; 6260 ResScope = parseScope(Operands, Scope); 6261 if (ResScope.isFailure()) 6262 return ResScope; 6263 if (ResScope.isSuccess()) { 6264 CPolVal |= Scope; 6265 continue; 6266 } 6267 } 6268 6269 break; 6270 } 6271 6272 if (ResTH.isNoMatch() && ResScope.isNoMatch()) 6273 return ParseStatus::NoMatch; 6274 6275 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc, 6276 AMDGPUOperand::ImmTyCPol)); 6277 return ParseStatus::Success; 6278 } 6279 6280 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 6281 SMLoc OpLoc = getLoc(); 6282 unsigned Enabled = 0, Seen = 0; 6283 for (;;) { 6284 SMLoc S = getLoc(); 6285 bool Disabling; 6286 unsigned CPol = getCPolKind(getId(), Mnemo, Disabling); 6287 if (!CPol) 6288 break; 6289 6290 lex(); 6291 6292 if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) 6293 return Error(S, "dlc modifier is not supported on this GPU"); 6294 6295 if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) 6296 return Error(S, "scc modifier is not supported on this GPU"); 6297 6298 if (Seen & CPol) 6299 return Error(S, "duplicate cache policy modifier"); 6300 6301 if (!Disabling) 6302 Enabled |= CPol; 6303 6304 Seen |= CPol; 6305 } 6306 6307 if (!Seen) 6308 return ParseStatus::NoMatch; 6309 6310 Operands.push_back( 6311 AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol)); 6312 return ParseStatus::Success; 6313 } 6314 6315 ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands, 6316 int64_t &Scope) { 6317 Scope = AMDGPU::CPol::SCOPE_CU; // default; 6318 6319 StringRef Value; 6320 SMLoc StringLoc; 6321 ParseStatus Res; 6322 6323 Res = parseStringWithPrefix("scope", Value, StringLoc); 6324 if (!Res.isSuccess()) 6325 return Res; 6326 6327 Scope = StringSwitch<int64_t>(Value) 6328 .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU) 6329 .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE) 6330 .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV) 6331 .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS) 6332 .Default(0xffffffff); 6333 6334 if (Scope == 0xffffffff) 6335 return Error(StringLoc, "invalid scope value"); 6336 6337 return ParseStatus::Success; 6338 } 6339 6340 ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) { 6341 TH = AMDGPU::CPol::TH_RT; // default 6342 6343 StringRef Value; 6344 SMLoc StringLoc; 6345 ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc); 6346 if (!Res.isSuccess()) 6347 return Res; 6348 6349 if (Value == "TH_DEFAULT") 6350 TH = AMDGPU::CPol::TH_RT; 6351 else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" || 6352 Value == "TH_LOAD_NT_WB") { 6353 return Error(StringLoc, "invalid th value"); 6354 } else if (Value.starts_with("TH_ATOMIC_")) { 6355 Value = Value.drop_front(10); 6356 TH = AMDGPU::CPol::TH_TYPE_ATOMIC; 6357 } else if (Value.starts_with("TH_LOAD_")) { 6358 Value = Value.drop_front(8); 6359 TH = AMDGPU::CPol::TH_TYPE_LOAD; 6360 } else if (Value.starts_with("TH_STORE_")) { 6361 Value = Value.drop_front(9); 6362 TH = AMDGPU::CPol::TH_TYPE_STORE; 6363 } else { 6364 return Error(StringLoc, "invalid th value"); 6365 } 6366 6367 if (Value == "BYPASS") 6368 TH |= AMDGPU::CPol::TH_REAL_BYPASS; 6369 6370 if (TH != 0) { 6371 if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC) 6372 TH |= StringSwitch<int64_t>(Value) 6373 .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6374 .Case("RT", AMDGPU::CPol::TH_RT) 6375 .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN) 6376 .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT) 6377 .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT | 6378 AMDGPU::CPol::TH_ATOMIC_RETURN) 6379 .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE) 6380 .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE | 6381 AMDGPU::CPol::TH_ATOMIC_NT) 6382 .Default(0xffffffff); 6383 else 6384 TH |= StringSwitch<int64_t>(Value) 6385 .Case("RT", AMDGPU::CPol::TH_RT) 6386 .Case("NT", AMDGPU::CPol::TH_NT) 6387 .Case("HT", AMDGPU::CPol::TH_HT) 6388 .Case("LU", AMDGPU::CPol::TH_LU) 6389 .Case("RT_WB", AMDGPU::CPol::TH_RT_WB) 6390 .Case("NT_RT", AMDGPU::CPol::TH_NT_RT) 6391 .Case("RT_NT", AMDGPU::CPol::TH_RT_NT) 6392 .Case("NT_HT", AMDGPU::CPol::TH_NT_HT) 6393 .Case("NT_WB", AMDGPU::CPol::TH_NT_WB) 6394 .Case("BYPASS", AMDGPU::CPol::TH_BYPASS) 6395 .Default(0xffffffff); 6396 } 6397 6398 if (TH == 0xffffffff) 6399 return Error(StringLoc, "invalid th value"); 6400 6401 return ParseStatus::Success; 6402 } 6403 6404 static void addOptionalImmOperand( 6405 MCInst& Inst, const OperandVector& Operands, 6406 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 6407 AMDGPUOperand::ImmTy ImmT, 6408 int64_t Default = 0) { 6409 auto i = OptionalIdx.find(ImmT); 6410 if (i != OptionalIdx.end()) { 6411 unsigned Idx = i->second; 6412 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 6413 } else { 6414 Inst.addOperand(MCOperand::createImm(Default)); 6415 } 6416 } 6417 6418 ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6419 StringRef &Value, 6420 SMLoc &StringLoc) { 6421 if (!trySkipId(Prefix, AsmToken::Colon)) 6422 return ParseStatus::NoMatch; 6423 6424 StringLoc = getLoc(); 6425 return parseId(Value, "expected an identifier") ? ParseStatus::Success 6426 : ParseStatus::Failure; 6427 } 6428 6429 //===----------------------------------------------------------------------===// 6430 // MTBUF format 6431 //===----------------------------------------------------------------------===// 6432 6433 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6434 int64_t MaxVal, 6435 int64_t &Fmt) { 6436 int64_t Val; 6437 SMLoc Loc = getLoc(); 6438 6439 auto Res = parseIntWithPrefix(Pref, Val); 6440 if (Res.isFailure()) 6441 return false; 6442 if (Res.isNoMatch()) 6443 return true; 6444 6445 if (Val < 0 || Val > MaxVal) { 6446 Error(Loc, Twine("out of range ", StringRef(Pref))); 6447 return false; 6448 } 6449 6450 Fmt = Val; 6451 return true; 6452 } 6453 6454 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6455 // values to live in a joint format operand in the MCInst encoding. 6456 ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6457 using namespace llvm::AMDGPU::MTBUFFormat; 6458 6459 int64_t Dfmt = DFMT_UNDEF; 6460 int64_t Nfmt = NFMT_UNDEF; 6461 6462 // dfmt and nfmt can appear in either order, and each is optional. 6463 for (int I = 0; I < 2; ++I) { 6464 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6465 return ParseStatus::Failure; 6466 6467 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) 6468 return ParseStatus::Failure; 6469 6470 // Skip optional comma between dfmt/nfmt 6471 // but guard against 2 commas following each other. 6472 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6473 !peekToken().is(AsmToken::Comma)) { 6474 trySkipToken(AsmToken::Comma); 6475 } 6476 } 6477 6478 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6479 return ParseStatus::NoMatch; 6480 6481 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6482 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 6483 6484 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6485 return ParseStatus::Success; 6486 } 6487 6488 ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6489 using namespace llvm::AMDGPU::MTBUFFormat; 6490 6491 int64_t Fmt = UFMT_UNDEF; 6492 6493 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6494 return ParseStatus::Failure; 6495 6496 if (Fmt == UFMT_UNDEF) 6497 return ParseStatus::NoMatch; 6498 6499 Format = Fmt; 6500 return ParseStatus::Success; 6501 } 6502 6503 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6504 int64_t &Nfmt, 6505 StringRef FormatStr, 6506 SMLoc Loc) { 6507 using namespace llvm::AMDGPU::MTBUFFormat; 6508 int64_t Format; 6509 6510 Format = getDfmt(FormatStr); 6511 if (Format != DFMT_UNDEF) { 6512 Dfmt = Format; 6513 return true; 6514 } 6515 6516 Format = getNfmt(FormatStr, getSTI()); 6517 if (Format != NFMT_UNDEF) { 6518 Nfmt = Format; 6519 return true; 6520 } 6521 6522 Error(Loc, "unsupported format"); 6523 return false; 6524 } 6525 6526 ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6527 SMLoc FormatLoc, 6528 int64_t &Format) { 6529 using namespace llvm::AMDGPU::MTBUFFormat; 6530 6531 int64_t Dfmt = DFMT_UNDEF; 6532 int64_t Nfmt = NFMT_UNDEF; 6533 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6534 return ParseStatus::Failure; 6535 6536 if (trySkipToken(AsmToken::Comma)) { 6537 StringRef Str; 6538 SMLoc Loc = getLoc(); 6539 if (!parseId(Str, "expected a format string") || 6540 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) 6541 return ParseStatus::Failure; 6542 if (Dfmt == DFMT_UNDEF) 6543 return Error(Loc, "duplicate numeric format"); 6544 if (Nfmt == NFMT_UNDEF) 6545 return Error(Loc, "duplicate data format"); 6546 } 6547 6548 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6549 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 6550 6551 if (isGFX10Plus()) { 6552 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6553 if (Ufmt == UFMT_UNDEF) 6554 return Error(FormatLoc, "unsupported format"); 6555 Format = Ufmt; 6556 } else { 6557 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6558 } 6559 6560 return ParseStatus::Success; 6561 } 6562 6563 ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6564 SMLoc Loc, 6565 int64_t &Format) { 6566 using namespace llvm::AMDGPU::MTBUFFormat; 6567 6568 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6569 if (Id == UFMT_UNDEF) 6570 return ParseStatus::NoMatch; 6571 6572 if (!isGFX10Plus()) 6573 return Error(Loc, "unified format is not supported on this GPU"); 6574 6575 Format = Id; 6576 return ParseStatus::Success; 6577 } 6578 6579 ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6580 using namespace llvm::AMDGPU::MTBUFFormat; 6581 SMLoc Loc = getLoc(); 6582 6583 if (!parseExpr(Format)) 6584 return ParseStatus::Failure; 6585 if (!isValidFormatEncoding(Format, getSTI())) 6586 return Error(Loc, "out of range format"); 6587 6588 return ParseStatus::Success; 6589 } 6590 6591 ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6592 using namespace llvm::AMDGPU::MTBUFFormat; 6593 6594 if (!trySkipId("format", AsmToken::Colon)) 6595 return ParseStatus::NoMatch; 6596 6597 if (trySkipToken(AsmToken::LBrac)) { 6598 StringRef FormatStr; 6599 SMLoc Loc = getLoc(); 6600 if (!parseId(FormatStr, "expected a format string")) 6601 return ParseStatus::Failure; 6602 6603 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6604 if (Res.isNoMatch()) 6605 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6606 if (!Res.isSuccess()) 6607 return Res; 6608 6609 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6610 return ParseStatus::Failure; 6611 6612 return ParseStatus::Success; 6613 } 6614 6615 return parseNumericFormat(Format); 6616 } 6617 6618 ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6619 using namespace llvm::AMDGPU::MTBUFFormat; 6620 6621 int64_t Format = getDefaultFormatEncoding(getSTI()); 6622 ParseStatus Res; 6623 SMLoc Loc = getLoc(); 6624 6625 // Parse legacy format syntax. 6626 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6627 if (Res.isFailure()) 6628 return Res; 6629 6630 bool FormatFound = Res.isSuccess(); 6631 6632 Operands.push_back( 6633 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6634 6635 if (FormatFound) 6636 trySkipToken(AsmToken::Comma); 6637 6638 if (isToken(AsmToken::EndOfStatement)) { 6639 // We are expecting an soffset operand, 6640 // but let matcher handle the error. 6641 return ParseStatus::Success; 6642 } 6643 6644 // Parse soffset. 6645 Res = parseRegOrImm(Operands); 6646 if (!Res.isSuccess()) 6647 return Res; 6648 6649 trySkipToken(AsmToken::Comma); 6650 6651 if (!FormatFound) { 6652 Res = parseSymbolicOrNumericFormat(Format); 6653 if (Res.isFailure()) 6654 return Res; 6655 if (Res.isSuccess()) { 6656 auto Size = Operands.size(); 6657 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6658 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6659 Op.setImm(Format); 6660 } 6661 return ParseStatus::Success; 6662 } 6663 6664 if (isId("format") && peekToken().is(AsmToken::Colon)) 6665 return Error(getLoc(), "duplicate format"); 6666 return ParseStatus::Success; 6667 } 6668 6669 ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) { 6670 ParseStatus Res = 6671 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset); 6672 if (Res.isNoMatch()) { 6673 Res = parseIntWithPrefix("inst_offset", Operands, 6674 AMDGPUOperand::ImmTyInstOffset); 6675 } 6676 return Res; 6677 } 6678 6679 ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) { 6680 ParseStatus Res = 6681 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16); 6682 if (Res.isNoMatch()) 6683 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16); 6684 return Res; 6685 } 6686 6687 ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) { 6688 ParseStatus Res = 6689 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP); 6690 if (Res.isNoMatch()) { 6691 Res = 6692 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP); 6693 } 6694 return Res; 6695 } 6696 6697 //===----------------------------------------------------------------------===// 6698 // Exp 6699 //===----------------------------------------------------------------------===// 6700 6701 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6702 OptionalImmIndexMap OptionalIdx; 6703 6704 unsigned OperandIdx[4]; 6705 unsigned EnMask = 0; 6706 int SrcIdx = 0; 6707 6708 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6709 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6710 6711 // Add the register arguments 6712 if (Op.isReg()) { 6713 assert(SrcIdx < 4); 6714 OperandIdx[SrcIdx] = Inst.size(); 6715 Op.addRegOperands(Inst, 1); 6716 ++SrcIdx; 6717 continue; 6718 } 6719 6720 if (Op.isOff()) { 6721 assert(SrcIdx < 4); 6722 OperandIdx[SrcIdx] = Inst.size(); 6723 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6724 ++SrcIdx; 6725 continue; 6726 } 6727 6728 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6729 Op.addImmOperands(Inst, 1); 6730 continue; 6731 } 6732 6733 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6734 continue; 6735 6736 // Handle optional arguments 6737 OptionalIdx[Op.getImmTy()] = i; 6738 } 6739 6740 assert(SrcIdx == 4); 6741 6742 bool Compr = false; 6743 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6744 Compr = true; 6745 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6746 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6747 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6748 } 6749 6750 for (auto i = 0; i < SrcIdx; ++i) { 6751 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6752 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 6753 } 6754 } 6755 6756 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6757 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6758 6759 Inst.addOperand(MCOperand::createImm(EnMask)); 6760 } 6761 6762 //===----------------------------------------------------------------------===// 6763 // s_waitcnt 6764 //===----------------------------------------------------------------------===// 6765 6766 static bool 6767 encodeCnt( 6768 const AMDGPU::IsaVersion ISA, 6769 int64_t &IntVal, 6770 int64_t CntVal, 6771 bool Saturate, 6772 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6773 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6774 { 6775 bool Failed = false; 6776 6777 IntVal = encode(ISA, IntVal, CntVal); 6778 if (CntVal != decode(ISA, IntVal)) { 6779 if (Saturate) { 6780 IntVal = encode(ISA, IntVal, -1); 6781 } else { 6782 Failed = true; 6783 } 6784 } 6785 return Failed; 6786 } 6787 6788 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6789 6790 SMLoc CntLoc = getLoc(); 6791 StringRef CntName = getTokenStr(); 6792 6793 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6794 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6795 return false; 6796 6797 int64_t CntVal; 6798 SMLoc ValLoc = getLoc(); 6799 if (!parseExpr(CntVal)) 6800 return false; 6801 6802 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6803 6804 bool Failed = true; 6805 bool Sat = CntName.ends_with("_sat"); 6806 6807 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6808 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6809 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6810 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6811 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6812 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6813 } else { 6814 Error(CntLoc, "invalid counter name " + CntName); 6815 return false; 6816 } 6817 6818 if (Failed) { 6819 Error(ValLoc, "too large value for " + CntName); 6820 return false; 6821 } 6822 6823 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6824 return false; 6825 6826 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6827 if (isToken(AsmToken::EndOfStatement)) { 6828 Error(getLoc(), "expected a counter name"); 6829 return false; 6830 } 6831 } 6832 6833 return true; 6834 } 6835 6836 ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) { 6837 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6838 int64_t Waitcnt = getWaitcntBitMask(ISA); 6839 SMLoc S = getLoc(); 6840 6841 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6842 while (!isToken(AsmToken::EndOfStatement)) { 6843 if (!parseCnt(Waitcnt)) 6844 return ParseStatus::Failure; 6845 } 6846 } else { 6847 if (!parseExpr(Waitcnt)) 6848 return ParseStatus::Failure; 6849 } 6850 6851 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6852 return ParseStatus::Success; 6853 } 6854 6855 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6856 SMLoc FieldLoc = getLoc(); 6857 StringRef FieldName = getTokenStr(); 6858 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6859 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6860 return false; 6861 6862 SMLoc ValueLoc = getLoc(); 6863 StringRef ValueName = getTokenStr(); 6864 if (!skipToken(AsmToken::Identifier, "expected a value name") || 6865 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6866 return false; 6867 6868 unsigned Shift; 6869 if (FieldName == "instid0") { 6870 Shift = 0; 6871 } else if (FieldName == "instskip") { 6872 Shift = 4; 6873 } else if (FieldName == "instid1") { 6874 Shift = 7; 6875 } else { 6876 Error(FieldLoc, "invalid field name " + FieldName); 6877 return false; 6878 } 6879 6880 int Value; 6881 if (Shift == 4) { 6882 // Parse values for instskip. 6883 Value = StringSwitch<int>(ValueName) 6884 .Case("SAME", 0) 6885 .Case("NEXT", 1) 6886 .Case("SKIP_1", 2) 6887 .Case("SKIP_2", 3) 6888 .Case("SKIP_3", 4) 6889 .Case("SKIP_4", 5) 6890 .Default(-1); 6891 } else { 6892 // Parse values for instid0 and instid1. 6893 Value = StringSwitch<int>(ValueName) 6894 .Case("NO_DEP", 0) 6895 .Case("VALU_DEP_1", 1) 6896 .Case("VALU_DEP_2", 2) 6897 .Case("VALU_DEP_3", 3) 6898 .Case("VALU_DEP_4", 4) 6899 .Case("TRANS32_DEP_1", 5) 6900 .Case("TRANS32_DEP_2", 6) 6901 .Case("TRANS32_DEP_3", 7) 6902 .Case("FMA_ACCUM_CYCLE_1", 8) 6903 .Case("SALU_CYCLE_1", 9) 6904 .Case("SALU_CYCLE_2", 10) 6905 .Case("SALU_CYCLE_3", 11) 6906 .Default(-1); 6907 } 6908 if (Value < 0) { 6909 Error(ValueLoc, "invalid value name " + ValueName); 6910 return false; 6911 } 6912 6913 Delay |= Value << Shift; 6914 return true; 6915 } 6916 6917 ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) { 6918 int64_t Delay = 0; 6919 SMLoc S = getLoc(); 6920 6921 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6922 do { 6923 if (!parseDelay(Delay)) 6924 return ParseStatus::Failure; 6925 } while (trySkipToken(AsmToken::Pipe)); 6926 } else { 6927 if (!parseExpr(Delay)) 6928 return ParseStatus::Failure; 6929 } 6930 6931 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6932 return ParseStatus::Success; 6933 } 6934 6935 bool 6936 AMDGPUOperand::isSWaitCnt() const { 6937 return isImm(); 6938 } 6939 6940 bool AMDGPUOperand::isSDelayALU() const { return isImm(); } 6941 6942 //===----------------------------------------------------------------------===// 6943 // DepCtr 6944 //===----------------------------------------------------------------------===// 6945 6946 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6947 StringRef DepCtrName) { 6948 switch (ErrorId) { 6949 case OPR_ID_UNKNOWN: 6950 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6951 return; 6952 case OPR_ID_UNSUPPORTED: 6953 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6954 return; 6955 case OPR_ID_DUPLICATE: 6956 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6957 return; 6958 case OPR_VAL_INVALID: 6959 Error(Loc, Twine("invalid value for ", DepCtrName)); 6960 return; 6961 default: 6962 assert(false); 6963 } 6964 } 6965 6966 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6967 6968 using namespace llvm::AMDGPU::DepCtr; 6969 6970 SMLoc DepCtrLoc = getLoc(); 6971 StringRef DepCtrName = getTokenStr(); 6972 6973 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6974 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6975 return false; 6976 6977 int64_t ExprVal; 6978 if (!parseExpr(ExprVal)) 6979 return false; 6980 6981 unsigned PrevOprMask = UsedOprMask; 6982 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6983 6984 if (CntVal < 0) { 6985 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6986 return false; 6987 } 6988 6989 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6990 return false; 6991 6992 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6993 if (isToken(AsmToken::EndOfStatement)) { 6994 Error(getLoc(), "expected a counter name"); 6995 return false; 6996 } 6997 } 6998 6999 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 7000 DepCtr = (DepCtr & ~CntValMask) | CntVal; 7001 return true; 7002 } 7003 7004 ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) { 7005 using namespace llvm::AMDGPU::DepCtr; 7006 7007 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 7008 SMLoc Loc = getLoc(); 7009 7010 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 7011 unsigned UsedOprMask = 0; 7012 while (!isToken(AsmToken::EndOfStatement)) { 7013 if (!parseDepCtr(DepCtr, UsedOprMask)) 7014 return ParseStatus::Failure; 7015 } 7016 } else { 7017 if (!parseExpr(DepCtr)) 7018 return ParseStatus::Failure; 7019 } 7020 7021 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 7022 return ParseStatus::Success; 7023 } 7024 7025 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 7026 7027 //===----------------------------------------------------------------------===// 7028 // hwreg 7029 //===----------------------------------------------------------------------===// 7030 7031 bool 7032 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 7033 OperandInfoTy &Offset, 7034 OperandInfoTy &Width) { 7035 using namespace llvm::AMDGPU::Hwreg; 7036 7037 // The register may be specified by name or using a numeric code 7038 HwReg.Loc = getLoc(); 7039 if (isToken(AsmToken::Identifier) && 7040 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7041 HwReg.IsSymbolic = true; 7042 lex(); // skip register name 7043 } else if (!parseExpr(HwReg.Id, "a register name")) { 7044 return false; 7045 } 7046 7047 if (trySkipToken(AsmToken::RParen)) 7048 return true; 7049 7050 // parse optional params 7051 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 7052 return false; 7053 7054 Offset.Loc = getLoc(); 7055 if (!parseExpr(Offset.Id)) 7056 return false; 7057 7058 if (!skipToken(AsmToken::Comma, "expected a comma")) 7059 return false; 7060 7061 Width.Loc = getLoc(); 7062 return parseExpr(Width.Id) && 7063 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7064 } 7065 7066 bool 7067 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 7068 const OperandInfoTy &Offset, 7069 const OperandInfoTy &Width) { 7070 7071 using namespace llvm::AMDGPU::Hwreg; 7072 7073 if (HwReg.IsSymbolic) { 7074 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 7075 Error(HwReg.Loc, 7076 "specified hardware register is not supported on this GPU"); 7077 return false; 7078 } 7079 } else { 7080 if (!isValidHwreg(HwReg.Id)) { 7081 Error(HwReg.Loc, 7082 "invalid code of hardware register: only 6-bit values are legal"); 7083 return false; 7084 } 7085 } 7086 if (!isValidHwregOffset(Offset.Id)) { 7087 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 7088 return false; 7089 } 7090 if (!isValidHwregWidth(Width.Id)) { 7091 Error(Width.Loc, 7092 "invalid bitfield width: only values from 1 to 32 are legal"); 7093 return false; 7094 } 7095 return true; 7096 } 7097 7098 ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 7099 using namespace llvm::AMDGPU::Hwreg; 7100 7101 int64_t ImmVal = 0; 7102 SMLoc Loc = getLoc(); 7103 7104 if (trySkipId("hwreg", AsmToken::LParen)) { 7105 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 7106 OperandInfoTy Offset(OFFSET_DEFAULT_); 7107 OperandInfoTy Width(WIDTH_DEFAULT_); 7108 if (parseHwregBody(HwReg, Offset, Width) && 7109 validateHwreg(HwReg, Offset, Width)) { 7110 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 7111 } else { 7112 return ParseStatus::Failure; 7113 } 7114 } else if (parseExpr(ImmVal, "a hwreg macro")) { 7115 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7116 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7117 } else { 7118 return ParseStatus::Failure; 7119 } 7120 7121 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 7122 return ParseStatus::Success; 7123 } 7124 7125 bool AMDGPUOperand::isHwreg() const { 7126 return isImmTy(ImmTyHwreg); 7127 } 7128 7129 //===----------------------------------------------------------------------===// 7130 // sendmsg 7131 //===----------------------------------------------------------------------===// 7132 7133 bool 7134 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 7135 OperandInfoTy &Op, 7136 OperandInfoTy &Stream) { 7137 using namespace llvm::AMDGPU::SendMsg; 7138 7139 Msg.Loc = getLoc(); 7140 if (isToken(AsmToken::Identifier) && 7141 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 7142 Msg.IsSymbolic = true; 7143 lex(); // skip message name 7144 } else if (!parseExpr(Msg.Id, "a message name")) { 7145 return false; 7146 } 7147 7148 if (trySkipToken(AsmToken::Comma)) { 7149 Op.IsDefined = true; 7150 Op.Loc = getLoc(); 7151 if (isToken(AsmToken::Identifier) && 7152 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 7153 lex(); // skip operation name 7154 } else if (!parseExpr(Op.Id, "an operation name")) { 7155 return false; 7156 } 7157 7158 if (trySkipToken(AsmToken::Comma)) { 7159 Stream.IsDefined = true; 7160 Stream.Loc = getLoc(); 7161 if (!parseExpr(Stream.Id)) 7162 return false; 7163 } 7164 } 7165 7166 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7167 } 7168 7169 bool 7170 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 7171 const OperandInfoTy &Op, 7172 const OperandInfoTy &Stream) { 7173 using namespace llvm::AMDGPU::SendMsg; 7174 7175 // Validation strictness depends on whether message is specified 7176 // in a symbolic or in a numeric form. In the latter case 7177 // only encoding possibility is checked. 7178 bool Strict = Msg.IsSymbolic; 7179 7180 if (Strict) { 7181 if (Msg.Id == OPR_ID_UNSUPPORTED) { 7182 Error(Msg.Loc, "specified message id is not supported on this GPU"); 7183 return false; 7184 } 7185 } else { 7186 if (!isValidMsgId(Msg.Id, getSTI())) { 7187 Error(Msg.Loc, "invalid message id"); 7188 return false; 7189 } 7190 } 7191 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 7192 if (Op.IsDefined) { 7193 Error(Op.Loc, "message does not support operations"); 7194 } else { 7195 Error(Msg.Loc, "missing message operation"); 7196 } 7197 return false; 7198 } 7199 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 7200 Error(Op.Loc, "invalid operation id"); 7201 return false; 7202 } 7203 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 7204 Stream.IsDefined) { 7205 Error(Stream.Loc, "message operation does not support streams"); 7206 return false; 7207 } 7208 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 7209 Error(Stream.Loc, "invalid message stream id"); 7210 return false; 7211 } 7212 return true; 7213 } 7214 7215 ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) { 7216 using namespace llvm::AMDGPU::SendMsg; 7217 7218 int64_t ImmVal = 0; 7219 SMLoc Loc = getLoc(); 7220 7221 if (trySkipId("sendmsg", AsmToken::LParen)) { 7222 OperandInfoTy Msg(OPR_ID_UNKNOWN); 7223 OperandInfoTy Op(OP_NONE_); 7224 OperandInfoTy Stream(STREAM_ID_NONE_); 7225 if (parseSendMsgBody(Msg, Op, Stream) && 7226 validateSendMsg(Msg, Op, Stream)) { 7227 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 7228 } else { 7229 return ParseStatus::Failure; 7230 } 7231 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 7232 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 7233 return Error(Loc, "invalid immediate: only 16-bit values are legal"); 7234 } else { 7235 return ParseStatus::Failure; 7236 } 7237 7238 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 7239 return ParseStatus::Success; 7240 } 7241 7242 bool AMDGPUOperand::isSendMsg() const { 7243 return isImmTy(ImmTySendMsg); 7244 } 7245 7246 //===----------------------------------------------------------------------===// 7247 // v_interp 7248 //===----------------------------------------------------------------------===// 7249 7250 ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 7251 StringRef Str; 7252 SMLoc S = getLoc(); 7253 7254 if (!parseId(Str)) 7255 return ParseStatus::NoMatch; 7256 7257 int Slot = StringSwitch<int>(Str) 7258 .Case("p10", 0) 7259 .Case("p20", 1) 7260 .Case("p0", 2) 7261 .Default(-1); 7262 7263 if (Slot == -1) 7264 return Error(S, "invalid interpolation slot"); 7265 7266 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 7267 AMDGPUOperand::ImmTyInterpSlot)); 7268 return ParseStatus::Success; 7269 } 7270 7271 ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 7272 StringRef Str; 7273 SMLoc S = getLoc(); 7274 7275 if (!parseId(Str)) 7276 return ParseStatus::NoMatch; 7277 7278 if (!Str.starts_with("attr")) 7279 return Error(S, "invalid interpolation attribute"); 7280 7281 StringRef Chan = Str.take_back(2); 7282 int AttrChan = StringSwitch<int>(Chan) 7283 .Case(".x", 0) 7284 .Case(".y", 1) 7285 .Case(".z", 2) 7286 .Case(".w", 3) 7287 .Default(-1); 7288 if (AttrChan == -1) 7289 return Error(S, "invalid or missing interpolation attribute channel"); 7290 7291 Str = Str.drop_back(2).drop_front(4); 7292 7293 uint8_t Attr; 7294 if (Str.getAsInteger(10, Attr)) 7295 return Error(S, "invalid or missing interpolation attribute number"); 7296 7297 if (Attr > 32) 7298 return Error(S, "out of bounds interpolation attribute number"); 7299 7300 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 7301 7302 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 7303 AMDGPUOperand::ImmTyInterpAttr)); 7304 Operands.push_back(AMDGPUOperand::CreateImm( 7305 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan)); 7306 return ParseStatus::Success; 7307 } 7308 7309 //===----------------------------------------------------------------------===// 7310 // exp 7311 //===----------------------------------------------------------------------===// 7312 7313 ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 7314 using namespace llvm::AMDGPU::Exp; 7315 7316 StringRef Str; 7317 SMLoc S = getLoc(); 7318 7319 if (!parseId(Str)) 7320 return ParseStatus::NoMatch; 7321 7322 unsigned Id = getTgtId(Str); 7323 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) 7324 return Error(S, (Id == ET_INVALID) 7325 ? "invalid exp target" 7326 : "exp target is not supported on this GPU"); 7327 7328 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 7329 AMDGPUOperand::ImmTyExpTgt)); 7330 return ParseStatus::Success; 7331 } 7332 7333 //===----------------------------------------------------------------------===// 7334 // parser helpers 7335 //===----------------------------------------------------------------------===// 7336 7337 bool 7338 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 7339 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 7340 } 7341 7342 bool 7343 AMDGPUAsmParser::isId(const StringRef Id) const { 7344 return isId(getToken(), Id); 7345 } 7346 7347 bool 7348 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 7349 return getTokenKind() == Kind; 7350 } 7351 7352 StringRef AMDGPUAsmParser::getId() const { 7353 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef(); 7354 } 7355 7356 bool 7357 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7358 if (isId(Id)) { 7359 lex(); 7360 return true; 7361 } 7362 return false; 7363 } 7364 7365 bool 7366 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7367 if (isToken(AsmToken::Identifier)) { 7368 StringRef Tok = getTokenStr(); 7369 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) { 7370 lex(); 7371 return true; 7372 } 7373 } 7374 return false; 7375 } 7376 7377 bool 7378 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7379 if (isId(Id) && peekToken().is(Kind)) { 7380 lex(); 7381 lex(); 7382 return true; 7383 } 7384 return false; 7385 } 7386 7387 bool 7388 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7389 if (isToken(Kind)) { 7390 lex(); 7391 return true; 7392 } 7393 return false; 7394 } 7395 7396 bool 7397 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7398 const StringRef ErrMsg) { 7399 if (!trySkipToken(Kind)) { 7400 Error(getLoc(), ErrMsg); 7401 return false; 7402 } 7403 return true; 7404 } 7405 7406 bool 7407 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7408 SMLoc S = getLoc(); 7409 7410 const MCExpr *Expr; 7411 if (Parser.parseExpression(Expr)) 7412 return false; 7413 7414 if (Expr->evaluateAsAbsolute(Imm)) 7415 return true; 7416 7417 if (Expected.empty()) { 7418 Error(S, "expected absolute expression"); 7419 } else { 7420 Error(S, Twine("expected ", Expected) + 7421 Twine(" or an absolute expression")); 7422 } 7423 return false; 7424 } 7425 7426 bool 7427 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7428 SMLoc S = getLoc(); 7429 7430 const MCExpr *Expr; 7431 if (Parser.parseExpression(Expr)) 7432 return false; 7433 7434 int64_t IntVal; 7435 if (Expr->evaluateAsAbsolute(IntVal)) { 7436 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7437 } else { 7438 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7439 } 7440 return true; 7441 } 7442 7443 bool 7444 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7445 if (isToken(AsmToken::String)) { 7446 Val = getToken().getStringContents(); 7447 lex(); 7448 return true; 7449 } else { 7450 Error(getLoc(), ErrMsg); 7451 return false; 7452 } 7453 } 7454 7455 bool 7456 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7457 if (isToken(AsmToken::Identifier)) { 7458 Val = getTokenStr(); 7459 lex(); 7460 return true; 7461 } else { 7462 if (!ErrMsg.empty()) 7463 Error(getLoc(), ErrMsg); 7464 return false; 7465 } 7466 } 7467 7468 AsmToken 7469 AMDGPUAsmParser::getToken() const { 7470 return Parser.getTok(); 7471 } 7472 7473 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7474 return isToken(AsmToken::EndOfStatement) 7475 ? getToken() 7476 : getLexer().peekTok(ShouldSkipSpace); 7477 } 7478 7479 void 7480 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7481 auto TokCount = getLexer().peekTokens(Tokens); 7482 7483 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7484 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7485 } 7486 7487 AsmToken::TokenKind 7488 AMDGPUAsmParser::getTokenKind() const { 7489 return getLexer().getKind(); 7490 } 7491 7492 SMLoc 7493 AMDGPUAsmParser::getLoc() const { 7494 return getToken().getLoc(); 7495 } 7496 7497 StringRef 7498 AMDGPUAsmParser::getTokenStr() const { 7499 return getToken().getString(); 7500 } 7501 7502 void 7503 AMDGPUAsmParser::lex() { 7504 Parser.Lex(); 7505 } 7506 7507 SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const { 7508 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7509 } 7510 7511 SMLoc 7512 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7513 const OperandVector &Operands) const { 7514 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7515 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7516 if (Test(Op)) 7517 return Op.getStartLoc(); 7518 } 7519 return getInstLoc(Operands); 7520 } 7521 7522 SMLoc 7523 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7524 const OperandVector &Operands) const { 7525 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7526 return getOperandLoc(Test, Operands); 7527 } 7528 7529 SMLoc 7530 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7531 const OperandVector &Operands) const { 7532 auto Test = [=](const AMDGPUOperand& Op) { 7533 return Op.isRegKind() && Op.getReg() == Reg; 7534 }; 7535 return getOperandLoc(Test, Operands); 7536 } 7537 7538 SMLoc AMDGPUAsmParser::getLitLoc(const OperandVector &Operands, 7539 bool SearchMandatoryLiterals) const { 7540 auto Test = [](const AMDGPUOperand& Op) { 7541 return Op.IsImmKindLiteral() || Op.isExpr(); 7542 }; 7543 SMLoc Loc = getOperandLoc(Test, Operands); 7544 if (SearchMandatoryLiterals && Loc == getInstLoc(Operands)) 7545 Loc = getMandatoryLitLoc(Operands); 7546 return Loc; 7547 } 7548 7549 SMLoc AMDGPUAsmParser::getMandatoryLitLoc(const OperandVector &Operands) const { 7550 auto Test = [](const AMDGPUOperand &Op) { 7551 return Op.IsImmKindMandatoryLiteral(); 7552 }; 7553 return getOperandLoc(Test, Operands); 7554 } 7555 7556 SMLoc 7557 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7558 auto Test = [](const AMDGPUOperand& Op) { 7559 return Op.isImmKindConst(); 7560 }; 7561 return getOperandLoc(Test, Operands); 7562 } 7563 7564 //===----------------------------------------------------------------------===// 7565 // swizzle 7566 //===----------------------------------------------------------------------===// 7567 7568 LLVM_READNONE 7569 static unsigned 7570 encodeBitmaskPerm(const unsigned AndMask, 7571 const unsigned OrMask, 7572 const unsigned XorMask) { 7573 using namespace llvm::AMDGPU::Swizzle; 7574 7575 return BITMASK_PERM_ENC | 7576 (AndMask << BITMASK_AND_SHIFT) | 7577 (OrMask << BITMASK_OR_SHIFT) | 7578 (XorMask << BITMASK_XOR_SHIFT); 7579 } 7580 7581 bool 7582 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7583 const unsigned MinVal, 7584 const unsigned MaxVal, 7585 const StringRef ErrMsg, 7586 SMLoc &Loc) { 7587 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7588 return false; 7589 } 7590 Loc = getLoc(); 7591 if (!parseExpr(Op)) { 7592 return false; 7593 } 7594 if (Op < MinVal || Op > MaxVal) { 7595 Error(Loc, ErrMsg); 7596 return false; 7597 } 7598 7599 return true; 7600 } 7601 7602 bool 7603 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7604 const unsigned MinVal, 7605 const unsigned MaxVal, 7606 const StringRef ErrMsg) { 7607 SMLoc Loc; 7608 for (unsigned i = 0; i < OpNum; ++i) { 7609 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7610 return false; 7611 } 7612 7613 return true; 7614 } 7615 7616 bool 7617 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7618 using namespace llvm::AMDGPU::Swizzle; 7619 7620 int64_t Lane[LANE_NUM]; 7621 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7622 "expected a 2-bit lane id")) { 7623 Imm = QUAD_PERM_ENC; 7624 for (unsigned I = 0; I < LANE_NUM; ++I) { 7625 Imm |= Lane[I] << (LANE_SHIFT * I); 7626 } 7627 return true; 7628 } 7629 return false; 7630 } 7631 7632 bool 7633 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7634 using namespace llvm::AMDGPU::Swizzle; 7635 7636 SMLoc Loc; 7637 int64_t GroupSize; 7638 int64_t LaneIdx; 7639 7640 if (!parseSwizzleOperand(GroupSize, 7641 2, 32, 7642 "group size must be in the interval [2,32]", 7643 Loc)) { 7644 return false; 7645 } 7646 if (!isPowerOf2_64(GroupSize)) { 7647 Error(Loc, "group size must be a power of two"); 7648 return false; 7649 } 7650 if (parseSwizzleOperand(LaneIdx, 7651 0, GroupSize - 1, 7652 "lane id must be in the interval [0,group size - 1]", 7653 Loc)) { 7654 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7655 return true; 7656 } 7657 return false; 7658 } 7659 7660 bool 7661 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7662 using namespace llvm::AMDGPU::Swizzle; 7663 7664 SMLoc Loc; 7665 int64_t GroupSize; 7666 7667 if (!parseSwizzleOperand(GroupSize, 7668 2, 32, 7669 "group size must be in the interval [2,32]", 7670 Loc)) { 7671 return false; 7672 } 7673 if (!isPowerOf2_64(GroupSize)) { 7674 Error(Loc, "group size must be a power of two"); 7675 return false; 7676 } 7677 7678 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7679 return true; 7680 } 7681 7682 bool 7683 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7684 using namespace llvm::AMDGPU::Swizzle; 7685 7686 SMLoc Loc; 7687 int64_t GroupSize; 7688 7689 if (!parseSwizzleOperand(GroupSize, 7690 1, 16, 7691 "group size must be in the interval [1,16]", 7692 Loc)) { 7693 return false; 7694 } 7695 if (!isPowerOf2_64(GroupSize)) { 7696 Error(Loc, "group size must be a power of two"); 7697 return false; 7698 } 7699 7700 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7701 return true; 7702 } 7703 7704 bool 7705 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7706 using namespace llvm::AMDGPU::Swizzle; 7707 7708 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7709 return false; 7710 } 7711 7712 StringRef Ctl; 7713 SMLoc StrLoc = getLoc(); 7714 if (!parseString(Ctl)) { 7715 return false; 7716 } 7717 if (Ctl.size() != BITMASK_WIDTH) { 7718 Error(StrLoc, "expected a 5-character mask"); 7719 return false; 7720 } 7721 7722 unsigned AndMask = 0; 7723 unsigned OrMask = 0; 7724 unsigned XorMask = 0; 7725 7726 for (size_t i = 0; i < Ctl.size(); ++i) { 7727 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7728 switch(Ctl[i]) { 7729 default: 7730 Error(StrLoc, "invalid mask"); 7731 return false; 7732 case '0': 7733 break; 7734 case '1': 7735 OrMask |= Mask; 7736 break; 7737 case 'p': 7738 AndMask |= Mask; 7739 break; 7740 case 'i': 7741 AndMask |= Mask; 7742 XorMask |= Mask; 7743 break; 7744 } 7745 } 7746 7747 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7748 return true; 7749 } 7750 7751 bool 7752 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7753 7754 SMLoc OffsetLoc = getLoc(); 7755 7756 if (!parseExpr(Imm, "a swizzle macro")) { 7757 return false; 7758 } 7759 if (!isUInt<16>(Imm)) { 7760 Error(OffsetLoc, "expected a 16-bit offset"); 7761 return false; 7762 } 7763 return true; 7764 } 7765 7766 bool 7767 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7768 using namespace llvm::AMDGPU::Swizzle; 7769 7770 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7771 7772 SMLoc ModeLoc = getLoc(); 7773 bool Ok = false; 7774 7775 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7776 Ok = parseSwizzleQuadPerm(Imm); 7777 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7778 Ok = parseSwizzleBitmaskPerm(Imm); 7779 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7780 Ok = parseSwizzleBroadcast(Imm); 7781 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7782 Ok = parseSwizzleSwap(Imm); 7783 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7784 Ok = parseSwizzleReverse(Imm); 7785 } else { 7786 Error(ModeLoc, "expected a swizzle mode"); 7787 } 7788 7789 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7790 } 7791 7792 return false; 7793 } 7794 7795 ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) { 7796 SMLoc S = getLoc(); 7797 int64_t Imm = 0; 7798 7799 if (trySkipId("offset")) { 7800 7801 bool Ok = false; 7802 if (skipToken(AsmToken::Colon, "expected a colon")) { 7803 if (trySkipId("swizzle")) { 7804 Ok = parseSwizzleMacro(Imm); 7805 } else { 7806 Ok = parseSwizzleOffset(Imm); 7807 } 7808 } 7809 7810 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7811 7812 return Ok ? ParseStatus::Success : ParseStatus::Failure; 7813 } 7814 return ParseStatus::NoMatch; 7815 } 7816 7817 bool 7818 AMDGPUOperand::isSwizzle() const { 7819 return isImmTy(ImmTySwizzle); 7820 } 7821 7822 //===----------------------------------------------------------------------===// 7823 // VGPR Index Mode 7824 //===----------------------------------------------------------------------===// 7825 7826 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7827 7828 using namespace llvm::AMDGPU::VGPRIndexMode; 7829 7830 if (trySkipToken(AsmToken::RParen)) { 7831 return OFF; 7832 } 7833 7834 int64_t Imm = 0; 7835 7836 while (true) { 7837 unsigned Mode = 0; 7838 SMLoc S = getLoc(); 7839 7840 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7841 if (trySkipId(IdSymbolic[ModeId])) { 7842 Mode = 1 << ModeId; 7843 break; 7844 } 7845 } 7846 7847 if (Mode == 0) { 7848 Error(S, (Imm == 0)? 7849 "expected a VGPR index mode or a closing parenthesis" : 7850 "expected a VGPR index mode"); 7851 return UNDEF; 7852 } 7853 7854 if (Imm & Mode) { 7855 Error(S, "duplicate VGPR index mode"); 7856 return UNDEF; 7857 } 7858 Imm |= Mode; 7859 7860 if (trySkipToken(AsmToken::RParen)) 7861 break; 7862 if (!skipToken(AsmToken::Comma, 7863 "expected a comma or a closing parenthesis")) 7864 return UNDEF; 7865 } 7866 7867 return Imm; 7868 } 7869 7870 ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7871 7872 using namespace llvm::AMDGPU::VGPRIndexMode; 7873 7874 int64_t Imm = 0; 7875 SMLoc S = getLoc(); 7876 7877 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7878 Imm = parseGPRIdxMacro(); 7879 if (Imm == UNDEF) 7880 return ParseStatus::Failure; 7881 } else { 7882 if (getParser().parseAbsoluteExpression(Imm)) 7883 return ParseStatus::Failure; 7884 if (Imm < 0 || !isUInt<4>(Imm)) 7885 return Error(S, "invalid immediate: only 4-bit values are legal"); 7886 } 7887 7888 Operands.push_back( 7889 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7890 return ParseStatus::Success; 7891 } 7892 7893 bool AMDGPUOperand::isGPRIdxMode() const { 7894 return isImmTy(ImmTyGprIdxMode); 7895 } 7896 7897 //===----------------------------------------------------------------------===// 7898 // sopp branch targets 7899 //===----------------------------------------------------------------------===// 7900 7901 ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) { 7902 7903 // Make sure we are not parsing something 7904 // that looks like a label or an expression but is not. 7905 // This will improve error messages. 7906 if (isRegister() || isModifier()) 7907 return ParseStatus::NoMatch; 7908 7909 if (!parseExpr(Operands)) 7910 return ParseStatus::Failure; 7911 7912 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7913 assert(Opr.isImm() || Opr.isExpr()); 7914 SMLoc Loc = Opr.getStartLoc(); 7915 7916 // Currently we do not support arbitrary expressions as branch targets. 7917 // Only labels and absolute expressions are accepted. 7918 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7919 Error(Loc, "expected an absolute expression or a label"); 7920 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7921 Error(Loc, "expected a 16-bit signed jump offset"); 7922 } 7923 7924 return ParseStatus::Success; 7925 } 7926 7927 //===----------------------------------------------------------------------===// 7928 // Boolean holding registers 7929 //===----------------------------------------------------------------------===// 7930 7931 ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7932 return parseReg(Operands); 7933 } 7934 7935 //===----------------------------------------------------------------------===// 7936 // mubuf 7937 //===----------------------------------------------------------------------===// 7938 7939 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7940 const OperandVector &Operands, 7941 bool IsAtomic) { 7942 OptionalImmIndexMap OptionalIdx; 7943 unsigned FirstOperandIdx = 1; 7944 bool IsAtomicReturn = false; 7945 7946 if (IsAtomic) { 7947 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7948 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7949 if (!Op.isCPol()) 7950 continue; 7951 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7952 break; 7953 } 7954 7955 if (!IsAtomicReturn) { 7956 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7957 if (NewOpc != -1) 7958 Inst.setOpcode(NewOpc); 7959 } 7960 7961 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7962 SIInstrFlags::IsAtomicRet; 7963 } 7964 7965 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7966 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7967 7968 // Add the register arguments 7969 if (Op.isReg()) { 7970 Op.addRegOperands(Inst, 1); 7971 // Insert a tied src for atomic return dst. 7972 // This cannot be postponed as subsequent calls to 7973 // addImmOperands rely on correct number of MC operands. 7974 if (IsAtomicReturn && i == FirstOperandIdx) 7975 Op.addRegOperands(Inst, 1); 7976 continue; 7977 } 7978 7979 // Handle the case where soffset is an immediate 7980 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7981 Op.addImmOperands(Inst, 1); 7982 continue; 7983 } 7984 7985 // Handle tokens like 'offen' which are sometimes hard-coded into the 7986 // asm string. There are no MCInst operands for these. 7987 if (Op.isToken()) { 7988 continue; 7989 } 7990 assert(Op.isImm()); 7991 7992 // Handle optional arguments 7993 OptionalIdx[Op.getImmTy()] = i; 7994 } 7995 7996 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7997 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7998 } 7999 8000 //===----------------------------------------------------------------------===// 8001 // smrd 8002 //===----------------------------------------------------------------------===// 8003 8004 bool AMDGPUOperand::isSMRDOffset8() const { 8005 return isImmLiteral() && isUInt<8>(getImm()); 8006 } 8007 8008 bool AMDGPUOperand::isSMEMOffset() const { 8009 // Offset range is checked later by validator. 8010 return isImmLiteral(); 8011 } 8012 8013 bool AMDGPUOperand::isSMRDLiteralOffset() const { 8014 // 32-bit literals are only supported on CI and we only want to use them 8015 // when the offset is > 8-bits. 8016 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 8017 } 8018 8019 //===----------------------------------------------------------------------===// 8020 // vop3 8021 //===----------------------------------------------------------------------===// 8022 8023 static bool ConvertOmodMul(int64_t &Mul) { 8024 if (Mul != 1 && Mul != 2 && Mul != 4) 8025 return false; 8026 8027 Mul >>= 1; 8028 return true; 8029 } 8030 8031 static bool ConvertOmodDiv(int64_t &Div) { 8032 if (Div == 1) { 8033 Div = 0; 8034 return true; 8035 } 8036 8037 if (Div == 2) { 8038 Div = 3; 8039 return true; 8040 } 8041 8042 return false; 8043 } 8044 8045 // For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 8046 // This is intentional and ensures compatibility with sp3. 8047 // See bug 35397 for details. 8048 bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) { 8049 if (BoundCtrl == 0 || BoundCtrl == 1) { 8050 if (!isGFX11Plus()) 8051 BoundCtrl = 1; 8052 return true; 8053 } 8054 return false; 8055 } 8056 8057 void AMDGPUAsmParser::onBeginOfFile() { 8058 if (!getParser().getStreamer().getTargetStreamer() || 8059 getSTI().getTargetTriple().getArch() == Triple::r600) 8060 return; 8061 8062 if (!getTargetStreamer().getTargetID()) 8063 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(), 8064 // TODO: Should try to check code object version from directive??? 8065 AMDGPU::getAmdhsaCodeObjectVersion()); 8066 8067 if (isHsaAbi(getSTI())) 8068 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 8069 } 8070 8071 ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) { 8072 StringRef Name = getTokenStr(); 8073 if (Name == "mul") { 8074 return parseIntWithPrefix("mul", Operands, 8075 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8076 } 8077 8078 if (Name == "div") { 8079 return parseIntWithPrefix("div", Operands, 8080 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8081 } 8082 8083 return ParseStatus::NoMatch; 8084 } 8085 8086 // Determines which bit DST_OP_SEL occupies in the op_sel operand according to 8087 // the number of src operands present, then copies that bit into src0_modifiers. 8088 void cvtVOP3DstOpSelOnly(MCInst &Inst) { 8089 int Opc = Inst.getOpcode(); 8090 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8091 if (OpSelIdx == -1) 8092 return; 8093 8094 int SrcNum; 8095 const int Ops[] = { AMDGPU::OpName::src0, 8096 AMDGPU::OpName::src1, 8097 AMDGPU::OpName::src2 }; 8098 for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]); 8099 ++SrcNum) 8100 ; 8101 assert(SrcNum > 0); 8102 8103 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8104 8105 if ((OpSel & (1 << SrcNum)) != 0) { 8106 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8107 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8108 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8109 } 8110 } 8111 8112 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, 8113 const OperandVector &Operands) { 8114 cvtVOP3P(Inst, Operands); 8115 cvtVOP3DstOpSelOnly(Inst); 8116 } 8117 8118 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands, 8119 OptionalImmIndexMap &OptionalIdx) { 8120 cvtVOP3P(Inst, Operands, OptionalIdx); 8121 cvtVOP3DstOpSelOnly(Inst); 8122 } 8123 8124 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8125 return 8126 // 1. This operand is input modifiers 8127 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8128 // 2. This is not last operand 8129 && Desc.NumOperands > (OpNum + 1) 8130 // 3. Next operand is register class 8131 && Desc.operands()[OpNum + 1].RegClass != -1 8132 // 4. Next register is not tied to any other operand 8133 && Desc.getOperandConstraint(OpNum + 1, 8134 MCOI::OperandConstraint::TIED_TO) == -1; 8135 } 8136 8137 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8138 { 8139 OptionalImmIndexMap OptionalIdx; 8140 unsigned Opc = Inst.getOpcode(); 8141 8142 unsigned I = 1; 8143 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8144 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8145 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8146 } 8147 8148 for (unsigned E = Operands.size(); I != E; ++I) { 8149 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8150 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8151 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8152 } else if (Op.isInterpSlot() || Op.isInterpAttr() || 8153 Op.isInterpAttrChan()) { 8154 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8155 } else if (Op.isImmModifier()) { 8156 OptionalIdx[Op.getImmTy()] = I; 8157 } else { 8158 llvm_unreachable("unhandled operand type"); 8159 } 8160 } 8161 8162 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high)) 8163 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8164 AMDGPUOperand::ImmTyHigh); 8165 8166 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8167 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8168 AMDGPUOperand::ImmTyClampSI); 8169 8170 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8171 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8172 AMDGPUOperand::ImmTyOModSI); 8173 } 8174 8175 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8176 { 8177 OptionalImmIndexMap OptionalIdx; 8178 unsigned Opc = Inst.getOpcode(); 8179 8180 unsigned I = 1; 8181 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8182 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8183 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8184 } 8185 8186 for (unsigned E = Operands.size(); I != E; ++I) { 8187 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8188 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8189 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8190 } else if (Op.isImmModifier()) { 8191 OptionalIdx[Op.getImmTy()] = I; 8192 } else { 8193 llvm_unreachable("unhandled operand type"); 8194 } 8195 } 8196 8197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8198 8199 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8200 if (OpSelIdx != -1) 8201 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8202 8203 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8204 8205 if (OpSelIdx == -1) 8206 return; 8207 8208 const int Ops[] = { AMDGPU::OpName::src0, 8209 AMDGPU::OpName::src1, 8210 AMDGPU::OpName::src2 }; 8211 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8212 AMDGPU::OpName::src1_modifiers, 8213 AMDGPU::OpName::src2_modifiers }; 8214 8215 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8216 8217 for (int J = 0; J < 3; ++J) { 8218 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8219 if (OpIdx == -1) 8220 break; 8221 8222 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8223 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8224 8225 if ((OpSel & (1 << J)) != 0) 8226 ModVal |= SISrcMods::OP_SEL_0; 8227 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8228 (OpSel & (1 << 3)) != 0) 8229 ModVal |= SISrcMods::DST_OP_SEL; 8230 8231 Inst.getOperand(ModIdx).setImm(ModVal); 8232 } 8233 } 8234 8235 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8236 OptionalImmIndexMap &OptionalIdx) { 8237 unsigned Opc = Inst.getOpcode(); 8238 8239 unsigned I = 1; 8240 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8241 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8242 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8243 } 8244 8245 for (unsigned E = Operands.size(); I != E; ++I) { 8246 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8247 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8248 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8249 } else if (Op.isImmModifier()) { 8250 OptionalIdx[Op.getImmTy()] = I; 8251 } else if (Op.isRegOrImm()) { 8252 Op.addRegOrImmOperands(Inst, 1); 8253 } else { 8254 llvm_unreachable("unhandled operand type"); 8255 } 8256 } 8257 8258 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8259 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8260 AMDGPUOperand::ImmTyClampSI); 8261 8262 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8263 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8264 AMDGPUOperand::ImmTyOModSI); 8265 8266 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8267 // it has src2 register operand that is tied to dst operand 8268 // we don't allow modifiers for this operand in assembler so src2_modifiers 8269 // should be 0. 8270 if (isMAC(Opc)) { 8271 auto it = Inst.begin(); 8272 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8273 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8274 ++it; 8275 // Copy the operand to ensure it's not invalidated when Inst grows. 8276 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8277 } 8278 } 8279 8280 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8281 OptionalImmIndexMap OptionalIdx; 8282 cvtVOP3(Inst, Operands, OptionalIdx); 8283 } 8284 8285 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8286 OptionalImmIndexMap &OptIdx) { 8287 const int Opc = Inst.getOpcode(); 8288 const MCInstrDesc &Desc = MII.get(Opc); 8289 8290 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8291 8292 if (Opc == AMDGPU::V_CVT_SR_BF8_F32_vi || 8293 Opc == AMDGPU::V_CVT_SR_FP8_F32_vi) { 8294 Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods 8295 Inst.addOperand(Inst.getOperand(0)); 8296 } 8297 8298 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in)) { 8299 assert(!IsPacked); 8300 Inst.addOperand(Inst.getOperand(0)); 8301 } 8302 8303 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8304 // instruction, and then figure out where to actually put the modifiers 8305 8306 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8307 if (OpSelIdx != -1) { 8308 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8309 } 8310 8311 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8312 if (OpSelHiIdx != -1) { 8313 int DefaultVal = IsPacked ? -1 : 0; 8314 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8315 DefaultVal); 8316 } 8317 8318 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8319 if (NegLoIdx != -1) { 8320 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8321 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8322 } 8323 8324 const int Ops[] = { AMDGPU::OpName::src0, 8325 AMDGPU::OpName::src1, 8326 AMDGPU::OpName::src2 }; 8327 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8328 AMDGPU::OpName::src1_modifiers, 8329 AMDGPU::OpName::src2_modifiers }; 8330 8331 unsigned OpSel = 0; 8332 unsigned OpSelHi = 0; 8333 unsigned NegLo = 0; 8334 unsigned NegHi = 0; 8335 8336 if (OpSelIdx != -1) 8337 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8338 8339 if (OpSelHiIdx != -1) 8340 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8341 8342 if (NegLoIdx != -1) { 8343 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8344 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8345 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8346 } 8347 8348 for (int J = 0; J < 3; ++J) { 8349 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8350 if (OpIdx == -1) 8351 break; 8352 8353 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8354 8355 if (ModIdx == -1) 8356 continue; 8357 8358 uint32_t ModVal = 0; 8359 8360 if ((OpSel & (1 << J)) != 0) 8361 ModVal |= SISrcMods::OP_SEL_0; 8362 8363 if ((OpSelHi & (1 << J)) != 0) 8364 ModVal |= SISrcMods::OP_SEL_1; 8365 8366 if ((NegLo & (1 << J)) != 0) 8367 ModVal |= SISrcMods::NEG; 8368 8369 if ((NegHi & (1 << J)) != 0) 8370 ModVal |= SISrcMods::NEG_HI; 8371 8372 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8373 } 8374 } 8375 8376 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8377 OptionalImmIndexMap OptIdx; 8378 cvtVOP3(Inst, Operands, OptIdx); 8379 cvtVOP3P(Inst, Operands, OptIdx); 8380 } 8381 8382 //===----------------------------------------------------------------------===// 8383 // VOPD 8384 //===----------------------------------------------------------------------===// 8385 8386 ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8387 if (!hasVOPD(getSTI())) 8388 return ParseStatus::NoMatch; 8389 8390 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8391 SMLoc S = getLoc(); 8392 lex(); 8393 lex(); 8394 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8395 SMLoc OpYLoc = getLoc(); 8396 StringRef OpYName; 8397 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) { 8398 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc)); 8399 return ParseStatus::Success; 8400 } 8401 return Error(OpYLoc, "expected a VOPDY instruction after ::"); 8402 } 8403 return ParseStatus::NoMatch; 8404 } 8405 8406 // Create VOPD MCInst operands using parsed assembler operands. 8407 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8408 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer 8409 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]); 8410 if (Op.isReg()) { 8411 Op.addRegOperands(Inst, 1); 8412 return; 8413 } 8414 if (Op.isImm()) { 8415 Op.addImmOperands(Inst, 1); 8416 return; 8417 } 8418 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8419 }; 8420 8421 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII); 8422 8423 // MCInst operands are ordered as follows: 8424 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8425 8426 for (auto CompIdx : VOPD::COMPONENTS) { 8427 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands()); 8428 } 8429 8430 for (auto CompIdx : VOPD::COMPONENTS) { 8431 const auto &CInfo = InstInfo[CompIdx]; 8432 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum(); 8433 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx) 8434 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx)); 8435 if (CInfo.hasSrc2Acc()) 8436 addOp(CInfo.getIndexOfDstInParsedOperands()); 8437 } 8438 } 8439 8440 //===----------------------------------------------------------------------===// 8441 // dpp 8442 //===----------------------------------------------------------------------===// 8443 8444 bool AMDGPUOperand::isDPP8() const { 8445 return isImmTy(ImmTyDPP8); 8446 } 8447 8448 bool AMDGPUOperand::isDPPCtrl() const { 8449 using namespace AMDGPU::DPP; 8450 8451 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8452 if (result) { 8453 int64_t Imm = getImm(); 8454 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8455 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8456 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8457 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8458 (Imm == DppCtrl::WAVE_SHL1) || 8459 (Imm == DppCtrl::WAVE_ROL1) || 8460 (Imm == DppCtrl::WAVE_SHR1) || 8461 (Imm == DppCtrl::WAVE_ROR1) || 8462 (Imm == DppCtrl::ROW_MIRROR) || 8463 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8464 (Imm == DppCtrl::BCAST15) || 8465 (Imm == DppCtrl::BCAST31) || 8466 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8467 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8468 } 8469 return false; 8470 } 8471 8472 //===----------------------------------------------------------------------===// 8473 // mAI 8474 //===----------------------------------------------------------------------===// 8475 8476 bool AMDGPUOperand::isBLGP() const { 8477 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8478 } 8479 8480 bool AMDGPUOperand::isCBSZ() const { 8481 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8482 } 8483 8484 bool AMDGPUOperand::isABID() const { 8485 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8486 } 8487 8488 bool AMDGPUOperand::isS16Imm() const { 8489 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8490 } 8491 8492 bool AMDGPUOperand::isU16Imm() const { 8493 return isImmLiteral() && isUInt<16>(getImm()); 8494 } 8495 8496 //===----------------------------------------------------------------------===// 8497 // dim 8498 //===----------------------------------------------------------------------===// 8499 8500 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8501 // We want to allow "dim:1D" etc., 8502 // but the initial 1 is tokenized as an integer. 8503 std::string Token; 8504 if (isToken(AsmToken::Integer)) { 8505 SMLoc Loc = getToken().getEndLoc(); 8506 Token = std::string(getTokenStr()); 8507 lex(); 8508 if (getLoc() != Loc) 8509 return false; 8510 } 8511 8512 StringRef Suffix; 8513 if (!parseId(Suffix)) 8514 return false; 8515 Token += Suffix; 8516 8517 StringRef DimId = Token; 8518 if (DimId.starts_with("SQ_RSRC_IMG_")) 8519 DimId = DimId.drop_front(12); 8520 8521 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8522 if (!DimInfo) 8523 return false; 8524 8525 Encoding = DimInfo->Encoding; 8526 return true; 8527 } 8528 8529 ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8530 if (!isGFX10Plus()) 8531 return ParseStatus::NoMatch; 8532 8533 SMLoc S = getLoc(); 8534 8535 if (!trySkipId("dim", AsmToken::Colon)) 8536 return ParseStatus::NoMatch; 8537 8538 unsigned Encoding; 8539 SMLoc Loc = getLoc(); 8540 if (!parseDimId(Encoding)) 8541 return Error(Loc, "invalid dim value"); 8542 8543 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8544 AMDGPUOperand::ImmTyDim)); 8545 return ParseStatus::Success; 8546 } 8547 8548 //===----------------------------------------------------------------------===// 8549 // dpp 8550 //===----------------------------------------------------------------------===// 8551 8552 ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8553 SMLoc S = getLoc(); 8554 8555 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8556 return ParseStatus::NoMatch; 8557 8558 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8559 8560 int64_t Sels[8]; 8561 8562 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8563 return ParseStatus::Failure; 8564 8565 for (size_t i = 0; i < 8; ++i) { 8566 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8567 return ParseStatus::Failure; 8568 8569 SMLoc Loc = getLoc(); 8570 if (getParser().parseAbsoluteExpression(Sels[i])) 8571 return ParseStatus::Failure; 8572 if (0 > Sels[i] || 7 < Sels[i]) 8573 return Error(Loc, "expected a 3-bit value"); 8574 } 8575 8576 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8577 return ParseStatus::Failure; 8578 8579 unsigned DPP8 = 0; 8580 for (size_t i = 0; i < 8; ++i) 8581 DPP8 |= (Sels[i] << (i * 3)); 8582 8583 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8584 return ParseStatus::Success; 8585 } 8586 8587 bool 8588 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8589 const OperandVector &Operands) { 8590 if (Ctrl == "row_newbcast") 8591 return isGFX90A(); 8592 8593 if (Ctrl == "row_share" || 8594 Ctrl == "row_xmask") 8595 return isGFX10Plus(); 8596 8597 if (Ctrl == "wave_shl" || 8598 Ctrl == "wave_shr" || 8599 Ctrl == "wave_rol" || 8600 Ctrl == "wave_ror" || 8601 Ctrl == "row_bcast") 8602 return isVI() || isGFX9(); 8603 8604 return Ctrl == "row_mirror" || 8605 Ctrl == "row_half_mirror" || 8606 Ctrl == "quad_perm" || 8607 Ctrl == "row_shl" || 8608 Ctrl == "row_shr" || 8609 Ctrl == "row_ror"; 8610 } 8611 8612 int64_t 8613 AMDGPUAsmParser::parseDPPCtrlPerm() { 8614 // quad_perm:[%d,%d,%d,%d] 8615 8616 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8617 return -1; 8618 8619 int64_t Val = 0; 8620 for (int i = 0; i < 4; ++i) { 8621 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8622 return -1; 8623 8624 int64_t Temp; 8625 SMLoc Loc = getLoc(); 8626 if (getParser().parseAbsoluteExpression(Temp)) 8627 return -1; 8628 if (Temp < 0 || Temp > 3) { 8629 Error(Loc, "expected a 2-bit value"); 8630 return -1; 8631 } 8632 8633 Val += (Temp << i * 2); 8634 } 8635 8636 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8637 return -1; 8638 8639 return Val; 8640 } 8641 8642 int64_t 8643 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8644 using namespace AMDGPU::DPP; 8645 8646 // sel:%d 8647 8648 int64_t Val; 8649 SMLoc Loc = getLoc(); 8650 8651 if (getParser().parseAbsoluteExpression(Val)) 8652 return -1; 8653 8654 struct DppCtrlCheck { 8655 int64_t Ctrl; 8656 int Lo; 8657 int Hi; 8658 }; 8659 8660 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8661 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8662 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8663 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8664 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8665 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8666 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8667 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8668 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8669 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8670 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8671 .Default({-1, 0, 0}); 8672 8673 bool Valid; 8674 if (Check.Ctrl == -1) { 8675 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8676 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8677 } else { 8678 Valid = Check.Lo <= Val && Val <= Check.Hi; 8679 Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val); 8680 } 8681 8682 if (!Valid) { 8683 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8684 return -1; 8685 } 8686 8687 return Val; 8688 } 8689 8690 ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8691 using namespace AMDGPU::DPP; 8692 8693 if (!isToken(AsmToken::Identifier) || 8694 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8695 return ParseStatus::NoMatch; 8696 8697 SMLoc S = getLoc(); 8698 int64_t Val = -1; 8699 StringRef Ctrl; 8700 8701 parseId(Ctrl); 8702 8703 if (Ctrl == "row_mirror") { 8704 Val = DppCtrl::ROW_MIRROR; 8705 } else if (Ctrl == "row_half_mirror") { 8706 Val = DppCtrl::ROW_HALF_MIRROR; 8707 } else { 8708 if (skipToken(AsmToken::Colon, "expected a colon")) { 8709 if (Ctrl == "quad_perm") { 8710 Val = parseDPPCtrlPerm(); 8711 } else { 8712 Val = parseDPPCtrlSel(Ctrl); 8713 } 8714 } 8715 } 8716 8717 if (Val == -1) 8718 return ParseStatus::Failure; 8719 8720 Operands.push_back( 8721 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8722 return ParseStatus::Success; 8723 } 8724 8725 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, 8726 bool IsDPP8) { 8727 OptionalImmIndexMap OptionalIdx; 8728 unsigned Opc = Inst.getOpcode(); 8729 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8730 8731 // MAC instructions are special because they have 'old' 8732 // operand which is not tied to dst (but assumed to be). 8733 // They also have dummy unused src2_modifiers. 8734 int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old); 8735 int Src2ModIdx = 8736 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers); 8737 bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 && 8738 Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1; 8739 8740 unsigned I = 1; 8741 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8742 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8743 } 8744 8745 int Fi = 0; 8746 for (unsigned E = Operands.size(); I != E; ++I) { 8747 8748 if (IsMAC) { 8749 int NumOperands = Inst.getNumOperands(); 8750 if (OldIdx == NumOperands) { 8751 // Handle old operand 8752 constexpr int DST_IDX = 0; 8753 Inst.addOperand(Inst.getOperand(DST_IDX)); 8754 } else if (Src2ModIdx == NumOperands) { 8755 // Add unused dummy src2_modifiers 8756 Inst.addOperand(MCOperand::createImm(0)); 8757 } 8758 } 8759 8760 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8761 MCOI::TIED_TO); 8762 if (TiedTo != -1) { 8763 assert((unsigned)TiedTo < Inst.getNumOperands()); 8764 // handle tied old or src2 for MAC instructions 8765 Inst.addOperand(Inst.getOperand(TiedTo)); 8766 } 8767 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8768 // Add the register arguments 8769 if (IsDPP8 && Op.isDppFI()) { 8770 Fi = Op.getImm(); 8771 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8772 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8773 } else if (Op.isReg()) { 8774 Op.addRegOperands(Inst, 1); 8775 } else if (Op.isImm() && 8776 Desc.operands()[Inst.getNumOperands()].RegClass != -1) { 8777 assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8778 Op.addImmOperands(Inst, 1); 8779 } else if (Op.isImm()) { 8780 OptionalIdx[Op.getImmTy()] = I; 8781 } else { 8782 llvm_unreachable("unhandled operand type"); 8783 } 8784 } 8785 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 8786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8787 8788 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 8789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8790 8791 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8792 cvtVOP3P(Inst, Operands, OptionalIdx); 8793 else if (Desc.TSFlags & SIInstrFlags::VOP3) 8794 cvtVOP3OpSel(Inst, Operands, OptionalIdx); 8795 else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) { 8796 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8797 } 8798 8799 if (IsDPP8) { 8800 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8801 using namespace llvm::AMDGPU::DPP; 8802 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8803 } else { 8804 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8805 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8806 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8807 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8808 8809 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) 8810 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8811 AMDGPUOperand::ImmTyDppFI); 8812 } 8813 } 8814 8815 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8816 OptionalImmIndexMap OptionalIdx; 8817 8818 unsigned I = 1; 8819 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8820 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8821 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8822 } 8823 8824 int Fi = 0; 8825 for (unsigned E = Operands.size(); I != E; ++I) { 8826 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8827 MCOI::TIED_TO); 8828 if (TiedTo != -1) { 8829 assert((unsigned)TiedTo < Inst.getNumOperands()); 8830 // handle tied old or src2 for MAC instructions 8831 Inst.addOperand(Inst.getOperand(TiedTo)); 8832 } 8833 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8834 // Add the register arguments 8835 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8836 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8837 // Skip it. 8838 continue; 8839 } 8840 8841 if (IsDPP8) { 8842 if (Op.isDPP8()) { 8843 Op.addImmOperands(Inst, 1); 8844 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8845 Op.addRegWithFPInputModsOperands(Inst, 2); 8846 } else if (Op.isDppFI()) { 8847 Fi = Op.getImm(); 8848 } else if (Op.isReg()) { 8849 Op.addRegOperands(Inst, 1); 8850 } else { 8851 llvm_unreachable("Invalid operand type"); 8852 } 8853 } else { 8854 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8855 Op.addRegWithFPInputModsOperands(Inst, 2); 8856 } else if (Op.isReg()) { 8857 Op.addRegOperands(Inst, 1); 8858 } else if (Op.isDPPCtrl()) { 8859 Op.addImmOperands(Inst, 1); 8860 } else if (Op.isImm()) { 8861 // Handle optional arguments 8862 OptionalIdx[Op.getImmTy()] = I; 8863 } else { 8864 llvm_unreachable("Invalid operand type"); 8865 } 8866 } 8867 } 8868 8869 if (IsDPP8) { 8870 using namespace llvm::AMDGPU::DPP; 8871 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8872 } else { 8873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8874 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8875 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8876 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) { 8877 addOptionalImmOperand(Inst, Operands, OptionalIdx, 8878 AMDGPUOperand::ImmTyDppFI); 8879 } 8880 } 8881 } 8882 8883 //===----------------------------------------------------------------------===// 8884 // sdwa 8885 //===----------------------------------------------------------------------===// 8886 8887 ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, 8888 StringRef Prefix, 8889 AMDGPUOperand::ImmTy Type) { 8890 using namespace llvm::AMDGPU::SDWA; 8891 8892 SMLoc S = getLoc(); 8893 StringRef Value; 8894 8895 SMLoc StringLoc; 8896 ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc); 8897 if (!Res.isSuccess()) 8898 return Res; 8899 8900 int64_t Int; 8901 Int = StringSwitch<int64_t>(Value) 8902 .Case("BYTE_0", SdwaSel::BYTE_0) 8903 .Case("BYTE_1", SdwaSel::BYTE_1) 8904 .Case("BYTE_2", SdwaSel::BYTE_2) 8905 .Case("BYTE_3", SdwaSel::BYTE_3) 8906 .Case("WORD_0", SdwaSel::WORD_0) 8907 .Case("WORD_1", SdwaSel::WORD_1) 8908 .Case("DWORD", SdwaSel::DWORD) 8909 .Default(0xffffffff); 8910 8911 if (Int == 0xffffffff) 8912 return Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8913 8914 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8915 return ParseStatus::Success; 8916 } 8917 8918 ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8919 using namespace llvm::AMDGPU::SDWA; 8920 8921 SMLoc S = getLoc(); 8922 StringRef Value; 8923 8924 SMLoc StringLoc; 8925 ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8926 if (!Res.isSuccess()) 8927 return Res; 8928 8929 int64_t Int; 8930 Int = StringSwitch<int64_t>(Value) 8931 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8932 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8933 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8934 .Default(0xffffffff); 8935 8936 if (Int == 0xffffffff) 8937 return Error(StringLoc, "invalid dst_unused value"); 8938 8939 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused)); 8940 return ParseStatus::Success; 8941 } 8942 8943 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8944 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8945 } 8946 8947 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8948 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8949 } 8950 8951 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8952 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8953 } 8954 8955 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8956 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8957 } 8958 8959 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8960 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8961 } 8962 8963 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8964 uint64_t BasicInstType, 8965 bool SkipDstVcc, 8966 bool SkipSrcVcc) { 8967 using namespace llvm::AMDGPU::SDWA; 8968 8969 OptionalImmIndexMap OptionalIdx; 8970 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8971 bool SkippedVcc = false; 8972 8973 unsigned I = 1; 8974 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8975 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8976 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8977 } 8978 8979 for (unsigned E = Operands.size(); I != E; ++I) { 8980 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8981 if (SkipVcc && !SkippedVcc && Op.isReg() && 8982 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8983 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8984 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8985 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8986 // Skip VCC only if we didn't skip it on previous iteration. 8987 // Note that src0 and src1 occupy 2 slots each because of modifiers. 8988 if (BasicInstType == SIInstrFlags::VOP2 && 8989 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8990 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8991 SkippedVcc = true; 8992 continue; 8993 } else if (BasicInstType == SIInstrFlags::VOPC && 8994 Inst.getNumOperands() == 0) { 8995 SkippedVcc = true; 8996 continue; 8997 } 8998 } 8999 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 9000 Op.addRegOrImmWithInputModsOperands(Inst, 2); 9001 } else if (Op.isImm()) { 9002 // Handle optional arguments 9003 OptionalIdx[Op.getImmTy()] = I; 9004 } else { 9005 llvm_unreachable("Invalid operand type"); 9006 } 9007 SkippedVcc = false; 9008 } 9009 9010 const unsigned Opc = Inst.getOpcode(); 9011 if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 && 9012 Opc != AMDGPU::V_NOP_sdwa_vi) { 9013 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 9014 switch (BasicInstType) { 9015 case SIInstrFlags::VOP1: 9016 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp)) 9017 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9018 AMDGPUOperand::ImmTyClampSI, 0); 9019 9020 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod)) 9021 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9022 AMDGPUOperand::ImmTyOModSI, 0); 9023 9024 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel)) 9025 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9026 AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 9027 9028 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused)) 9029 addOptionalImmOperand(Inst, Operands, OptionalIdx, 9030 AMDGPUOperand::ImmTySDWADstUnused, 9031 DstUnused::UNUSED_PRESERVE); 9032 9033 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9034 break; 9035 9036 case SIInstrFlags::VOP2: 9037 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 9038 9039 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod)) 9040 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 9041 9042 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD); 9043 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE); 9044 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); 9046 break; 9047 9048 case SIInstrFlags::VOPC: 9049 if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp)) 9050 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 9051 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD); 9052 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD); 9053 break; 9054 9055 default: 9056 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 9057 } 9058 } 9059 9060 // special case v_mac_{f16, f32}: 9061 // it has src2 register operand that is tied to dst operand 9062 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 9063 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 9064 auto it = Inst.begin(); 9065 std::advance( 9066 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 9067 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 9068 } 9069 } 9070 9071 /// Force static initialization. 9072 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 9073 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target()); 9074 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 9075 } 9076 9077 #define GET_REGISTER_MATCHER 9078 #define GET_MATCHER_IMPLEMENTATION 9079 #define GET_MNEMONIC_SPELL_CHECKER 9080 #define GET_MNEMONIC_CHECKER 9081 #include "AMDGPUGenAsmMatcher.inc" 9082 9083 ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, 9084 unsigned MCK) { 9085 switch (MCK) { 9086 case MCK_addr64: 9087 return parseTokenOp("addr64", Operands); 9088 case MCK_done: 9089 return parseTokenOp("done", Operands); 9090 case MCK_idxen: 9091 return parseTokenOp("idxen", Operands); 9092 case MCK_lds: 9093 return parseTokenOp("lds", Operands); 9094 case MCK_offen: 9095 return parseTokenOp("offen", Operands); 9096 case MCK_off: 9097 return parseTokenOp("off", Operands); 9098 case MCK_row_95_en: 9099 return parseTokenOp("row_en", Operands); 9100 case MCK_gds: 9101 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS); 9102 case MCK_tfe: 9103 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE); 9104 } 9105 return tryCustomParseOperand(Operands, MCK); 9106 } 9107 9108 // This function should be defined after auto-generated include so that we have 9109 // MatchClassKind enum defined 9110 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 9111 unsigned Kind) { 9112 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 9113 // But MatchInstructionImpl() expects to meet token and fails to validate 9114 // operand. This method checks if we are given immediate operand but expect to 9115 // get corresponding token. 9116 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 9117 switch (Kind) { 9118 case MCK_addr64: 9119 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 9120 case MCK_gds: 9121 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 9122 case MCK_lds: 9123 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 9124 case MCK_idxen: 9125 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 9126 case MCK_offen: 9127 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 9128 case MCK_tfe: 9129 return Operand.isTFE() ? Match_Success : Match_InvalidOperand; 9130 case MCK_SSrcB32: 9131 // When operands have expression values, they will return true for isToken, 9132 // because it is not possible to distinguish between a token and an 9133 // expression at parse time. MatchInstructionImpl() will always try to 9134 // match an operand as a token, when isToken returns true, and when the 9135 // name of the expression is not a valid token, the match will fail, 9136 // so we need to handle it here. 9137 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 9138 case MCK_SSrcF32: 9139 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 9140 case MCK_SOPPBrTarget: 9141 return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand; 9142 case MCK_VReg32OrOff: 9143 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 9144 case MCK_InterpSlot: 9145 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 9146 case MCK_InterpAttr: 9147 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 9148 case MCK_InterpAttrChan: 9149 return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand; 9150 case MCK_SReg_64: 9151 case MCK_SReg_64_XEXEC: 9152 // Null is defined as a 32-bit register but 9153 // it should also be enabled with 64-bit operands. 9154 // The following code enables it for SReg_64 operands 9155 // used as source and destination. Remaining source 9156 // operands are handled in isInlinableImm. 9157 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 9158 default: 9159 return Match_InvalidOperand; 9160 } 9161 } 9162 9163 //===----------------------------------------------------------------------===// 9164 // endpgm 9165 //===----------------------------------------------------------------------===// 9166 9167 ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) { 9168 SMLoc S = getLoc(); 9169 int64_t Imm = 0; 9170 9171 if (!parseExpr(Imm)) { 9172 // The operand is optional, if not present default to 0 9173 Imm = 0; 9174 } 9175 9176 if (!isUInt<16>(Imm)) 9177 return Error(S, "expected a 16-bit value"); 9178 9179 Operands.push_back( 9180 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 9181 return ParseStatus::Success; 9182 } 9183 9184 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 9185 9186 //===----------------------------------------------------------------------===// 9187 // LDSDIR 9188 //===----------------------------------------------------------------------===// 9189 9190 bool AMDGPUOperand::isWaitVDST() const { 9191 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); 9192 } 9193 9194 //===----------------------------------------------------------------------===// 9195 // VINTERP 9196 //===----------------------------------------------------------------------===// 9197 9198 bool AMDGPUOperand::isWaitEXP() const { 9199 return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm()); 9200 } 9201 9202 //===----------------------------------------------------------------------===// 9203 // Split Barrier 9204 //===----------------------------------------------------------------------===// 9205 9206 bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); } 9207