//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }
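
    // Illustrative note: for a source operand written as "-|v1|" the parser
    // sets both Neg and Abs, so the function above returns
    // SISrcMods::NEG | SISrcMods::ABS, the value encoded into the
    // instruction's src*_modifiers field.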

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }
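
  // A plain literal is parsed with ImmTyNone; every named modifier stored as
  // an immediate ("gds", "clamp", "omod", ...) carries its own ImmTy, which
  // is what the predicate below keys on.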
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
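
  // Naming sketch for the source-operand predicates that follow (mirroring
  // the generated matcher's operand classes): "SC"/"VC" accept a scalar or
  // vector-capable register or an inline constant; a leading "S"/"V" without
  // "C" additionally accepts a literal; the suffix gives the element type
  // and width (B32 = 32-bit int, F16 = 16-bit float, V2* = packed pair).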
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
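
  // The AISrc* predicates below parallel VISrc*, but for accumulation
  // registers (AGPRs) used by the MAI/MFMA instructions.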
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }
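
  // The predicates below are only declared here; their definitions appear
  // later in this file, generally next to the parsing code for the
  // corresponding operand kind.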
  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
  bool isWaitVDST() const;
  bool isWaitEXP() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }
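
  // Note: the *WithInputMods helpers emit two MCInst operands for a single
  // asm operand: first the modifier bits, then the register or immediate.
  // E.g. (illustrative) a VOP3 source written "-|v1|" becomes an immediate
  // operand holding SISrcMods::NEG | SISrcMods::ABS followed by the register
  // operand itself.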
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    }
  }
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
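// Worked example (illustrative): after "s[4:5]" is parsed,
// usesRegister(IS_SGPR, 4, 64) calls usesSgprAt(4 + divideCeil(64, 32) - 1),
// i.e. usesSgprAt(5); SgprIndexUnusedMin becomes 6 and the
// ".kernel.sgpr_count" symbol is updated to 6.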
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
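
  // OperandMode (below) selects how instruction operands are parsed;
  // OperandMode_NSA is used for MIMG non-sequential-address forms, where the
  // address is written as a list of individual VGPRs rather than a
  // contiguous register tuple.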
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }
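
  // Illustrative (assuming an HSA v3+ ABI target): assembly can key off the
  // symbols defined in the constructor above, e.g.
  //   .if .amdgcn.gfx_generation_number >= 10
  // to conditionally assemble code for newer generations.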

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. This needs to be cleaned up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Each field of a composite operand such as hwreg(...) or sendmsg(...) is
  // parsed into an OperandInfoTy: its source location (for diagnostics), its
  // value, and whether it was spelled symbolically and explicitly defined.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
                          const SMLoc &IDLoc);
  bool validateExeczVcczOperands(const OperandVector &Operands);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Mnemonic-level support checks: used to distinguish an invalid instruction
  // from one that exists but is not supported on the current GPU when
  // emitting diagnostics (see checkUnsupportedInstruction).
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken(bool ShouldSkipSpace = true);
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
  void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtDPP(Inst, Operands, true);
  }
  void cvtVOPCNoDstDPP(MCInst &Inst, const OperandVector &Operands,
                       bool IsDPP8 = false);
  void cvtVOPCNoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOPCNoDstDPP(Inst, Operands, true);
  }
  void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                  bool IsDPP8 = false);
  void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOP3DPP(Inst, Operands, true);
  }
  void cvtVOPC64NoDstDPP(MCInst &Inst, const OperandVector &Operands,
                         bool IsDPP8 = false);
  void cvtVOPC64NoDstDPP8(MCInst &Inst, const OperandVector &Operands) {
    cvtVOPC64NoDstDPP(Inst, Operands, true);
  }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;

  AMDGPUOperand::Ptr defaultWaitVDST() const;
  AMDGPUOperand::Ptr defaultWaitEXP() const;
  OperandMatchResultTy parseVOPD(OperandVector &Operands);
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace
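
// The helpers below map an operand's byte size or MVT to APFloat semantics;
// e.g. (illustrative) getFltSemantics(MVT::f16) yields &APFloat::IEEEhalf()
// since f16 is 2 bytes wide.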
bitwidth.
1821 static const fltSemantics *getFltSemantics(unsigned Size) {
1822 switch (Size) {
1823 case 4:
1824 return &APFloat::IEEEsingle();
1825 case 8:
1826 return &APFloat::IEEEdouble();
1827 case 2:
1828 return &APFloat::IEEEhalf();
1829 default:
1830 llvm_unreachable("unsupported fp type");
1831 }
1832 }
1833
1834 static const fltSemantics *getFltSemantics(MVT VT) {
1835 return getFltSemantics(VT.getSizeInBits() / 8);
1836 }
1837
1838 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
1839 switch (OperandType) {
1840 case AMDGPU::OPERAND_REG_IMM_INT32:
1841 case AMDGPU::OPERAND_REG_IMM_FP32:
1842 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1843 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
1844 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1845 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
1846 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1847 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1848 case AMDGPU::OPERAND_REG_IMM_V2FP32:
1849 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
1850 case AMDGPU::OPERAND_REG_IMM_V2INT32:
1851 case AMDGPU::OPERAND_KIMM32:
1852 return &APFloat::IEEEsingle();
1853 case AMDGPU::OPERAND_REG_IMM_INT64:
1854 case AMDGPU::OPERAND_REG_IMM_FP64:
1855 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
1856 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1857 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1858 return &APFloat::IEEEdouble();
1859 case AMDGPU::OPERAND_REG_IMM_INT16:
1860 case AMDGPU::OPERAND_REG_IMM_FP16:
1861 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1862 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
1863 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1864 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1865 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1866 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
1867 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1868 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1869 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1870 case AMDGPU::OPERAND_REG_IMM_V2INT16:
1871 case AMDGPU::OPERAND_REG_IMM_V2FP16:
1872 case AMDGPU::OPERAND_KIMM16:
1873 return &APFloat::IEEEhalf();
1874 default:
1875 llvm_unreachable("unsupported fp type");
1876 }
1877 }
1878
1879 //===----------------------------------------------------------------------===//
1880 // Operand
1881 //===----------------------------------------------------------------------===//
1882
1883 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
1884 bool Lost;
1885
1886 // Convert the literal to the semantics of the target type
1887 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
1888 APFloat::rmNearestTiesToEven,
1889 &Lost);
1890 // We allow precision loss but not overflow or underflow
1891 if (Status != APFloat::opOK &&
1892 Lost &&
1893 ((Status & APFloat::opOverflow) != 0 ||
1894 (Status & APFloat::opUnderflow) != 0)) {
1895 return false;
1896 }
1897
1898 return true;
1899 }
1900
1901 static bool isSafeTruncation(int64_t Val, unsigned Size) {
1902 return isUIntN(Size, Val) || isIntN(Size, Val);
1903 }
1904
1905 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
1906 if (VT.getScalarType() == MVT::i16) {
1907 // FP immediate values are broken.
1908 return isInlinableIntLiteral(Val);
1909 }
1910
1911 // f16/v2f16 operands work correctly for all values.
1912 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1913 }
1914
1915 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1916
1917 // This is a hack to enable named inline values like
1918 // shared_base with both 32-bit and 64-bit operands.
1919 // Note that these values are defined as
1920 // 32-bit operands only.
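// For example (illustrative): both "v_mov_b32 v0, src_shared_base"
// (a 32-bit use) and "s_mov_b64 s[0:1], src_shared_base" (a 64-bit
// use) accept the named value, so it is reported as inlinable here
// regardless of the requested type.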
1921 if (isInlineValue()) {
1922 return true;
1923 }
1924
1925 if (!isImmTy(ImmTyNone)) {
1926 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1927 return false;
1928 }
1929 // TODO: We should avoid using host float here. It would be better to
1930 // check the float bit values which is what a few other places do.
1931 // We've had bot failures before due to weird NaN support on mips hosts.
1932
1933 APInt Literal(64, Imm.Val);
1934
1935 if (Imm.IsFPImm) { // We got fp literal token
1936 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1937 return AMDGPU::isInlinableLiteral64(Imm.Val,
1938 AsmParser->hasInv2PiInlineImm());
1939 }
1940
1941 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1942 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1943 return false;
1944
1945 if (type.getScalarSizeInBits() == 16) {
1946 return isInlineableLiteralOp16(
1947 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1948 type, AsmParser->hasInv2PiInlineImm());
1949 }
1950
1951 // Check if single precision literal is inlinable
1952 return AMDGPU::isInlinableLiteral32(
1953 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1954 AsmParser->hasInv2PiInlineImm());
1955 }
1956
1957 // We got int literal token.
1958 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1959 return AMDGPU::isInlinableLiteral64(Imm.Val,
1960 AsmParser->hasInv2PiInlineImm());
1961 }
1962
1963 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1964 return false;
1965 }
1966
1967 if (type.getScalarSizeInBits() == 16) {
1968 return isInlineableLiteralOp16(
1969 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1970 type, AsmParser->hasInv2PiInlineImm());
1971 }
1972
1973 return AMDGPU::isInlinableLiteral32(
1974 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1975 AsmParser->hasInv2PiInlineImm());
1976 }
1977
1978 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1979 // Check that this immediate can be added as a literal
1980 if (!isImmTy(ImmTyNone)) {
1981 return false;
1982 }
1983
1984 if (!Imm.IsFPImm) {
1985 // We got int literal token.
1986
1987 if (type == MVT::f64 && hasFPModifiers()) {
1988 // Cannot apply fp modifiers to int literals preserving the same semantics
1989 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1990 // disable these cases.
1991 return false;
1992 }
1993
1994 unsigned Size = type.getSizeInBits();
1995 if (Size == 64)
1996 Size = 32;
1997
1998 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1999 // types.
2000 return isSafeTruncation(Imm.Val, Size);
2001 }
2002
2003 // We got fp literal token
2004 if (type == MVT::f64) { // Expected 64-bit fp operand
2005 // We would set the low 32 bits of the literal to zeroes, but we accept such literals
2006 return true;
2007 }
2008
2009 if (type == MVT::i64) { // Expected 64-bit int operand
2010 // We don't allow fp literals in 64-bit integer instructions. It is
2011 // unclear how we should encode them.
2012 return false;
2013 }
2014
2015 // We allow fp literals with f16x2 operands assuming that the specified
2016 // literal goes into the lower half and the upper half is zero. We also
2017 // require that the literal may be losslessly converted to f16.
2018 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
2019 (type == MVT::v2i16)? MVT::i16 :
2020 (type == MVT::v2f32)?
MVT::f32 : type; 2021 2022 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 2023 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 2024 } 2025 2026 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 2027 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 2028 } 2029 2030 bool AMDGPUOperand::isVRegWithInputMods() const { 2031 return isRegClass(AMDGPU::VGPR_32RegClassID) || 2032 // GFX90A allows DPP on 64-bit operands. 2033 (isRegClass(AMDGPU::VReg_64RegClassID) && 2034 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 2035 } 2036 2037 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 2038 if (AsmParser->isVI()) 2039 return isVReg32(); 2040 else if (AsmParser->isGFX9Plus()) 2041 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2042 else 2043 return false; 2044 } 2045 2046 bool AMDGPUOperand::isSDWAFP16Operand() const { 2047 return isSDWAOperand(MVT::f16); 2048 } 2049 2050 bool AMDGPUOperand::isSDWAFP32Operand() const { 2051 return isSDWAOperand(MVT::f32); 2052 } 2053 2054 bool AMDGPUOperand::isSDWAInt16Operand() const { 2055 return isSDWAOperand(MVT::i16); 2056 } 2057 2058 bool AMDGPUOperand::isSDWAInt32Operand() const { 2059 return isSDWAOperand(MVT::i32); 2060 } 2061 2062 bool AMDGPUOperand::isBoolReg() const { 2063 auto FB = AsmParser->getFeatureBits(); 2064 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2065 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2066 } 2067 2068 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2069 { 2070 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2071 assert(Size == 2 || Size == 4 || Size == 8); 2072 2073 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2074 2075 if (Imm.Mods.Abs) { 2076 Val &= ~FpSignMask; 2077 } 2078 if (Imm.Mods.Neg) { 2079 Val ^= FpSignMask; 2080 } 2081 2082 return Val; 2083 } 2084 2085 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2086 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2087 Inst.getNumOperands())) { 2088 addLiteralImmOperand(Inst, Imm.Val, 2089 ApplyModifiers & 2090 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2091 } else { 2092 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2093 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2094 setImmKindNone(); 2095 } 2096 } 2097 2098 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2099 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2100 auto OpNum = Inst.getNumOperands(); 2101 // Check that this operand accepts literals 2102 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2103 2104 if (ApplyModifiers) { 2105 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2106 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum);
2107 Val = applyInputFPModifiers(Val, Size);
2108 }
2109
2110 APInt Literal(64, Val);
2111 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;
2112
2113 if (Imm.IsFPImm) { // We got fp literal token
2114 switch (OpTy) {
2115 case AMDGPU::OPERAND_REG_IMM_INT64:
2116 case AMDGPU::OPERAND_REG_IMM_FP64:
2117 case AMDGPU::OPERAND_REG_INLINE_C_INT64:
2118 case AMDGPU::OPERAND_REG_INLINE_C_FP64:
2119 case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
2120 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
2121 AsmParser->hasInv2PiInlineImm())) {
2122 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
2123 setImmKindConst();
2124 return;
2125 }
2126
2127 // Non-inlineable
2128 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
2129 // For fp operands we check if low 32 bits are zeros
2130 if (Literal.getLoBits(32) != 0) {
2131 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
2132 "Can't encode literal as exact 64-bit floating-point operand. "
2133 "Low 32-bits will be set to zero");
2134 }
2135
2136 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
2137 setImmKindLiteral();
2138 return;
2139 }
2140
2141 // We don't allow fp literals in 64-bit integer instructions. It is
2142 // unclear how we should encode them. This case should be checked earlier
2143 // in predicate methods (isLiteralImm())
2144 llvm_unreachable("fp literal in 64-bit integer instruction.");
2145
2146 case AMDGPU::OPERAND_REG_IMM_INT32:
2147 case AMDGPU::OPERAND_REG_IMM_FP32:
2148 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
2149 case AMDGPU::OPERAND_REG_INLINE_C_INT32:
2150 case AMDGPU::OPERAND_REG_INLINE_C_FP32:
2151 case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
2152 case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
2153 case AMDGPU::OPERAND_REG_IMM_INT16:
2154 case AMDGPU::OPERAND_REG_IMM_FP16:
2155 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
2156 case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2157 case AMDGPU::OPERAND_REG_INLINE_C_FP16:
2158 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2159 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
2160 case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2161 case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
2162 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2163 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
2164 case AMDGPU::OPERAND_REG_IMM_V2INT16:
2165 case AMDGPU::OPERAND_REG_IMM_V2FP16:
2166 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
2167 case AMDGPU::OPERAND_REG_IMM_V2FP32:
2168 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
2169 case AMDGPU::OPERAND_REG_IMM_V2INT32:
2170 case AMDGPU::OPERAND_KIMM32:
2171 case AMDGPU::OPERAND_KIMM16: {
2172 bool lost;
2173 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
2174 // Convert the literal to the semantics of the operand type
2175 FPLiteral.convert(*getOpFltSemantics(OpTy),
2176 APFloat::rmNearestTiesToEven, &lost);
2177 // We allow precision loss but not overflow or underflow. This should be
2178 // checked earlier in isLiteralImm()
2179
2180 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
2181 Inst.addOperand(MCOperand::createImm(ImmVal));
2182 setImmKindLiteral();
2183 return;
2184 }
2185 default:
2186 llvm_unreachable("invalid operand size");
2187 }
2188
2189 return;
2190 }
2191
2192 // We got int literal token.
2193 // Only sign extend inline immediates.
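// Illustrative examples of the rules applied below: in
// "v_add_f32 v0, 64, v1" the integer 64 is in the inlinable range
// and is encoded as an inline constant, while 100 is not and would
// be emitted as a 32-bit literal (only its low 32 bits are kept).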
2194 switch (OpTy) { 2195 case AMDGPU::OPERAND_REG_IMM_INT32: 2196 case AMDGPU::OPERAND_REG_IMM_FP32: 2197 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2198 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2199 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2200 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2201 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2202 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2203 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2204 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2205 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2206 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2207 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2208 if (isSafeTruncation(Val, 32) && 2209 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2210 AsmParser->hasInv2PiInlineImm())) { 2211 Inst.addOperand(MCOperand::createImm(Val)); 2212 setImmKindConst(); 2213 return; 2214 } 2215 2216 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2217 setImmKindLiteral(); 2218 return; 2219 2220 case AMDGPU::OPERAND_REG_IMM_INT64: 2221 case AMDGPU::OPERAND_REG_IMM_FP64: 2222 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2223 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2224 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2225 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2226 Inst.addOperand(MCOperand::createImm(Val)); 2227 setImmKindConst(); 2228 return; 2229 } 2230 2231 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2232 setImmKindLiteral(); 2233 return; 2234 2235 case AMDGPU::OPERAND_REG_IMM_INT16: 2236 case AMDGPU::OPERAND_REG_IMM_FP16: 2237 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2238 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2239 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2240 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2241 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2242 if (isSafeTruncation(Val, 16) && 2243 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2244 AsmParser->hasInv2PiInlineImm())) { 2245 Inst.addOperand(MCOperand::createImm(Val)); 2246 setImmKindConst(); 2247 return; 2248 } 2249 2250 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2251 setImmKindLiteral(); 2252 return; 2253 2254 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2255 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2256 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2257 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2258 assert(isSafeTruncation(Val, 16)); 2259 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2260 AsmParser->hasInv2PiInlineImm())); 2261 2262 Inst.addOperand(MCOperand::createImm(Val)); 2263 return; 2264 } 2265 case AMDGPU::OPERAND_KIMM32: 2266 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2267 setImmKindNone(); 2268 return; 2269 case AMDGPU::OPERAND_KIMM16: 2270 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2271 setImmKindNone(); 2272 return; 2273 default: 2274 llvm_unreachable("invalid operand size"); 2275 } 2276 } 2277 2278 template <unsigned Bitwidth> 2279 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2280 APInt Literal(64, Imm.Val); 2281 setImmKindNone(); 2282 2283 if (!Imm.IsFPImm) { 2284 // We got int literal token. 
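// An integer token is simply truncated to the requested width,
// e.g. for Bitwidth == 16 only the low 16 bits of the value are
// encoded.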
2285 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2286 return; 2287 } 2288 2289 bool Lost; 2290 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2291 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2292 APFloat::rmNearestTiesToEven, &Lost); 2293 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2294 } 2295 2296 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2297 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2298 } 2299 2300 static bool isInlineValue(unsigned Reg) { 2301 switch (Reg) { 2302 case AMDGPU::SRC_SHARED_BASE: 2303 case AMDGPU::SRC_SHARED_LIMIT: 2304 case AMDGPU::SRC_PRIVATE_BASE: 2305 case AMDGPU::SRC_PRIVATE_LIMIT: 2306 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2307 return true; 2308 case AMDGPU::SRC_VCCZ: 2309 case AMDGPU::SRC_EXECZ: 2310 case AMDGPU::SRC_SCC: 2311 return true; 2312 case AMDGPU::SGPR_NULL: 2313 return true; 2314 default: 2315 return false; 2316 } 2317 } 2318 2319 bool AMDGPUOperand::isInlineValue() const { 2320 return isRegKind() && ::isInlineValue(getReg()); 2321 } 2322 2323 //===----------------------------------------------------------------------===// 2324 // AsmParser 2325 //===----------------------------------------------------------------------===// 2326 2327 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2328 if (Is == IS_VGPR) { 2329 switch (RegWidth) { 2330 default: return -1; 2331 case 32: 2332 return AMDGPU::VGPR_32RegClassID; 2333 case 64: 2334 return AMDGPU::VReg_64RegClassID; 2335 case 96: 2336 return AMDGPU::VReg_96RegClassID; 2337 case 128: 2338 return AMDGPU::VReg_128RegClassID; 2339 case 160: 2340 return AMDGPU::VReg_160RegClassID; 2341 case 192: 2342 return AMDGPU::VReg_192RegClassID; 2343 case 224: 2344 return AMDGPU::VReg_224RegClassID; 2345 case 256: 2346 return AMDGPU::VReg_256RegClassID; 2347 case 512: 2348 return AMDGPU::VReg_512RegClassID; 2349 case 1024: 2350 return AMDGPU::VReg_1024RegClassID; 2351 } 2352 } else if (Is == IS_TTMP) { 2353 switch (RegWidth) { 2354 default: return -1; 2355 case 32: 2356 return AMDGPU::TTMP_32RegClassID; 2357 case 64: 2358 return AMDGPU::TTMP_64RegClassID; 2359 case 128: 2360 return AMDGPU::TTMP_128RegClassID; 2361 case 256: 2362 return AMDGPU::TTMP_256RegClassID; 2363 case 512: 2364 return AMDGPU::TTMP_512RegClassID; 2365 } 2366 } else if (Is == IS_SGPR) { 2367 switch (RegWidth) { 2368 default: return -1; 2369 case 32: 2370 return AMDGPU::SGPR_32RegClassID; 2371 case 64: 2372 return AMDGPU::SGPR_64RegClassID; 2373 case 96: 2374 return AMDGPU::SGPR_96RegClassID; 2375 case 128: 2376 return AMDGPU::SGPR_128RegClassID; 2377 case 160: 2378 return AMDGPU::SGPR_160RegClassID; 2379 case 192: 2380 return AMDGPU::SGPR_192RegClassID; 2381 case 224: 2382 return AMDGPU::SGPR_224RegClassID; 2383 case 256: 2384 return AMDGPU::SGPR_256RegClassID; 2385 case 512: 2386 return AMDGPU::SGPR_512RegClassID; 2387 } 2388 } else if (Is == IS_AGPR) { 2389 switch (RegWidth) { 2390 default: return -1; 2391 case 32: 2392 return AMDGPU::AGPR_32RegClassID; 2393 case 64: 2394 return AMDGPU::AReg_64RegClassID; 2395 case 96: 2396 return AMDGPU::AReg_96RegClassID; 2397 case 128: 2398 return AMDGPU::AReg_128RegClassID; 2399 case 160: 2400 return AMDGPU::AReg_160RegClassID; 2401 case 192: 2402 return AMDGPU::AReg_192RegClassID; 2403 case 224: 2404 return AMDGPU::AReg_224RegClassID; 2405 case 256: 2406 return AMDGPU::AReg_256RegClassID; 2407 case 512: 2408 return AMDGPU::AReg_512RegClassID; 
2409 case 1024: 2410 return AMDGPU::AReg_1024RegClassID; 2411 } 2412 } 2413 return -1; 2414 } 2415 2416 static unsigned getSpecialRegForName(StringRef RegName) { 2417 return StringSwitch<unsigned>(RegName) 2418 .Case("exec", AMDGPU::EXEC) 2419 .Case("vcc", AMDGPU::VCC) 2420 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2421 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2422 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2423 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2424 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2425 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2426 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2427 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2428 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2429 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2430 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2431 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2432 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2433 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2434 .Case("m0", AMDGPU::M0) 2435 .Case("vccz", AMDGPU::SRC_VCCZ) 2436 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2437 .Case("execz", AMDGPU::SRC_EXECZ) 2438 .Case("src_execz", AMDGPU::SRC_EXECZ) 2439 .Case("scc", AMDGPU::SRC_SCC) 2440 .Case("src_scc", AMDGPU::SRC_SCC) 2441 .Case("tba", AMDGPU::TBA) 2442 .Case("tma", AMDGPU::TMA) 2443 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2444 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2445 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2446 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2447 .Case("vcc_lo", AMDGPU::VCC_LO) 2448 .Case("vcc_hi", AMDGPU::VCC_HI) 2449 .Case("exec_lo", AMDGPU::EXEC_LO) 2450 .Case("exec_hi", AMDGPU::EXEC_HI) 2451 .Case("tma_lo", AMDGPU::TMA_LO) 2452 .Case("tma_hi", AMDGPU::TMA_HI) 2453 .Case("tba_lo", AMDGPU::TBA_LO) 2454 .Case("tba_hi", AMDGPU::TBA_HI) 2455 .Case("pc", AMDGPU::PC_REG) 2456 .Case("null", AMDGPU::SGPR_NULL) 2457 .Default(AMDGPU::NoRegister); 2458 } 2459 2460 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2461 SMLoc &EndLoc, bool RestoreOnFailure) { 2462 auto R = parseRegister(); 2463 if (!R) return true; 2464 assert(R->isReg()); 2465 RegNo = R->getReg(); 2466 StartLoc = R->getStartLoc(); 2467 EndLoc = R->getEndLoc(); 2468 return false; 2469 } 2470 2471 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2472 SMLoc &EndLoc) { 2473 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2474 } 2475 2476 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2477 SMLoc &StartLoc, 2478 SMLoc &EndLoc) { 2479 bool Result = 2480 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2481 bool PendingErrors = getParser().hasPendingError(); 2482 getParser().clearPendingErrors(); 2483 if (PendingErrors) 2484 return MatchOperand_ParseFail; 2485 if (Result) 2486 return MatchOperand_NoMatch; 2487 return MatchOperand_Success; 2488 } 2489 2490 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2491 RegisterKind RegKind, unsigned Reg1, 2492 SMLoc Loc) { 2493 switch (RegKind) { 2494 case IS_SPECIAL: 2495 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2496 Reg = AMDGPU::EXEC; 2497 RegWidth = 64; 2498 return true; 2499 } 2500 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2501 Reg = AMDGPU::FLAT_SCR; 2502 RegWidth = 64; 2503 return true; 2504 } 2505 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2506 Reg = AMDGPU::XNACK_MASK; 2507 RegWidth = 64; 
2508 return true; 2509 } 2510 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2511 Reg = AMDGPU::VCC; 2512 RegWidth = 64; 2513 return true; 2514 } 2515 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2516 Reg = AMDGPU::TBA; 2517 RegWidth = 64; 2518 return true; 2519 } 2520 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2521 Reg = AMDGPU::TMA; 2522 RegWidth = 64; 2523 return true; 2524 } 2525 Error(Loc, "register does not fit in the list"); 2526 return false; 2527 case IS_VGPR: 2528 case IS_SGPR: 2529 case IS_AGPR: 2530 case IS_TTMP: 2531 if (Reg1 != Reg + RegWidth / 32) { 2532 Error(Loc, "registers in a list must have consecutive indices"); 2533 return false; 2534 } 2535 RegWidth += 32; 2536 return true; 2537 default: 2538 llvm_unreachable("unexpected register kind"); 2539 } 2540 } 2541 2542 struct RegInfo { 2543 StringLiteral Name; 2544 RegisterKind Kind; 2545 }; 2546 2547 static constexpr RegInfo RegularRegisters[] = { 2548 {{"v"}, IS_VGPR}, 2549 {{"s"}, IS_SGPR}, 2550 {{"ttmp"}, IS_TTMP}, 2551 {{"acc"}, IS_AGPR}, 2552 {{"a"}, IS_AGPR}, 2553 }; 2554 2555 static bool isRegularReg(RegisterKind Kind) { 2556 return Kind == IS_VGPR || 2557 Kind == IS_SGPR || 2558 Kind == IS_TTMP || 2559 Kind == IS_AGPR; 2560 } 2561 2562 static const RegInfo* getRegularRegInfo(StringRef Str) { 2563 for (const RegInfo &Reg : RegularRegisters) 2564 if (Str.startswith(Reg.Name)) 2565 return &Reg; 2566 return nullptr; 2567 } 2568 2569 static bool getRegNum(StringRef Str, unsigned& Num) { 2570 return !Str.getAsInteger(10, Num); 2571 } 2572 2573 bool 2574 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2575 const AsmToken &NextToken) const { 2576 2577 // A list of consecutive registers: [s0,s1,s2,s3] 2578 if (Token.is(AsmToken::LBrac)) 2579 return true; 2580 2581 if (!Token.is(AsmToken::Identifier)) 2582 return false; 2583 2584 // A single register like s0 or a range of registers like s[0:1] 2585 2586 StringRef Str = Token.getString(); 2587 const RegInfo *Reg = getRegularRegInfo(Str); 2588 if (Reg) { 2589 StringRef RegName = Reg->Name; 2590 StringRef RegSuffix = Str.substr(RegName.size()); 2591 if (!RegSuffix.empty()) { 2592 unsigned Num; 2593 // A single register with an index: rXX 2594 if (getRegNum(RegSuffix, Num)) 2595 return true; 2596 } else { 2597 // A range of registers: r[XX:YY]. 2598 if (NextToken.is(AsmToken::LBrac)) 2599 return true; 2600 } 2601 } 2602 2603 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2604 } 2605 2606 bool 2607 AMDGPUAsmParser::isRegister() 2608 { 2609 return isRegister(getToken(), peekToken()); 2610 } 2611 2612 unsigned 2613 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2614 unsigned RegNum, 2615 unsigned RegWidth, 2616 SMLoc Loc) { 2617 2618 assert(isRegularReg(RegKind)); 2619 2620 unsigned AlignSize = 1; 2621 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2622 // SGPR and TTMP registers must be aligned. 2623 // Max required alignment is 4 dwords. 
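// E.g. a 64-bit pair such as s[2:3] needs 2-dword alignment, so
// s[1:2] is rejected below, while wider tuples like s[8:23] only
// need to start at a multiple of 4.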
2624 AlignSize = std::min(RegWidth / 32, 4u); 2625 } 2626 2627 if (RegNum % AlignSize != 0) { 2628 Error(Loc, "invalid register alignment"); 2629 return AMDGPU::NoRegister; 2630 } 2631 2632 unsigned RegIdx = RegNum / AlignSize; 2633 int RCID = getRegClass(RegKind, RegWidth); 2634 if (RCID == -1) { 2635 Error(Loc, "invalid or unsupported register size"); 2636 return AMDGPU::NoRegister; 2637 } 2638 2639 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2640 const MCRegisterClass RC = TRI->getRegClass(RCID); 2641 if (RegIdx >= RC.getNumRegs()) { 2642 Error(Loc, "register index is out of range"); 2643 return AMDGPU::NoRegister; 2644 } 2645 2646 return RC.getRegister(RegIdx); 2647 } 2648 2649 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2650 int64_t RegLo, RegHi; 2651 if (!skipToken(AsmToken::LBrac, "missing register index")) 2652 return false; 2653 2654 SMLoc FirstIdxLoc = getLoc(); 2655 SMLoc SecondIdxLoc; 2656 2657 if (!parseExpr(RegLo)) 2658 return false; 2659 2660 if (trySkipToken(AsmToken::Colon)) { 2661 SecondIdxLoc = getLoc(); 2662 if (!parseExpr(RegHi)) 2663 return false; 2664 } else { 2665 RegHi = RegLo; 2666 } 2667 2668 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2669 return false; 2670 2671 if (!isUInt<32>(RegLo)) { 2672 Error(FirstIdxLoc, "invalid register index"); 2673 return false; 2674 } 2675 2676 if (!isUInt<32>(RegHi)) { 2677 Error(SecondIdxLoc, "invalid register index"); 2678 return false; 2679 } 2680 2681 if (RegLo > RegHi) { 2682 Error(FirstIdxLoc, "first register index should not exceed second index"); 2683 return false; 2684 } 2685 2686 Num = static_cast<unsigned>(RegLo); 2687 RegWidth = 32 * ((RegHi - RegLo) + 1); 2688 return true; 2689 } 2690 2691 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2692 unsigned &RegNum, unsigned &RegWidth, 2693 SmallVectorImpl<AsmToken> &Tokens) { 2694 assert(isToken(AsmToken::Identifier)); 2695 unsigned Reg = getSpecialRegForName(getTokenStr()); 2696 if (Reg) { 2697 RegNum = 0; 2698 RegWidth = 32; 2699 RegKind = IS_SPECIAL; 2700 Tokens.push_back(getToken()); 2701 lex(); // skip register name 2702 } 2703 return Reg; 2704 } 2705 2706 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2707 unsigned &RegNum, unsigned &RegWidth, 2708 SmallVectorImpl<AsmToken> &Tokens) { 2709 assert(isToken(AsmToken::Identifier)); 2710 StringRef RegName = getTokenStr(); 2711 auto Loc = getLoc(); 2712 2713 const RegInfo *RI = getRegularRegInfo(RegName); 2714 if (!RI) { 2715 Error(Loc, "invalid register name"); 2716 return AMDGPU::NoRegister; 2717 } 2718 2719 Tokens.push_back(getToken()); 2720 lex(); // skip register name 2721 2722 RegKind = RI->Kind; 2723 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2724 if (!RegSuffix.empty()) { 2725 // Single 32-bit register: vXX. 2726 if (!getRegNum(RegSuffix, RegNum)) { 2727 Error(Loc, "invalid register index"); 2728 return AMDGPU::NoRegister; 2729 } 2730 RegWidth = 32; 2731 } else { 2732 // Range of registers: v[XX:YY]. ":YY" is optional. 
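// E.g. "v[8:11]" yields RegNum = 8 and RegWidth = 128, and "v[5]"
// denotes a single 32-bit register written in range syntax.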
2733 if (!ParseRegRange(RegNum, RegWidth)) 2734 return AMDGPU::NoRegister; 2735 } 2736 2737 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2738 } 2739 2740 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2741 unsigned &RegWidth, 2742 SmallVectorImpl<AsmToken> &Tokens) { 2743 unsigned Reg = AMDGPU::NoRegister; 2744 auto ListLoc = getLoc(); 2745 2746 if (!skipToken(AsmToken::LBrac, 2747 "expected a register or a list of registers")) { 2748 return AMDGPU::NoRegister; 2749 } 2750 2751 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2752 2753 auto Loc = getLoc(); 2754 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2755 return AMDGPU::NoRegister; 2756 if (RegWidth != 32) { 2757 Error(Loc, "expected a single 32-bit register"); 2758 return AMDGPU::NoRegister; 2759 } 2760 2761 for (; trySkipToken(AsmToken::Comma); ) { 2762 RegisterKind NextRegKind; 2763 unsigned NextReg, NextRegNum, NextRegWidth; 2764 Loc = getLoc(); 2765 2766 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2767 NextRegNum, NextRegWidth, 2768 Tokens)) { 2769 return AMDGPU::NoRegister; 2770 } 2771 if (NextRegWidth != 32) { 2772 Error(Loc, "expected a single 32-bit register"); 2773 return AMDGPU::NoRegister; 2774 } 2775 if (NextRegKind != RegKind) { 2776 Error(Loc, "registers in a list must be of the same kind"); 2777 return AMDGPU::NoRegister; 2778 } 2779 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2780 return AMDGPU::NoRegister; 2781 } 2782 2783 if (!skipToken(AsmToken::RBrac, 2784 "expected a comma or a closing square bracket")) { 2785 return AMDGPU::NoRegister; 2786 } 2787 2788 if (isRegularReg(RegKind)) 2789 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2790 2791 return Reg; 2792 } 2793 2794 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2795 unsigned &RegNum, unsigned &RegWidth, 2796 SmallVectorImpl<AsmToken> &Tokens) { 2797 auto Loc = getLoc(); 2798 Reg = AMDGPU::NoRegister; 2799 2800 if (isToken(AsmToken::Identifier)) { 2801 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2802 if (Reg == AMDGPU::NoRegister) 2803 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2804 } else { 2805 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2806 } 2807 2808 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2809 if (Reg == AMDGPU::NoRegister) { 2810 assert(Parser.hasPendingError()); 2811 return false; 2812 } 2813 2814 if (!subtargetHasRegister(*TRI, Reg)) { 2815 if (Reg == AMDGPU::SGPR_NULL) { 2816 Error(Loc, "'null' operand is not supported on this GPU"); 2817 } else { 2818 Error(Loc, "register not available on this GPU"); 2819 } 2820 return false; 2821 } 2822 2823 return true; 2824 } 2825 2826 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2827 unsigned &RegNum, unsigned &RegWidth, 2828 bool RestoreOnFailure /*=false*/) { 2829 Reg = AMDGPU::NoRegister; 2830 2831 SmallVector<AsmToken, 1> Tokens; 2832 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2833 if (RestoreOnFailure) { 2834 while (!Tokens.empty()) { 2835 getLexer().UnLex(Tokens.pop_back_val()); 2836 } 2837 } 2838 return true; 2839 } 2840 return false; 2841 } 2842 2843 Optional<StringRef> 2844 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2845 switch (RegKind) { 2846 case IS_VGPR: 2847 return StringRef(".amdgcn.next_free_vgpr"); 2848 case IS_SGPR: 2849 return StringRef(".amdgcn.next_free_sgpr"); 2850 default: 2851 return None; 2852 } 2853 } 2854 2855 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2856 auto SymbolName = getGprCountSymbolName(RegKind); 2857 assert(SymbolName && "initializing invalid register kind"); 2858 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2859 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2860 } 2861 2862 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2863 unsigned DwordRegIndex, 2864 unsigned RegWidth) { 2865 // Symbols are only defined for GCN targets 2866 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2867 return true; 2868 2869 auto SymbolName = getGprCountSymbolName(RegKind); 2870 if (!SymbolName) 2871 return true; 2872 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2873 2874 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2875 int64_t OldCount; 2876 2877 if (!Sym->isVariable()) 2878 return !Error(getLoc(), 2879 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2880 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2881 return !Error( 2882 getLoc(), 2883 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2884 2885 if (OldCount <= NewMax) 2886 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2887 2888 return true; 2889 } 2890 2891 std::unique_ptr<AMDGPUOperand> 2892 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2893 const auto &Tok = getToken(); 2894 SMLoc StartLoc = Tok.getLoc(); 2895 SMLoc EndLoc = Tok.getEndLoc(); 2896 RegisterKind RegKind; 2897 unsigned Reg, RegNum, RegWidth; 2898 2899 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2900 return nullptr; 2901 } 2902 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2903 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2904 return nullptr; 2905 } else 2906 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2907 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2908 } 2909 2910 OperandMatchResultTy 2911 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2912 // TODO: add syntactic sugar for 1/(2*PI) 2913 2914 if (isRegister()) 2915 return MatchOperand_NoMatch; 2916 assert(!isModifier()); 2917 2918 const auto& Tok = getToken(); 2919 const auto& NextTok = peekToken(); 2920 bool IsReal = Tok.is(AsmToken::Real); 2921 SMLoc S = getLoc(); 2922 bool Negate = false; 2923 2924 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2925 lex(); 2926 IsReal = true; 2927 Negate = true; 2928 } 2929 2930 if (IsReal) { 2931 // Floating-point expressions are not supported. 2932 // Can only allow floating-point literals with an 2933 // optional sign. 
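// E.g. "-1.5" is accepted here as a negated fp literal, but a
// compound expression like "1.5 + 2.0" is not evaluated as a
// floating-point constant.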
2934 2935 StringRef Num = getTokenStr(); 2936 lex(); 2937 2938 APFloat RealVal(APFloat::IEEEdouble()); 2939 auto roundMode = APFloat::rmNearestTiesToEven; 2940 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2941 return MatchOperand_ParseFail; 2942 } 2943 if (Negate) 2944 RealVal.changeSign(); 2945 2946 Operands.push_back( 2947 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2948 AMDGPUOperand::ImmTyNone, true)); 2949 2950 return MatchOperand_Success; 2951 2952 } else { 2953 int64_t IntVal; 2954 const MCExpr *Expr; 2955 SMLoc S = getLoc(); 2956 2957 if (HasSP3AbsModifier) { 2958 // This is a workaround for handling expressions 2959 // as arguments of SP3 'abs' modifier, for example: 2960 // |1.0| 2961 // |-1| 2962 // |1+x| 2963 // This syntax is not compatible with syntax of standard 2964 // MC expressions (due to the trailing '|'). 2965 SMLoc EndLoc; 2966 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2967 return MatchOperand_ParseFail; 2968 } else { 2969 if (Parser.parseExpression(Expr)) 2970 return MatchOperand_ParseFail; 2971 } 2972 2973 if (Expr->evaluateAsAbsolute(IntVal)) { 2974 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2975 } else { 2976 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2977 } 2978 2979 return MatchOperand_Success; 2980 } 2981 2982 return MatchOperand_NoMatch; 2983 } 2984 2985 OperandMatchResultTy 2986 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2987 if (!isRegister()) 2988 return MatchOperand_NoMatch; 2989 2990 if (auto R = parseRegister()) { 2991 assert(R->isReg()); 2992 Operands.push_back(std::move(R)); 2993 return MatchOperand_Success; 2994 } 2995 return MatchOperand_ParseFail; 2996 } 2997 2998 OperandMatchResultTy 2999 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 3000 auto res = parseReg(Operands); 3001 if (res != MatchOperand_NoMatch) { 3002 return res; 3003 } else if (isModifier()) { 3004 return MatchOperand_NoMatch; 3005 } else { 3006 return parseImm(Operands, HasSP3AbsMod); 3007 } 3008 } 3009 3010 bool 3011 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3012 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 3013 const auto &str = Token.getString(); 3014 return str == "abs" || str == "neg" || str == "sext"; 3015 } 3016 return false; 3017 } 3018 3019 bool 3020 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 3021 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 3022 } 3023 3024 bool 3025 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3026 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 3027 } 3028 3029 bool 3030 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 3031 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 3032 } 3033 3034 // Check if this is an operand modifier or an opcode modifier 3035 // which may look like an expression but it is not. We should 3036 // avoid parsing these modifiers as expressions. Currently 3037 // recognized sequences are: 3038 // |...| 3039 // abs(...) 3040 // neg(...) 3041 // sext(...) 3042 // -reg 3043 // -|...| 3044 // -abs(...) 3045 // name:... 3046 // Note that simple opcode modifiers like 'gds' may be parsed as 3047 // expressions; this is a special case. See getExpressionAsToken. 
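// For instance, "|v0|" and "sext(v0)" are recognized as modifiers
// here, while a bare "gds" is first parsed as an expression and may
// later be reinterpreted as a token.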
3048 //
3049 bool
3050 AMDGPUAsmParser::isModifier() {
3051
3052 AsmToken Tok = getToken();
3053 AsmToken NextToken[2];
3054 peekTokens(NextToken);
3055
3056 return isOperandModifier(Tok, NextToken[0]) ||
3057 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3058 isOpcodeModifierWithVal(Tok, NextToken[0]);
3059 }
3060
3061 // Check if the current token is an SP3 'neg' modifier.
3062 // Currently this modifier is allowed in the following contexts:
3063 //
3064 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3065 // 2. Before an 'abs' modifier: -abs(...)
3066 // 3. Before an SP3 'abs' modifier: -|...|
3067 //
3068 // In all other cases "-" is handled as a part
3069 // of an expression that follows the sign.
3070 //
3071 // Note: When "-" is followed by an integer literal,
3072 // this is interpreted as integer negation rather
3073 // than a floating-point NEG modifier applied to N.
3074 // Besides being counter-intuitive, such use of the floating-point
3075 // NEG modifier would have resulted in different meanings
3076 // of integer literals used with VOP1/2/C and VOP3,
3077 // for example:
3078 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3079 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3080 // Negative fp literals with preceding "-" are
3081 // handled likewise, for uniformity.
3082 //
3083 bool
3084 AMDGPUAsmParser::parseSP3NegModifier() {
3085
3086 AsmToken NextToken[2];
3087 peekTokens(NextToken);
3088
3089 if (isToken(AsmToken::Minus) &&
3090 (isRegister(NextToken[0], NextToken[1]) ||
3091 NextToken[0].is(AsmToken::Pipe) ||
3092 isId(NextToken[0], "abs"))) {
3093 lex();
3094 return true;
3095 }
3096
3097 return false;
3098 }
3099
3100 OperandMatchResultTy
3101 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3102 bool AllowImm) {
3103 bool Neg, SP3Neg;
3104 bool Abs, SP3Abs;
3105 SMLoc Loc;
3106
3107 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3108 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3109 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3110 return MatchOperand_ParseFail;
3111 }
3112
3113 SP3Neg = parseSP3NegModifier();
3114
3115 Loc = getLoc();
3116 Neg = trySkipId("neg");
3117 if (Neg && SP3Neg) {
3118 Error(Loc, "expected register or immediate");
3119 return MatchOperand_ParseFail;
3120 }
3121 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3122 return MatchOperand_ParseFail;
3123
3124 Abs = trySkipId("abs");
3125 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3126 return MatchOperand_ParseFail;
3127
3128 Loc = getLoc();
3129 SP3Abs = trySkipToken(AsmToken::Pipe);
3130 if (Abs && SP3Abs) {
3131 Error(Loc, "expected register or immediate");
3132 return MatchOperand_ParseFail;
3133 }
3134
3135 OperandMatchResultTy Res;
3136 if (AllowImm) {
3137 Res = parseRegOrImm(Operands, SP3Abs);
3138 } else {
3139 Res = parseReg(Operands);
3140 }
3141 if (Res != MatchOperand_Success) {
3142 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3143 } 3144 3145 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3146 return MatchOperand_ParseFail; 3147 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3148 return MatchOperand_ParseFail; 3149 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3150 return MatchOperand_ParseFail; 3151 3152 AMDGPUOperand::Modifiers Mods; 3153 Mods.Abs = Abs || SP3Abs; 3154 Mods.Neg = Neg || SP3Neg; 3155 3156 if (Mods.hasFPModifiers()) { 3157 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3158 if (Op.isExpr()) { 3159 Error(Op.getStartLoc(), "expected an absolute expression"); 3160 return MatchOperand_ParseFail; 3161 } 3162 Op.setModifiers(Mods); 3163 } 3164 return MatchOperand_Success; 3165 } 3166 3167 OperandMatchResultTy 3168 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3169 bool AllowImm) { 3170 bool Sext = trySkipId("sext"); 3171 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3172 return MatchOperand_ParseFail; 3173 3174 OperandMatchResultTy Res; 3175 if (AllowImm) { 3176 Res = parseRegOrImm(Operands); 3177 } else { 3178 Res = parseReg(Operands); 3179 } 3180 if (Res != MatchOperand_Success) { 3181 return Sext? MatchOperand_ParseFail : Res; 3182 } 3183 3184 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3185 return MatchOperand_ParseFail; 3186 3187 AMDGPUOperand::Modifiers Mods; 3188 Mods.Sext = Sext; 3189 3190 if (Mods.hasIntModifiers()) { 3191 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3192 if (Op.isExpr()) { 3193 Error(Op.getStartLoc(), "expected an absolute expression"); 3194 return MatchOperand_ParseFail; 3195 } 3196 Op.setModifiers(Mods); 3197 } 3198 3199 return MatchOperand_Success; 3200 } 3201 3202 OperandMatchResultTy 3203 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3204 return parseRegOrImmWithFPInputMods(Operands, false); 3205 } 3206 3207 OperandMatchResultTy 3208 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3209 return parseRegOrImmWithIntInputMods(Operands, false); 3210 } 3211 3212 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3213 auto Loc = getLoc(); 3214 if (trySkipId("off")) { 3215 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3216 AMDGPUOperand::ImmTyOff, false)); 3217 return MatchOperand_Success; 3218 } 3219 3220 if (!isRegister()) 3221 return MatchOperand_NoMatch; 3222 3223 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3224 if (Reg) { 3225 Operands.push_back(std::move(Reg)); 3226 return MatchOperand_Success; 3227 } 3228 3229 return MatchOperand_ParseFail; 3230 3231 } 3232 3233 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3234 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3235 3236 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3237 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3238 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3239 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3240 return Match_InvalidOperand; 3241 3242 if ((TSFlags & SIInstrFlags::VOP3) && 3243 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3244 getForcedEncodingSize() != 64) 3245 return Match_PreferE32; 3246 3247 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3248 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3249 // v_mac_f32/16 allow only dst_sel == DWORD; 3250 auto OpNum = 3251 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3252 const auto &Op = Inst.getOperand(OpNum);
3253 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3254 return Match_InvalidOperand;
3255 }
3256 }
3257
3258 return Match_Success;
3259 }
3260
3261 static ArrayRef<unsigned> getAllVariants() {
3262 static const unsigned Variants[] = {
3263 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3264 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9,
3265 AMDGPUAsmVariants::DPP, AMDGPUAsmVariants::VOP3_DPP
3266 };
3267
3268 return makeArrayRef(Variants);
3269 }
3270
3271 // What asm variants we should check
3272 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3273 if (isForcedDPP() && isForcedVOP3()) {
3274 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
3275 return makeArrayRef(Variants);
3276 }
3277 if (getForcedEncodingSize() == 32) {
3278 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3279 return makeArrayRef(Variants);
3280 }
3281
3282 if (isForcedVOP3()) {
3283 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3284 return makeArrayRef(Variants);
3285 }
3286
3287 if (isForcedSDWA()) {
3288 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3289 AMDGPUAsmVariants::SDWA9};
3290 return makeArrayRef(Variants);
3291 }
3292
3293 if (isForcedDPP()) {
3294 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3295 return makeArrayRef(Variants);
3296 }
3297
3298 return getAllVariants();
3299 }
3300
3301 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3302 if (isForcedDPP() && isForcedVOP3())
3303 return "e64_dpp";
3304
3305 if (getForcedEncodingSize() == 32)
3306 return "e32";
3307
3308 if (isForcedVOP3())
3309 return "e64";
3310
3311 if (isForcedSDWA())
3312 return "sdwa";
3313
3314 if (isForcedDPP())
3315 return "dpp";
3316
3317 return "";
3318 }
3319
3320 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3321 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3322 const unsigned Num = Desc.getNumImplicitUses();
3323 for (unsigned i = 0; i < Num; ++i) {
3324 unsigned Reg = Desc.ImplicitUses[i];
3325 switch (Reg) {
3326 case AMDGPU::FLAT_SCR:
3327 case AMDGPU::VCC:
3328 case AMDGPU::VCC_LO:
3329 case AMDGPU::VCC_HI:
3330 case AMDGPU::M0:
3331 return Reg;
3332 default:
3333 break;
3334 }
3335 }
3336 return AMDGPU::NoRegister;
3337 }
3338
3339 // NB: This code is correct only when used to check constant
3340 // bus limitations because GFX7 supports no f16 inline constants.
3341 // Note that there are no cases when a GFX7 opcode violates
3342 // constant bus limitations due to the use of an f16 constant.
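// Illustrative example: a 16-bit fp value such as 0x3C00 (1.0) may
// be reported as inlinable below even on GFX7, which has no f16
// inline constants; per the note above this cannot change the
// outcome of a constant bus check.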
3343 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3344 unsigned OpIdx) const { 3345 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3346 3347 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3348 return false; 3349 } 3350 3351 const MCOperand &MO = Inst.getOperand(OpIdx); 3352 3353 int64_t Val = MO.getImm(); 3354 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3355 3356 switch (OpSize) { // expected operand size 3357 case 8: 3358 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3359 case 4: 3360 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3361 case 2: { 3362 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3363 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3364 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3365 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3366 return AMDGPU::isInlinableIntLiteral(Val); 3367 3368 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3369 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3370 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3371 return AMDGPU::isInlinableIntLiteralV216(Val); 3372 3373 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3374 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3375 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3376 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3377 3378 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3379 } 3380 default: 3381 llvm_unreachable("invalid operand size"); 3382 } 3383 } 3384 3385 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3386 if (!isGFX10Plus()) 3387 return 1; 3388 3389 switch (Opcode) { 3390 // 64-bit shift instructions can use only one scalar value input 3391 case AMDGPU::V_LSHLREV_B64_e64: 3392 case AMDGPU::V_LSHLREV_B64_gfx10: 3393 case AMDGPU::V_LSHLREV_B64_e64_gfx11: 3394 case AMDGPU::V_LSHRREV_B64_e64: 3395 case AMDGPU::V_LSHRREV_B64_gfx10: 3396 case AMDGPU::V_LSHRREV_B64_e64_gfx11: 3397 case AMDGPU::V_ASHRREV_I64_e64: 3398 case AMDGPU::V_ASHRREV_I64_gfx10: 3399 case AMDGPU::V_ASHRREV_I64_e64_gfx11: 3400 case AMDGPU::V_LSHL_B64_e64: 3401 case AMDGPU::V_LSHR_B64_e64: 3402 case AMDGPU::V_ASHR_I64_e64: 3403 return 1; 3404 default: 3405 return 2; 3406 } 3407 } 3408 3409 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3410 const MCOperand &MO = Inst.getOperand(OpIdx); 3411 if (MO.isImm()) { 3412 return !isInlineConstant(Inst, OpIdx); 3413 } else if (MO.isReg()) { 3414 auto Reg = MO.getReg(); 3415 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3416 auto PReg = mc2PseudoReg(Reg); 3417 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3418 } else { 3419 return true; 3420 } 3421 } 3422 3423 bool 3424 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3425 const OperandVector &Operands) { 3426 const unsigned Opcode = Inst.getOpcode(); 3427 const MCInstrDesc &Desc = MII.get(Opcode); 3428 unsigned LastSGPR = AMDGPU::NoRegister; 3429 unsigned ConstantBusUseCount = 0; 3430 unsigned NumLiterals = 0; 3431 unsigned LiteralSize; 3432 3433 if (Desc.TSFlags & 3434 (SIInstrFlags::VOPC | 3435 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3436 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3437 SIInstrFlags::SDWA)) { 3438 // Check special imm operands (used by madmk, etc) 3439 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3440 ++NumLiterals; 3441 LiteralSize = 4; 3442 } 3443 3444 SmallDenseSet<unsigned> SGPRsUsed; 3445 unsigned SGPRUsed = 
findImplicitSGPRReadInVOP(Inst);
3446 if (SGPRUsed != AMDGPU::NoRegister) {
3447 SGPRsUsed.insert(SGPRUsed);
3448 ++ConstantBusUseCount;
3449 }
3450
3451 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3452 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3453 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3454
3455 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3456
3457 for (int OpIdx : OpIndices) {
3458 if (OpIdx == -1) break;
3459
3460 const MCOperand &MO = Inst.getOperand(OpIdx);
3461 if (usesConstantBus(Inst, OpIdx)) {
3462 if (MO.isReg()) {
3463 LastSGPR = mc2PseudoReg(MO.getReg());
3464 // Pairs of registers with a partial intersection like these:
3465 // s0, s[0:1]
3466 // flat_scratch_lo, flat_scratch
3467 // flat_scratch_lo, flat_scratch_hi
3468 // are theoretically valid but they are disabled anyway.
3469 // Note that this code mimics SIInstrInfo::verifyInstruction
3470 if (SGPRsUsed.insert(LastSGPR).second) {
3471 ++ConstantBusUseCount;
3472 }
3473 } else { // Expression or a literal
3474
3475 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3476 continue; // special operand like VINTERP attr_chan
3477
3478 // An instruction may use only one literal.
3479 // This has been validated in the previous step.
3480 // See validateVOPLiteral.
3481 // This literal may be used as more than one operand.
3482 // If all these operands are of the same size,
3483 // this literal counts as one scalar value.
3484 // Otherwise it counts as 2 scalar values.
3485 // See "GFX10 Shader Programming", section 3.6.2.3.
3486
3487 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3488 if (Size < 4) Size = 4;
3489
3490 if (NumLiterals == 0) {
3491 NumLiterals = 1;
3492 LiteralSize = Size;
3493 } else if (LiteralSize != Size) {
3494 NumLiterals = 2;
3495 }
3496 }
3497 }
3498 }
3499 }
3500 ConstantBusUseCount += NumLiterals;
3501
3502 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3503 return true;
3504
3505 SMLoc LitLoc = getLitLoc(Operands);
3506 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3507 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3508 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3509 return false; 3510 } 3511 3512 bool 3513 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3514 const OperandVector &Operands) { 3515 const unsigned Opcode = Inst.getOpcode(); 3516 const MCInstrDesc &Desc = MII.get(Opcode); 3517 3518 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3519 if (DstIdx == -1 || 3520 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3521 return true; 3522 } 3523 3524 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3525 3526 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3527 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3528 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3529 3530 assert(DstIdx != -1); 3531 const MCOperand &Dst = Inst.getOperand(DstIdx); 3532 assert(Dst.isReg()); 3533 3534 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3535 3536 for (int SrcIdx : SrcIndices) { 3537 if (SrcIdx == -1) break; 3538 const MCOperand &Src = Inst.getOperand(SrcIdx); 3539 if (Src.isReg()) { 3540 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3541 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3542 Error(getRegLoc(SrcReg, Operands), 3543 "destination must be different than all sources"); 3544 return false; 3545 } 3546 } 3547 } 3548 3549 return true; 3550 } 3551 3552 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3553 3554 const unsigned Opc = Inst.getOpcode(); 3555 const MCInstrDesc &Desc = MII.get(Opc); 3556 3557 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3558 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3559 assert(ClampIdx != -1); 3560 return Inst.getOperand(ClampIdx).getImm() == 0; 3561 } 3562 3563 return true; 3564 } 3565 3566 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3567 3568 const unsigned Opc = Inst.getOpcode(); 3569 const MCInstrDesc &Desc = MII.get(Opc); 3570 3571 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3572 return None; 3573 3574 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3575 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3576 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3577 3578 assert(VDataIdx != -1); 3579 3580 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3581 return None; 3582 3583 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3584 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3585 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3586 if (DMask == 0) 3587 DMask = 1; 3588 3589 bool isPackedD16 = false; 3590 unsigned DataSize = 3591 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3592 if (hasPackedD16()) { 3593 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3594 isPackedD16 = D16Idx >= 0; 3595 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3596 DataSize = (DataSize + 1) / 2; 3597 } 3598 3599 if ((VDataSize / 4) == DataSize + TFESize) 3600 return None; 3601 3602 return StringRef(isPackedD16 3603 ? 
"image data size does not match dmask, d16 and tfe" 3604 : "image data size does not match dmask and tfe"); 3605 } 3606 3607 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3608 const unsigned Opc = Inst.getOpcode(); 3609 const MCInstrDesc &Desc = MII.get(Opc); 3610 3611 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3612 return true; 3613 3614 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3615 3616 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3617 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3618 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3619 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3620 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3621 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3622 3623 assert(VAddr0Idx != -1); 3624 assert(SrsrcIdx != -1); 3625 assert(SrsrcIdx > VAddr0Idx); 3626 3627 if (DimIdx == -1) 3628 return true; // intersect_ray 3629 3630 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3631 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3632 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3633 unsigned ActualAddrSize = 3634 IsNSA ? SrsrcIdx - VAddr0Idx 3635 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3636 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3637 3638 unsigned ExpectedAddrSize = 3639 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3640 3641 if (!IsNSA) { 3642 if (ExpectedAddrSize > 8) 3643 ExpectedAddrSize = 16; 3644 3645 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3646 // This provides backward compatibility for assembly created 3647 // before 160b/192b/224b types were directly supported. 3648 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3649 return true; 3650 } 3651 3652 return ActualAddrSize == ExpectedAddrSize; 3653 } 3654 3655 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3656 3657 const unsigned Opc = Inst.getOpcode(); 3658 const MCInstrDesc &Desc = MII.get(Opc); 3659 3660 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3661 return true; 3662 if (!Desc.mayLoad() || !Desc.mayStore()) 3663 return true; // Not atomic 3664 3665 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3666 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3667 3668 // This is an incomplete check because image_atomic_cmpswap 3669 // may only use 0x3 and 0xf while other atomic operations 3670 // may use 0x1 and 0x3. However these limitations are 3671 // verified when we check that dmask matches dst size. 3672 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3673 } 3674 3675 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3676 3677 const unsigned Opc = Inst.getOpcode(); 3678 const MCInstrDesc &Desc = MII.get(Opc); 3679 3680 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3681 return true; 3682 3683 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3684 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3685 3686 // GATHER4 instructions use dmask in a different fashion compared to 3687 // other MIMG instructions. The only useful DMASK values are 3688 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3689 // (red,red,red,red) etc.) The ISA document doesn't mention 3690 // this. 
3691 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3692 } 3693 3694 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3695 const unsigned Opc = Inst.getOpcode(); 3696 const MCInstrDesc &Desc = MII.get(Opc); 3697 3698 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3699 return true; 3700 3701 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3702 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3703 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3704 3705 if (!BaseOpcode->MSAA) 3706 return true; 3707 3708 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3709 assert(DimIdx != -1); 3710 3711 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3712 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3713 3714 return DimInfo->MSAA; 3715 } 3716 3717 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3718 { 3719 switch (Opcode) { 3720 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3721 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3722 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3723 return true; 3724 default: 3725 return false; 3726 } 3727 } 3728 3729 // movrels* opcodes should only allow VGPRs as src0. 3730 // This is specified in the .td descriptions for vop1/vop3, 3731 // but sdwa is handled differently. See isSDWAOperand. 3732 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3733 const OperandVector &Operands) { 3734 3735 const unsigned Opc = Inst.getOpcode(); 3736 const MCInstrDesc &Desc = MII.get(Opc); 3737 3738 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3739 return true; 3740 3741 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3742 assert(Src0Idx != -1); 3743 3744 SMLoc ErrLoc; 3745 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3746 if (Src0.isReg()) { 3747 auto Reg = mc2PseudoReg(Src0.getReg()); 3748 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3749 if (!isSGPR(Reg, TRI)) 3750 return true; 3751 ErrLoc = getRegLoc(Reg, Operands); 3752 } else { 3753 ErrLoc = getConstLoc(Operands); 3754 } 3755 3756 Error(ErrLoc, "source operand must be a VGPR"); 3757 return false; 3758 } 3759 3760 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3761 const OperandVector &Operands) { 3762 3763 const unsigned Opc = Inst.getOpcode(); 3764 3765 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3766 return true; 3767 3768 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3769 assert(Src0Idx != -1); 3770 3771 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3772 if (!Src0.isReg()) 3773 return true; 3774 3775 auto Reg = mc2PseudoReg(Src0.getReg()); 3776 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3777 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3778 Error(getRegLoc(Reg, Operands), 3779 "source operand must be either a VGPR or an inline constant"); 3780 return false; 3781 } 3782 3783 return true; 3784 } 3785 3786 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3787 const OperandVector &Operands) { 3788 const unsigned Opc = Inst.getOpcode(); 3789 const MCInstrDesc &Desc = MII.get(Opc); 3790 3791 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3792 return true; 3793 3794 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3795 if (Src2Idx == -1) 3796 return true; 3797 3798 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3799 if (!Src2.isReg()) 3800 return true; 3801 3802 MCRegister Src2Reg = Src2.getReg(); 3803 MCRegister DstReg = Inst.getOperand(0).getReg(); 3804 if (Src2Reg == DstReg) 3805 return
true; 3806 3807 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3808 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3809 return true; 3810 3811 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3812 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3813 "source 2 operand must not partially overlap with dst"); 3814 return false; 3815 } 3816 3817 return true; 3818 } 3819 3820 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3821 switch (Inst.getOpcode()) { 3822 default: 3823 return true; 3824 case V_DIV_SCALE_F32_gfx6_gfx7: 3825 case V_DIV_SCALE_F32_vi: 3826 case V_DIV_SCALE_F32_gfx10: 3827 case V_DIV_SCALE_F64_gfx6_gfx7: 3828 case V_DIV_SCALE_F64_vi: 3829 case V_DIV_SCALE_F64_gfx10: 3830 break; 3831 } 3832 3833 // TODO: Check that src0 = src1 or src2. 3834 3835 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3836 AMDGPU::OpName::src1_modifiers, 3837 AMDGPU::OpName::src2_modifiers}) { 3838 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3839 .getImm() & 3840 SISrcMods::ABS) { 3841 return false; 3842 } 3843 } 3844 3845 return true; 3846 } 3847 3848 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3849 3850 const unsigned Opc = Inst.getOpcode(); 3851 const MCInstrDesc &Desc = MII.get(Opc); 3852 3853 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3854 return true; 3855 3856 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3857 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3858 if (isCI() || isSI()) 3859 return false; 3860 } 3861 3862 return true; 3863 } 3864 3865 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3866 const unsigned Opc = Inst.getOpcode(); 3867 const MCInstrDesc &Desc = MII.get(Opc); 3868 3869 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3870 return true; 3871 3872 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3873 if (DimIdx < 0) 3874 return true; 3875 3876 long Imm = Inst.getOperand(DimIdx).getImm(); 3877 if (Imm < 0 || Imm >= 8) 3878 return false; 3879 3880 return true; 3881 } 3882 3883 static bool IsRevOpcode(const unsigned Opcode) 3884 { 3885 switch (Opcode) { 3886 case AMDGPU::V_SUBREV_F32_e32: 3887 case AMDGPU::V_SUBREV_F32_e64: 3888 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3889 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3890 case AMDGPU::V_SUBREV_F32_e32_vi: 3891 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3892 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3893 case AMDGPU::V_SUBREV_F32_e64_vi: 3894 3895 case AMDGPU::V_SUBREV_CO_U32_e32: 3896 case AMDGPU::V_SUBREV_CO_U32_e64: 3897 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3898 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3899 3900 case AMDGPU::V_SUBBREV_U32_e32: 3901 case AMDGPU::V_SUBBREV_U32_e64: 3902 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3903 case AMDGPU::V_SUBBREV_U32_e32_vi: 3904 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3905 case AMDGPU::V_SUBBREV_U32_e64_vi: 3906 3907 case AMDGPU::V_SUBREV_U32_e32: 3908 case AMDGPU::V_SUBREV_U32_e64: 3909 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3910 case AMDGPU::V_SUBREV_U32_e32_vi: 3911 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3912 case AMDGPU::V_SUBREV_U32_e64_vi: 3913 3914 case AMDGPU::V_SUBREV_F16_e32: 3915 case AMDGPU::V_SUBREV_F16_e64: 3916 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3917 case AMDGPU::V_SUBREV_F16_e32_vi: 3918 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3919 case AMDGPU::V_SUBREV_F16_e64_vi: 3920 3921 case AMDGPU::V_SUBREV_U16_e32: 3922 case AMDGPU::V_SUBREV_U16_e64: 3923 case AMDGPU::V_SUBREV_U16_e32_vi: 3924 case AMDGPU::V_SUBREV_U16_e64_vi: 3925 3926 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3927 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3928 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3929 3930 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3931 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3932 3933 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3934 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3935 3936 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3937 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3938 3939 case AMDGPU::V_LSHRREV_B32_e32: 3940 case AMDGPU::V_LSHRREV_B32_e64: 3941 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3942 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3943 case AMDGPU::V_LSHRREV_B32_e32_vi: 3944 case AMDGPU::V_LSHRREV_B32_e64_vi: 3945 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3946 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3947 3948 case AMDGPU::V_ASHRREV_I32_e32: 3949 case AMDGPU::V_ASHRREV_I32_e64: 3950 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3951 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3952 case AMDGPU::V_ASHRREV_I32_e32_vi: 3953 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3954 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3955 case AMDGPU::V_ASHRREV_I32_e64_vi: 3956 3957 case AMDGPU::V_LSHLREV_B32_e32: 3958 case AMDGPU::V_LSHLREV_B32_e64: 3959 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3960 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3961 case AMDGPU::V_LSHLREV_B32_e32_vi: 3962 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3963 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3964 case AMDGPU::V_LSHLREV_B32_e64_vi: 3965 3966 case AMDGPU::V_LSHLREV_B16_e32: 3967 case AMDGPU::V_LSHLREV_B16_e64: 3968 case AMDGPU::V_LSHLREV_B16_e32_vi: 3969 case AMDGPU::V_LSHLREV_B16_e64_vi: 3970 case AMDGPU::V_LSHLREV_B16_gfx10: 3971 3972 case AMDGPU::V_LSHRREV_B16_e32: 3973 case AMDGPU::V_LSHRREV_B16_e64: 3974 case AMDGPU::V_LSHRREV_B16_e32_vi: 3975 case AMDGPU::V_LSHRREV_B16_e64_vi: 3976 case AMDGPU::V_LSHRREV_B16_gfx10: 3977 3978 case AMDGPU::V_ASHRREV_I16_e32: 3979 case AMDGPU::V_ASHRREV_I16_e64: 3980 case AMDGPU::V_ASHRREV_I16_e32_vi: 3981 case AMDGPU::V_ASHRREV_I16_e64_vi: 3982 case AMDGPU::V_ASHRREV_I16_gfx10: 3983 3984 case AMDGPU::V_LSHLREV_B64_e64: 3985 case AMDGPU::V_LSHLREV_B64_gfx10: 3986 case AMDGPU::V_LSHLREV_B64_vi: 3987 3988 case AMDGPU::V_LSHRREV_B64_e64: 3989 case AMDGPU::V_LSHRREV_B64_gfx10: 3990 case AMDGPU::V_LSHRREV_B64_vi: 3991 3992 case AMDGPU::V_ASHRREV_I64_e64: 3993 case AMDGPU::V_ASHRREV_I64_gfx10: 3994 case AMDGPU::V_ASHRREV_I64_vi: 3995 3996 case AMDGPU::V_PK_LSHLREV_B16: 3997 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3998 case AMDGPU::V_PK_LSHLREV_B16_vi: 3999 4000 case AMDGPU::V_PK_LSHRREV_B16: 4001 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 4002 case AMDGPU::V_PK_LSHRREV_B16_vi: 4003 case AMDGPU::V_PK_ASHRREV_I16: 4004 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 4005 case AMDGPU::V_PK_ASHRREV_I16_vi: 4006 return true; 4007 default: 4008 return false; 4009 } 4010 } 4011 4012 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 4013 4014 using namespace SIInstrFlags; 4015 const unsigned Opcode = Inst.getOpcode(); 4016 const MCInstrDesc &Desc = MII.get(Opcode); 4017 4018 // lds_direct register is defined so that it can be used 4019 // with 9-bit operands only. Ignore encodings which do not accept these. 
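// For example (an illustrative sketch; exact mnemonic syntax may vary by
// target): 'v_mov_b32 v0, lds_direct' is a VOP1 use in which lds_direct
// occupies the 9-bit src0 encoding, so it passes the encoding filter below.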
4020 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 4021 if ((Desc.TSFlags & Enc) == 0) 4022 return None; 4023 4024 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 4025 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 4026 if (SrcIdx == -1) 4027 break; 4028 const auto &Src = Inst.getOperand(SrcIdx); 4029 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 4030 4031 if (isGFX90A() || isGFX11Plus()) 4032 return StringRef("lds_direct is not supported on this GPU"); 4033 4034 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 4035 return StringRef("lds_direct cannot be used with this instruction"); 4036 4037 if (SrcName != OpName::src0) 4038 return StringRef("lds_direct may be used as src0 only"); 4039 } 4040 } 4041 4042 return None; 4043 } 4044 4045 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 4046 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4047 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4048 if (Op.isFlatOffset()) 4049 return Op.getStartLoc(); 4050 } 4051 return getLoc(); 4052 } 4053 4054 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4055 const OperandVector &Operands) { 4056 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4057 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4058 return true; 4059 4060 auto Opcode = Inst.getOpcode(); 4061 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4062 assert(OpNum != -1); 4063 4064 const auto &Op = Inst.getOperand(OpNum); 4065 if (!hasFlatOffsets() && Op.getImm() != 0) { 4066 Error(getFlatOffsetLoc(Operands), 4067 "flat offset modifier is not supported on this GPU"); 4068 return false; 4069 } 4070 4071 // For FLAT segment the offset must be positive; 4072 // MSB is ignored and forced to zero. 4073 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4074 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4075 if (!isIntN(OffsetSize, Op.getImm())) { 4076 Error(getFlatOffsetLoc(Operands), 4077 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4078 return false; 4079 } 4080 } else { 4081 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4082 if (!isUIntN(OffsetSize, Op.getImm())) { 4083 Error(getFlatOffsetLoc(Operands), 4084 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4085 return false; 4086 } 4087 } 4088 4089 return true; 4090 } 4091 4092 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4093 // Start with second operand because SMEM Offset cannot be dst or src0. 
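  // E.g. in 's_load_dword s4, s[2:3], 0x10' (an illustrative instruction),
  // operand 0 is the mnemonic and operand 1 the destination, so the scan
  // may safely start at index 2.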
4094 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4095 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4096 if (Op.isSMEMOffset()) 4097 return Op.getStartLoc(); 4098 } 4099 return getLoc(); 4100 } 4101 4102 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4103 const OperandVector &Operands) { 4104 if (isCI() || isSI()) 4105 return true; 4106 4107 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4108 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4109 return true; 4110 4111 auto Opcode = Inst.getOpcode(); 4112 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4113 if (OpNum == -1) 4114 return true; 4115 4116 const auto &Op = Inst.getOperand(OpNum); 4117 if (!Op.isImm()) 4118 return true; 4119 4120 uint64_t Offset = Op.getImm(); 4121 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4122 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4123 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4124 return true; 4125 4126 Error(getSMEMOffsetLoc(Operands), 4127 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4128 "expected a 21-bit signed offset"); 4129 4130 return false; 4131 } 4132 4133 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4134 unsigned Opcode = Inst.getOpcode(); 4135 const MCInstrDesc &Desc = MII.get(Opcode); 4136 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4137 return true; 4138 4139 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4140 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4141 4142 const int OpIndices[] = { Src0Idx, Src1Idx }; 4143 4144 unsigned NumExprs = 0; 4145 unsigned NumLiterals = 0; 4146 uint32_t LiteralValue; 4147 4148 for (int OpIdx : OpIndices) { 4149 if (OpIdx == -1) break; 4150 4151 const MCOperand &MO = Inst.getOperand(OpIdx); 4152 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4153 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4154 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4155 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4156 if (NumLiterals == 0 || LiteralValue != Value) { 4157 LiteralValue = Value; 4158 ++NumLiterals; 4159 } 4160 } else if (MO.isExpr()) { 4161 ++NumExprs; 4162 } 4163 } 4164 } 4165 4166 return NumLiterals + NumExprs <= 1; 4167 } 4168 4169 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4170 const unsigned Opc = Inst.getOpcode(); 4171 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4172 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4173 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4174 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4175 4176 if (OpSel & ~3) 4177 return false; 4178 } 4179 4180 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4181 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4182 if (OpSelIdx != -1) { 4183 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4184 return false; 4185 } 4186 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4187 if (OpSelHiIdx != -1) { 4188 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4189 return false; 4190 } 4191 } 4192 4193 return true; 4194 } 4195 4196 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4197 const OperandVector &Operands) { 4198 const unsigned Opc = Inst.getOpcode(); 4199 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4200 if (DppCtrlIdx < 0) 4201 return true; 4202 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4203 4204 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4205 // DPP64 is supported for row_newbcast only. 4206 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4207 if (Src0Idx >= 0 && 4208 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4209 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4210 Error(S, "64 bit dpp only supports row_newbcast"); 4211 return false; 4212 } 4213 } 4214 4215 return true; 4216 } 4217 4218 // Check if VCC register matches wavefront size 4219 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4220 auto FB = getFeatureBits(); 4221 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4222 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4223 } 4224 4225 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4226 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4227 const OperandVector &Operands) { 4228 unsigned Opcode = Inst.getOpcode(); 4229 const MCInstrDesc &Desc = MII.get(Opcode); 4230 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4231 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4232 ImmIdx == -1) 4233 return true; 4234 4235 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4236 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4237 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4238 4239 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4240 4241 unsigned NumExprs = 0; 4242 unsigned NumLiterals = 0; 4243 uint32_t LiteralValue; 4244 4245 for (int OpIdx : OpIndices) { 4246 if (OpIdx == -1) 4247 continue; 4248 4249 const MCOperand &MO = Inst.getOperand(OpIdx); 4250 if (!MO.isImm() && !MO.isExpr()) 4251 continue; 4252 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4253 continue; 4254 4255 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4256 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4257 Error(getConstLoc(Operands), 4258 "inline constants are not allowed for this operand"); 4259 return false; 4260 } 4261 4262 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4263 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4264 if (NumLiterals == 0 || LiteralValue != Value) { 4265 LiteralValue = Value; 4266 ++NumLiterals; 4267 } 4268 } else if (MO.isExpr()) { 4269 ++NumExprs; 4270 } 4271 } 4272 NumLiterals += NumExprs; 4273 4274 if (!NumLiterals) 4275 return true; 4276 4277 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4278 Error(getLitLoc(Operands), "literal operands are not supported"); 4279 return false; 4280 } 4281 4282 if (NumLiterals > 1) { 4283 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4284 return false; 4285 } 4286 4287 return true; 4288 } 4289 4290 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4291 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4292 const MCRegisterInfo *MRI) { 4293 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4294 if (OpIdx < 0) 4295 return -1; 4296 4297 const MCOperand &Op = Inst.getOperand(OpIdx); 4298 if (!Op.isReg()) 4299 return -1; 4300 4301 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4302 auto Reg = Sub ? Sub : Op.getReg(); 4303 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4304 return AGPR32.contains(Reg) ? 
1 : 0; 4305 } 4306 4307 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4308 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4309 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4310 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4311 SIInstrFlags::DS)) == 0) 4312 return true; 4313 4314 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4315 : AMDGPU::OpName::vdata; 4316 4317 const MCRegisterInfo *MRI = getMRI(); 4318 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4319 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4320 4321 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4322 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4323 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4324 return false; 4325 } 4326 4327 auto FB = getFeatureBits(); 4328 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4329 if (DataAreg < 0 || DstAreg < 0) 4330 return true; 4331 return DstAreg == DataAreg; 4332 } 4333 4334 return DstAreg < 1 && DataAreg < 1; 4335 } 4336 4337 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4338 auto FB = getFeatureBits(); 4339 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4340 return true; 4341 4342 const MCRegisterInfo *MRI = getMRI(); 4343 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4344 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4345 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4346 const MCOperand &Op = Inst.getOperand(I); 4347 if (!Op.isReg()) 4348 continue; 4349 4350 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4351 if (!Sub) 4352 continue; 4353 4354 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4355 return false; 4356 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4357 return false; 4358 } 4359 4360 return true; 4361 } 4362 4363 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4364 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4365 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4366 if (Op.isBLGP()) 4367 return Op.getStartLoc(); 4368 } 4369 return SMLoc(); 4370 } 4371 4372 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4373 const OperandVector &Operands) { 4374 unsigned Opc = Inst.getOpcode(); 4375 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4376 if (BlgpIdx == -1) 4377 return true; 4378 SMLoc BLGPLoc = getBLGPLoc(Operands); 4379 if (!BLGPLoc.isValid()) 4380 return true; 4381 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4382 auto FB = getFeatureBits(); 4383 bool UsesNeg = false; 4384 if (FB[AMDGPU::FeatureGFX940Insts]) { 4385 switch (Opc) { 4386 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4387 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4388 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4389 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4390 UsesNeg = true; 4391 } 4392 } 4393 4394 if (IsNeg == UsesNeg) 4395 return true; 4396 4397 Error(BLGPLoc, 4398 UsesNeg ? "invalid modifier: blgp is not supported" 4399 : "invalid modifier: neg is not supported"); 4400 4401 return false; 4402 } 4403 4404 // gfx90a has an undocumented limitation: 4405 // DS_GWS opcodes must use even aligned registers. 
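// For example (a sketch, assuming gfx90a):
//   ds_gws_init v2 gds   ; accepted: data register index is even
//   ds_gws_init v3 gds   ; rejected: "vgpr must be even aligned"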
4406 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4407 const OperandVector &Operands) { 4408 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4409 return true; 4410 4411 int Opc = Inst.getOpcode(); 4412 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4413 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4414 return true; 4415 4416 const MCRegisterInfo *MRI = getMRI(); 4417 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4418 int Data0Pos = 4419 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4420 assert(Data0Pos != -1); 4421 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4422 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4423 if (RegIdx & 1) { 4424 SMLoc RegLoc = getRegLoc(Reg, Operands); 4425 Error(RegLoc, "vgpr must be even aligned"); 4426 return false; 4427 } 4428 4429 return true; 4430 } 4431 4432 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4433 const OperandVector &Operands, 4434 const SMLoc &IDLoc) { 4435 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4436 AMDGPU::OpName::cpol); 4437 if (CPolPos == -1) 4438 return true; 4439 4440 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4441 4442 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4443 if (TSFlags & SIInstrFlags::SMRD) { 4444 if (CPol && (isSI() || isCI())) { 4445 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4446 Error(S, "cache policy is not supported for SMRD instructions"); 4447 return false; 4448 } 4449 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4450 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4451 return false; 4452 } 4453 } 4454 4455 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4456 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4457 StringRef CStr(S.getPointer()); 4458 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4459 Error(S, "scc is not supported on this GPU"); 4460 return false; 4461 } 4462 4463 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4464 return true; 4465 4466 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4467 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4468 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4469 : "instruction must use glc"); 4470 return false; 4471 } 4472 } else { 4473 if (CPol & CPol::GLC) { 4474 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4475 StringRef CStr(S.getPointer()); 4476 S = SMLoc::getFromPointer( 4477 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4478 Error(S, isGFX940() ? "instruction must not use sc0" 4479 : "instruction must not use glc"); 4480 return false; 4481 } 4482 } 4483 4484 return true; 4485 } 4486 4487 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4488 const OperandVector &Operands, 4489 const SMLoc &IDLoc) { 4490 if (isGFX940()) 4491 return true; 4492 4493 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4494 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4495 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4496 return true; 4497 // This is FLAT LDS DMA. 4498 4499 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4500 StringRef CStr(S.getPointer()); 4501 if (!CStr.startswith("lds")) { 4502 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4503 // The LDS version should have the 'lds' modifier, but it follows optional 4504 // operands, so its absence is ignored by the matcher.
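        // E.g. an LDS DMA opcode matched from a FLAT load written without
        // the trailing 'lds' token is diagnosed here instead of being
        // silently accepted.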
Error(IDLoc, "invalid operands for instruction"); 4506 return false; 4507 } 4508 4509 return true; 4510 } 4511 4512 bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) { 4513 if (!isGFX11Plus()) 4514 return true; 4515 for (auto &Operand : Operands) { 4516 if (!Operand->isReg()) 4517 continue; 4518 unsigned Reg = Operand->getReg(); 4519 if (Reg == SRC_EXECZ || Reg == SRC_VCCZ) { 4520 Error(getRegLoc(Reg, Operands), 4521 "execz and vccz are not supported on this GPU"); 4522 return false; 4523 } 4524 } 4525 return true; 4526 } 4527 4528 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4529 const SMLoc &IDLoc, 4530 const OperandVector &Operands) { 4531 if (auto ErrMsg = validateLdsDirect(Inst)) { 4532 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4533 return false; 4534 } 4535 if (!validateSOPLiteral(Inst)) { 4536 Error(getLitLoc(Operands), 4537 "only one literal operand is allowed"); 4538 return false; 4539 } 4540 if (!validateVOPLiteral(Inst, Operands)) { 4541 return false; 4542 } 4543 if (!validateConstantBusLimitations(Inst, Operands)) { 4544 return false; 4545 } 4546 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4547 return false; 4548 } 4549 if (!validateIntClampSupported(Inst)) { 4550 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4551 "integer clamping is not supported on this GPU"); 4552 return false; 4553 } 4554 if (!validateOpSel(Inst)) { 4555 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4556 "invalid op_sel operand"); 4557 return false; 4558 } 4559 if (!validateDPP(Inst, Operands)) { 4560 return false; 4561 } 4562 // For MUBUF/MTBUF, d16 is part of the opcode, so there is nothing to validate. 4563 if (!validateMIMGD16(Inst)) { 4564 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4565 "d16 modifier is not supported on this GPU"); 4566 return false; 4567 } 4568 if (!validateMIMGDim(Inst)) { 4569 Error(IDLoc, "dim modifier is required on this GPU"); 4570 return false; 4571 } 4572 if (!validateMIMGMSAA(Inst)) { 4573 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4574 "invalid dim; must be MSAA type"); 4575 return false; 4576 } 4577 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4578 Error(IDLoc, *ErrMsg); 4579 return false; 4580 } 4581 if (!validateMIMGAddrSize(Inst)) { 4582 Error(IDLoc, 4583 "image address size does not match dim and a16"); 4584 return false; 4585 } 4586 if (!validateMIMGAtomicDMask(Inst)) { 4587 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4588 "invalid atomic image dmask"); 4589 return false; 4590 } 4591 if (!validateMIMGGatherDMask(Inst)) { 4592 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4593 "invalid image_gather dmask: only one bit must be set"); 4594 return false; 4595 } 4596 if (!validateMovrels(Inst, Operands)) { 4597 return false; 4598 } 4599 if (!validateFlatOffset(Inst, Operands)) { 4600 return false; 4601 } 4602 if (!validateSMEMOffset(Inst, Operands)) { 4603 return false; 4604 } 4605 if (!validateMAIAccWrite(Inst, Operands)) { 4606 return false; 4607 } 4608 if (!validateMFMA(Inst, Operands)) { 4609 return false; 4610 } 4611 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4612 return false; 4613 } 4614 4615 if (!validateAGPRLdSt(Inst)) { 4616 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4617 ? "invalid register class: data and dst should be all VGPR or AGPR" 4618 : "invalid register class: agpr loads and stores not supported on this GPU" 4619 ); 4620 return false; 4621 } 4622 if (!validateVGPRAlign(Inst)) { 4623 Error(IDLoc, 4624 "invalid register class: vgpr tuples must be 64 bit aligned"); 4625 return false; 4626 } 4627 if (!validateGWS(Inst, Operands)) { 4628 return false; 4629 } 4630 4631 if (!validateBLGP(Inst, Operands)) { 4632 return false; 4633 } 4634 4635 if (!validateDivScale(Inst)) { 4636 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4637 return false; 4638 } 4642 if (!validateExeczVcczOperands(Operands)) { 4643 return false; 4644 } 4645 4646 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4647 return false; 4648 } 4649 4650 return true; 4651 } 4652 4653 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4654 const FeatureBitset &FBS, 4655 unsigned VariantID = 0); 4656 4657 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4658 const FeatureBitset &AvailableFeatures, 4659 unsigned VariantID); 4660 4661 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4662 const FeatureBitset &FBS) { 4663 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4664 } 4665 4666 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4667 const FeatureBitset &FBS, 4668 ArrayRef<unsigned> Variants) { 4669 for (auto Variant : Variants) { 4670 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4671 return true; 4672 } 4673 4674 return false; 4675 } 4676 4677 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4678 const SMLoc &IDLoc) { 4679 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4680 4681 // Check if requested instruction variant is supported. 4682 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4683 return false; 4684 4685 // This instruction is not supported. 4686 // Clear any other pending errors because they are no longer relevant. 4687 getParser().clearPendingErrors(); 4688 4689 // Requested instruction variant is not supported. 4690 // Check if any other variants are supported. 4691 StringRef VariantName = getMatchedVariantName(); 4692 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4693 return Error(IDLoc, 4694 Twine(VariantName, 4695 " variant of this instruction is not supported")); 4696 } 4697 4698 // Finally check if this instruction is supported on any other GPU. 4699 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4700 return Error(IDLoc, "instruction not supported on this GPU"); 4701 } 4702 4703 // Instruction not supported on any GPU. Probably a typo. 4704 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4705 return Error(IDLoc, "invalid instruction" + Suggestion); 4706 } 4707 4708 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4709 OperandVector &Operands, 4710 MCStreamer &Out, 4711 uint64_t &ErrorInfo, 4712 bool MatchingInlineAsm) { 4713 MCInst Inst; 4714 unsigned Result = Match_Success; 4715 for (auto Variant : getMatchedVariants()) { 4716 uint64_t EI; 4717 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4718 Variant); 4719 // We order match statuses from least to most specific and use the most 4720 // specific status as the result: 4721 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4722 if ((R == Match_Success) || 4723 (R == Match_PreferE32) || 4724 (R == Match_MissingFeature && Result != Match_PreferE32) || 4725 (R == Match_InvalidOperand && Result != Match_MissingFeature 4726 && Result != Match_PreferE32) || 4727 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4728 && Result != Match_MissingFeature 4729 && Result != Match_PreferE32)) { 4730 Result = R; 4731 ErrorInfo = EI; 4732 } 4733 if (R == Match_Success) 4734 break; 4735 } 4736 4737 if (Result == Match_Success) { 4738 if (!validateInstruction(Inst, IDLoc, Operands)) { 4739 return true; 4740 } 4741 Inst.setLoc(IDLoc); 4742 Out.emitInstruction(Inst, getSTI()); 4743 return false; 4744 } 4745 4746 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4747 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4748 return true; 4749 } 4750 4751 switch (Result) { 4752 default: break; 4753 case Match_MissingFeature: 4754 // It has been verified that the specified instruction 4755 // mnemonic is valid. A match was found but it requires 4756 // features which are not supported on this GPU. 4757 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4758 4759 case Match_InvalidOperand: { 4760 SMLoc ErrorLoc = IDLoc; 4761 if (ErrorInfo != ~0ULL) { 4762 if (ErrorInfo >= Operands.size()) { 4763 return Error(IDLoc, "too few operands for instruction"); 4764 } 4765 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4766 if (ErrorLoc == SMLoc()) 4767 ErrorLoc = IDLoc; 4768 } 4769 return Error(ErrorLoc, "invalid operand for instruction"); 4770 } 4771 4772 case Match_PreferE32: 4773 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4774 "should be encoded as e32"); 4775 case Match_MnemonicFail: 4776 llvm_unreachable("Invalid instructions should have been handled already"); 4777 } 4778 llvm_unreachable("Implement any new match types added!"); 4779 } 4780 4781 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4782 int64_t Tmp = -1; 4783 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4784 return true; 4785 } 4786 if (getParser().parseAbsoluteExpression(Tmp)) { 4787 return true; 4788 } 4789 Ret = static_cast<uint32_t>(Tmp); 4790 return false; 4791 } 4792 4793 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4794 uint32_t &Minor) { 4795 if (ParseAsAbsoluteExpression(Major)) 4796 return TokError("invalid major version"); 4797 4798 if (!trySkipToken(AsmToken::Comma)) 4799 return TokError("minor version number required, comma expected"); 4800 4801 if (ParseAsAbsoluteExpression(Minor)) 4802 return TokError("invalid minor version"); 4803 4804 return false; 4805 } 4806 4807 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4808 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4809 return TokError("directive only supported for amdgcn architecture"); 4810 4811 std::string TargetIDDirective; 4812 SMLoc TargetStart = getTok().getLoc(); 4813 if (getParser().parseEscapedString(TargetIDDirective)) 4814 return true; 4815 4816 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4817 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4818 return getParser().Error(TargetRange.Start, 4819 (Twine(".amdgcn_target directive's target id ") + 4820 Twine(TargetIDDirective) + 4821 Twine(" does not match the specified target id ") + 4822
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4823 4824 return false; 4825 } 4826 4827 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4828 return Error(Range.Start, "value out of range", Range); 4829 } 4830 4831 bool AMDGPUAsmParser::calculateGPRBlocks( 4832 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4833 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4834 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4835 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4836 // TODO(scott.linder): These calculations are duplicated from 4837 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4838 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4839 4840 unsigned NumVGPRs = NextFreeVGPR; 4841 unsigned NumSGPRs = NextFreeSGPR; 4842 4843 if (Version.Major >= 10) 4844 NumSGPRs = 0; 4845 else { 4846 unsigned MaxAddressableNumSGPRs = 4847 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4848 4849 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4850 NumSGPRs > MaxAddressableNumSGPRs) 4851 return OutOfRangeError(SGPRRange); 4852 4853 NumSGPRs += 4854 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4855 4856 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4857 NumSGPRs > MaxAddressableNumSGPRs) 4858 return OutOfRangeError(SGPRRange); 4859 4860 if (Features.test(FeatureSGPRInitBug)) 4861 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4862 } 4863 4864 VGPRBlocks = 4865 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4866 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4867 4868 return false; 4869 } 4870 4871 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4872 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4873 return TokError("directive only supported for amdgcn architecture"); 4874 4875 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4876 return TokError("directive only supported for amdhsa OS"); 4877 4878 StringRef KernelName; 4879 if (getParser().parseIdentifier(KernelName)) 4880 return true; 4881 4882 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4883 4884 StringSet<> Seen; 4885 4886 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4887 4888 SMRange VGPRRange; 4889 uint64_t NextFreeVGPR = 0; 4890 uint64_t AccumOffset = 0; 4891 uint64_t SharedVGPRCount = 0; 4892 SMRange SGPRRange; 4893 uint64_t NextFreeSGPR = 0; 4894 4895 // Count the number of user SGPRs implied from the enabled feature bits. 4896 unsigned ImpliedUserSGPRCount = 0; 4897 4898 // Track if the asm explicitly contains the directive for the user SGPR 4899 // count. 
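  // For example, '.amdhsa_user_sgpr_count 4' (an illustrative value) takes
  // precedence over the implied count, provided it is not smaller than the
  // count implied by the enabled user SGPR directives (checked after parsing).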
4900 Optional<unsigned> ExplicitUserSGPRCount; 4901 bool ReserveVCC = true; 4902 bool ReserveFlatScr = true; 4903 Optional<bool> EnableWavefrontSize32; 4904 4905 while (true) { 4906 while (trySkipToken(AsmToken::EndOfStatement)); 4907 4908 StringRef ID; 4909 SMRange IDRange = getTok().getLocRange(); 4910 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4911 return true; 4912 4913 if (ID == ".end_amdhsa_kernel") 4914 break; 4915 4916 if (!Seen.insert(ID).second) 4917 return TokError(".amdhsa_ directives cannot be repeated"); 4918 4919 SMLoc ValStart = getLoc(); 4920 int64_t IVal; 4921 if (getParser().parseAbsoluteExpression(IVal)) 4922 return true; 4923 SMLoc ValEnd = getLoc(); 4924 SMRange ValRange = SMRange(ValStart, ValEnd); 4925 4926 if (IVal < 0) 4927 return OutOfRangeError(ValRange); 4928 4929 uint64_t Val = IVal; 4930 4931 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4932 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4933 return OutOfRangeError(RANGE); \ 4934 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4935 4936 if (ID == ".amdhsa_group_segment_fixed_size") { 4937 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4938 return OutOfRangeError(ValRange); 4939 KD.group_segment_fixed_size = Val; 4940 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4941 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4942 return OutOfRangeError(ValRange); 4943 KD.private_segment_fixed_size = Val; 4944 } else if (ID == ".amdhsa_kernarg_size") { 4945 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4946 return OutOfRangeError(ValRange); 4947 KD.kernarg_size = Val; 4948 } else if (ID == ".amdhsa_user_sgpr_count") { 4949 ExplicitUserSGPRCount = Val; 4950 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4951 if (hasArchitectedFlatScratch()) 4952 return Error(IDRange.Start, 4953 "directive is not supported with architected flat scratch", 4954 IDRange); 4955 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4956 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4957 Val, ValRange); 4958 if (Val) 4959 ImpliedUserSGPRCount += 4; 4960 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4961 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4962 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4963 ValRange); 4964 if (Val) 4965 ImpliedUserSGPRCount += 2; 4966 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4967 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4968 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4969 ValRange); 4970 if (Val) 4971 ImpliedUserSGPRCount += 2; 4972 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4973 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4974 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4975 Val, ValRange); 4976 if (Val) 4977 ImpliedUserSGPRCount += 2; 4978 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4979 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4980 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4981 ValRange); 4982 if (Val) 4983 ImpliedUserSGPRCount += 2; 4984 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4985 if (hasArchitectedFlatScratch()) 4986 return Error(IDRange.Start, 4987 "directive is not supported with architected flat scratch", 4988 IDRange); 4989 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4990 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4991 ValRange); 4992 if (Val) 4993 ImpliedUserSGPRCount += 2; 4994 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4995 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4996 
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4997 Val, ValRange); 4998 if (Val) 4999 ImpliedUserSGPRCount += 1; 5000 } else if (ID == ".amdhsa_wavefront_size32") { 5001 if (IVersion.Major < 10) 5002 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5003 EnableWavefrontSize32 = Val; 5004 PARSE_BITS_ENTRY(KD.kernel_code_properties, 5005 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 5006 Val, ValRange); 5007 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 5008 if (hasArchitectedFlatScratch()) 5009 return Error(IDRange.Start, 5010 "directive is not supported with architected flat scratch", 5011 IDRange); 5012 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5013 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5014 } else if (ID == ".amdhsa_enable_private_segment") { 5015 if (!hasArchitectedFlatScratch()) 5016 return Error( 5017 IDRange.Start, 5018 "directive is not supported without architected flat scratch", 5019 IDRange); 5020 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5021 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 5022 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 5023 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5024 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 5025 ValRange); 5026 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 5027 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5028 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 5029 ValRange); 5030 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 5031 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5032 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 5033 ValRange); 5034 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 5035 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5036 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 5037 ValRange); 5038 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 5039 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5040 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 5041 ValRange); 5042 } else if (ID == ".amdhsa_next_free_vgpr") { 5043 VGPRRange = ValRange; 5044 NextFreeVGPR = Val; 5045 } else if (ID == ".amdhsa_next_free_sgpr") { 5046 SGPRRange = ValRange; 5047 NextFreeSGPR = Val; 5048 } else if (ID == ".amdhsa_accum_offset") { 5049 if (!isGFX90A()) 5050 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5051 AccumOffset = Val; 5052 } else if (ID == ".amdhsa_reserve_vcc") { 5053 if (!isUInt<1>(Val)) 5054 return OutOfRangeError(ValRange); 5055 ReserveVCC = Val; 5056 } else if (ID == ".amdhsa_reserve_flat_scratch") { 5057 if (IVersion.Major < 7) 5058 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 5059 if (hasArchitectedFlatScratch()) 5060 return Error(IDRange.Start, 5061 "directive is not supported with architected flat scratch", 5062 IDRange); 5063 if (!isUInt<1>(Val)) 5064 return OutOfRangeError(ValRange); 5065 ReserveFlatScr = Val; 5066 } else if (ID == ".amdhsa_reserve_xnack_mask") { 5067 if (IVersion.Major < 8) 5068 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 5069 if (!isUInt<1>(Val)) 5070 return OutOfRangeError(ValRange); 5071 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5072 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5073 IDRange); 5074 } else if (ID == ".amdhsa_float_round_mode_32") { 5075 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5076 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5077 } else if (ID == ".amdhsa_float_round_mode_16_64") { 5078 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5079 
COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5080 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5082 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5083 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5084 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5085 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5086 ValRange); 5087 } else if (ID == ".amdhsa_dx10_clamp") { 5088 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5089 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5090 } else if (ID == ".amdhsa_ieee_mode") { 5091 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5092 Val, ValRange); 5093 } else if (ID == ".amdhsa_fp16_overflow") { 5094 if (IVersion.Major < 9) 5095 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5096 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5097 ValRange); 5098 } else if (ID == ".amdhsa_tg_split") { 5099 if (!isGFX90A()) 5100 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5101 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5102 ValRange); 5103 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5104 if (IVersion.Major < 10) 5105 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5106 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5107 ValRange); 5108 } else if (ID == ".amdhsa_memory_ordered") { 5109 if (IVersion.Major < 10) 5110 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5111 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5112 ValRange); 5113 } else if (ID == ".amdhsa_forward_progress") { 5114 if (IVersion.Major < 10) 5115 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5116 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5117 ValRange); 5118 } else if (ID == ".amdhsa_shared_vgpr_count") { 5119 if (IVersion.Major < 10) 5120 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5121 SharedVGPRCount = Val; 5122 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5123 COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT, Val, 5124 ValRange); 5125 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5126 PARSE_BITS_ENTRY( 5127 KD.compute_pgm_rsrc2, 5128 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5129 ValRange); 5130 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5131 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5132 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5133 Val, ValRange); 5134 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5135 PARSE_BITS_ENTRY( 5136 KD.compute_pgm_rsrc2, 5137 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5138 ValRange); 5139 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5140 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5141 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5142 Val, ValRange); 5143 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5144 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5145 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5146 Val, ValRange); 5147 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5148 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5149 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5150 Val, ValRange); 5151 } else if (ID == ".amdhsa_exception_int_div_zero") { 5152 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5153 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5154 Val, 
ValRange); 5155 } else { 5156 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5157 } 5158 5159 #undef PARSE_BITS_ENTRY 5160 } 5161 5162 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5163 return TokError(".amdhsa_next_free_vgpr directive is required"); 5164 5165 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5166 return TokError(".amdhsa_next_free_sgpr directive is required"); 5167 5168 unsigned VGPRBlocks; 5169 unsigned SGPRBlocks; 5170 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5171 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5172 EnableWavefrontSize32, NextFreeVGPR, 5173 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5174 SGPRBlocks)) 5175 return true; 5176 5177 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5178 VGPRBlocks)) 5179 return OutOfRangeError(VGPRRange); 5180 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5181 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5182 5183 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5184 SGPRBlocks)) 5185 return OutOfRangeError(SGPRRange); 5186 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5187 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5188 SGPRBlocks); 5189 5190 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5191 return TokError(".amdhsa_user_sgpr_count smaller than that implied by " 5192 "enabled user SGPRs"); 5193 5194 unsigned UserSGPRCount = 5195 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5196 5197 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5198 return TokError("too many user SGPRs enabled"); 5199 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5200 UserSGPRCount); 5201 5202 if (isGFX90A()) { 5203 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5204 return TokError(".amdhsa_accum_offset directive is required"); 5205 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5206 return TokError("accum_offset should be in range [4..256] in " 5207 "increments of 4"); 5208 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5209 return TokError("accum_offset exceeds total VGPR allocation"); 5210 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5211 (AccumOffset / 4 - 1)); 5212 } 5213 5214 if (IVersion.Major == 10) { 5215 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5216 if (SharedVGPRCount && EnableWavefrontSize32) { 5217 return TokError("shared_vgpr_count directive not valid on " 5218 "wavefront size 32"); 5219 } 5220 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5221 return TokError("shared_vgpr_count*2 + " 5222 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5223 "exceed 63"); 5224 } 5225 } 5226 5227 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5228 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5229 ReserveFlatScr); 5230 return false; 5231 } 5232 5233 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5234 uint32_t Major; 5235 uint32_t Minor; 5236 5237 if (ParseDirectiveMajorMinor(Major, Minor)) 5238 return true; 5239 5240 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5241 return false; 5242 } 5243 5244 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5245 uint32_t Major; 5246 uint32_t Minor; 5247 uint32_t Stepping; 5248 StringRef VendorName; 5249 StringRef ArchName; 5250 5251 // If this directive has no arguments, then use the ISA version for the 5252 // targeted GPU.
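  // For example (illustrative):
  //   .hsa_code_object_isa                        ; use the target's ISA version
  //   .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"   ; explicit version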
5253 if (isToken(AsmToken::EndOfStatement)) { 5254 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5255 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5256 ISA.Stepping, 5257 "AMD", "AMDGPU"); 5258 return false; 5259 } 5260 5261 if (ParseDirectiveMajorMinor(Major, Minor)) 5262 return true; 5263 5264 if (!trySkipToken(AsmToken::Comma)) 5265 return TokError("stepping version number required, comma expected"); 5266 5267 if (ParseAsAbsoluteExpression(Stepping)) 5268 return TokError("invalid stepping version"); 5269 5270 if (!trySkipToken(AsmToken::Comma)) 5271 return TokError("vendor name required, comma expected"); 5272 5273 if (!parseString(VendorName, "invalid vendor name")) 5274 return true; 5275 5276 if (!trySkipToken(AsmToken::Comma)) 5277 return TokError("arch name required, comma expected"); 5278 5279 if (!parseString(ArchName, "invalid arch name")) 5280 return true; 5281 5282 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5283 VendorName, ArchName); 5284 return false; 5285 } 5286 5287 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5288 amd_kernel_code_t &Header) { 5289 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5290 // assembly for backwards compatibility. 5291 if (ID == "max_scratch_backing_memory_byte_size") { 5292 Parser.eatToEndOfStatement(); 5293 return false; 5294 } 5295 5296 SmallString<40> ErrStr; 5297 raw_svector_ostream Err(ErrStr); 5298 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5299 return TokError(Err.str()); 5300 } 5301 Lex(); 5302 5303 if (ID == "enable_wavefront_size32") { 5304 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5305 if (!isGFX10Plus()) 5306 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5307 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5308 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5309 } else { 5310 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5311 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5312 } 5313 } 5314 5315 if (ID == "wavefront_size") { 5316 if (Header.wavefront_size == 5) { 5317 if (!isGFX10Plus()) 5318 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5319 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5320 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5321 } else if (Header.wavefront_size == 6) { 5322 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5323 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5324 } 5325 } 5326 5327 if (ID == "enable_wgp_mode") { 5328 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5329 !isGFX10Plus()) 5330 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5331 } 5332 5333 if (ID == "enable_mem_ordered") { 5334 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5335 !isGFX10Plus()) 5336 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5337 } 5338 5339 if (ID == "enable_fwd_progress") { 5340 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5341 !isGFX10Plus()) 5342 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5343 } 5344 5345 return false; 5346 } 5347 5348 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5349 amd_kernel_code_t Header; 5350 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5351 5352 while (true) { 5353 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5354 // will set the current token to EndOfStatement. 5355 while(trySkipToken(AsmToken::EndOfStatement)); 5356 5357 StringRef ID; 5358 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5359 return true; 5360 5361 if (ID == ".end_amd_kernel_code_t") 5362 break; 5363 5364 if (ParseAMDKernelCodeTValue(ID, Header)) 5365 return true; 5366 } 5367 5368 getTargetStreamer().EmitAMDKernelCodeT(Header); 5369 5370 return false; 5371 } 5372 5373 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5374 StringRef KernelName; 5375 if (!parseId(KernelName, "expected symbol name")) 5376 return true; 5377 5378 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5379 ELF::STT_AMDGPU_HSA_KERNEL); 5380 5381 KernelScope.initialize(getContext()); 5382 return false; 5383 } 5384 5385 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5386 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5387 return Error(getLoc(), 5388 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5389 "architectures"); 5390 } 5391 5392 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5393 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5394 return Error(getParser().getTok().getLoc(), "target id must match options"); 5395 5396 getTargetStreamer().EmitISAVersion(); 5397 Lex(); 5398 5399 return false; 5400 } 5401 5402 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5403 const char *AssemblerDirectiveBegin; 5404 const char *AssemblerDirectiveEnd; 5405 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5406 isHsaAbiVersion3AndAbove(&getSTI()) 5407 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5408 HSAMD::V3::AssemblerDirectiveEnd) 5409 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5410 HSAMD::AssemblerDirectiveEnd); 5411 5412 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5413 return Error(getLoc(), 5414 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5415 "not available on non-amdhsa OSes")).str()); 5416 } 5417 5418 std::string HSAMetadataString; 5419 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5420 HSAMetadataString)) 5421 return true; 5422 5423 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5424 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5425 return Error(getLoc(), "invalid HSA metadata"); 5426 } else { 5427 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5428 return Error(getLoc(), "invalid HSA metadata"); 5429 } 5430 5431 return false; 5432 } 5433 5434 /// Common code to parse out a block of text (typically YAML) between start and 5435 /// end directives. 
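/// For illustration, with begin/end directives ".amdgpu_metadata" and
/// ".end_amdgpu_metadata" (the HSA metadata V3 case), a sketch such as:
///   .amdgpu_metadata
///   amdhsa.version:
///     - 1
///     - 0
///   .end_amdgpu_metadata
/// collects the YAML lines between the two directives into CollectString.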
5436 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5437 const char *AssemblerDirectiveEnd, 5438 std::string &CollectString) { 5439 5440 raw_string_ostream CollectStream(CollectString); 5441 5442 getLexer().setSkipSpace(false); 5443 5444 bool FoundEnd = false; 5445 while (!isToken(AsmToken::Eof)) { 5446 while (isToken(AsmToken::Space)) { 5447 CollectStream << getTokenStr(); 5448 Lex(); 5449 } 5450 5451 if (trySkipId(AssemblerDirectiveEnd)) { 5452 FoundEnd = true; 5453 break; 5454 } 5455 5456 CollectStream << Parser.parseStringToEndOfStatement() 5457 << getContext().getAsmInfo()->getSeparatorString(); 5458 5459 Parser.eatToEndOfStatement(); 5460 } 5461 5462 getLexer().setSkipSpace(true); 5463 5464 if (isToken(AsmToken::Eof) && !FoundEnd) { 5465 return TokError(Twine("expected directive ") + 5466 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5467 } 5468 5469 CollectStream.flush(); 5470 return false; 5471 } 5472 5473 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5474 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5475 std::string String; 5476 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5477 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5478 return true; 5479 5480 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5481 if (!PALMetadata->setFromString(String)) 5482 return Error(getLoc(), "invalid PAL metadata"); 5483 return false; 5484 } 5485 5486 /// Parse the assembler directive for old linear-format PAL metadata. 5487 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5488 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5489 return Error(getLoc(), 5490 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5491 "not available on non-amdpal OSes")).str()); 5492 } 5493 5494 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5495 PALMetadata->setLegacy(); 5496 for (;;) { 5497 uint32_t Key, Value; 5498 if (ParseAsAbsoluteExpression(Key)) { 5499 return TokError(Twine("invalid value in ") + 5500 Twine(PALMD::AssemblerDirective)); 5501 } 5502 if (!trySkipToken(AsmToken::Comma)) { 5503 return TokError(Twine("expected an even number of values in ") + 5504 Twine(PALMD::AssemblerDirective)); 5505 } 5506 if (ParseAsAbsoluteExpression(Value)) { 5507 return TokError(Twine("invalid value in ") + 5508 Twine(PALMD::AssemblerDirective)); 5509 } 5510 PALMetadata->setRegister(Key, Value); 5511 if (!trySkipToken(AsmToken::Comma)) 5512 break; 5513 } 5514 return false; 5515 } 5516 5517 /// ParseDirectiveAMDGPULDS 5518 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5519 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5520 if (getParser().checkForValidSection()) 5521 return true; 5522 5523 StringRef Name; 5524 SMLoc NameLoc = getLoc(); 5525 if (getParser().parseIdentifier(Name)) 5526 return TokError("expected identifier in directive"); 5527 5528 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5529 if (parseToken(AsmToken::Comma, "expected ','")) 5530 return true; 5531 5532 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5533 5534 int64_t Size; 5535 SMLoc SizeLoc = getLoc(); 5536 if (getParser().parseAbsoluteExpression(Size)) 5537 return true; 5538 if (Size < 0) 5539 return Error(SizeLoc, "size must be non-negative"); 5540 if (Size > LocalMemorySize) 5541 return Error(SizeLoc, "size is too large"); 5542 5543 int64_t Alignment = 4; 5544 if (trySkipToken(AsmToken::Comma)) { 5545 SMLoc AlignLoc = getLoc(); 5546 if 
(getParser().parseAbsoluteExpression(Alignment)) 5547 return true; 5548 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5549 return Error(AlignLoc, "alignment must be a power of two"); 5550 5551 // Alignment larger than the size of LDS is possible in theory, as long 5552 // as the linker manages to place the symbol at address 0, but we do want 5553 // to make sure the alignment fits nicely into a 32-bit integer. 5554 if (Alignment >= 1u << 31) 5555 return Error(AlignLoc, "alignment is too large"); 5556 } 5557 5558 if (parseEOL()) 5559 return true; 5560 5561 Symbol->redefineIfPossible(); 5562 if (!Symbol->isUndefined()) 5563 return Error(NameLoc, "invalid symbol redefinition"); 5564 5565 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5566 return false; 5567 } 5568 5569 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5570 StringRef IDVal = DirectiveID.getString(); 5571 5572 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5573 if (IDVal == ".amdhsa_kernel") 5574 return ParseDirectiveAMDHSAKernel(); 5575 5576 // TODO: Restructure/combine with PAL metadata directive. 5577 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5578 return ParseDirectiveHSAMetadata(); 5579 } else { 5580 if (IDVal == ".hsa_code_object_version") 5581 return ParseDirectiveHSACodeObjectVersion(); 5582 5583 if (IDVal == ".hsa_code_object_isa") 5584 return ParseDirectiveHSACodeObjectISA(); 5585 5586 if (IDVal == ".amd_kernel_code_t") 5587 return ParseDirectiveAMDKernelCodeT(); 5588 5589 if (IDVal == ".amdgpu_hsa_kernel") 5590 return ParseDirectiveAMDGPUHsaKernel(); 5591 5592 if (IDVal == ".amd_amdgpu_isa") 5593 return ParseDirectiveISAVersion(); 5594 5595 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5596 return ParseDirectiveHSAMetadata(); 5597 } 5598 5599 if (IDVal == ".amdgcn_target") 5600 return ParseDirectiveAMDGCNTarget(); 5601 5602 if (IDVal == ".amdgpu_lds") 5603 return ParseDirectiveAMDGPULDS(); 5604 5605 if (IDVal == PALMD::AssemblerDirectiveBegin) 5606 return ParseDirectivePALMetadataBegin(); 5607 5608 if (IDVal == PALMD::AssemblerDirective) 5609 return ParseDirectivePALMetadata(); 5610 5611 return true; 5612 } 5613 5614 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5615 unsigned RegNo) { 5616 5617 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5618 return isGFX9Plus(); 5619 5620 // GFX10+ has 2 more SGPRs 104 and 105. 5621 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5622 return hasSGPR104_SGPR105(); 5623 5624 switch (RegNo) { 5625 case AMDGPU::SRC_SHARED_BASE: 5626 case AMDGPU::SRC_SHARED_LIMIT: 5627 case AMDGPU::SRC_PRIVATE_BASE: 5628 case AMDGPU::SRC_PRIVATE_LIMIT: 5629 return isGFX9Plus(); 5630 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5631 return isGFX9Plus() && !isGFX11Plus(); 5632 case AMDGPU::TBA: 5633 case AMDGPU::TBA_LO: 5634 case AMDGPU::TBA_HI: 5635 case AMDGPU::TMA: 5636 case AMDGPU::TMA_LO: 5637 case AMDGPU::TMA_HI: 5638 return !isGFX9Plus(); 5639 case AMDGPU::XNACK_MASK: 5640 case AMDGPU::XNACK_MASK_LO: 5641 case AMDGPU::XNACK_MASK_HI: 5642 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5643 case AMDGPU::SGPR_NULL: 5644 return isGFX10Plus(); 5645 default: 5646 break; 5647 } 5648 5649 if (isCI()) 5650 return true; 5651 5652 if (isSI() || isGFX10Plus()) { 5653 // No flat_scr on SI. 5654 // On GFX10Plus flat scratch is not a valid register operand and can only be 5655 // accessed with s_setreg/s_getreg.
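    // For illustration (assumed syntax), "s_mov_b32 flat_scratch_lo, s2" is
    // rejected here, while GFX10+ code uses forms like
    // "s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2" instead.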
5656 switch (RegNo) { 5657 case AMDGPU::FLAT_SCR: 5658 case AMDGPU::FLAT_SCR_LO: 5659 case AMDGPU::FLAT_SCR_HI: 5660 return false; 5661 default: 5662 return true; 5663 } 5664 } 5665 5666 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5667 // SI/CI have. 5668 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5669 return hasSGPR102_SGPR103(); 5670 5671 return true; 5672 } 5673 5674 OperandMatchResultTy 5675 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5676 OperandMode Mode) { 5677 OperandMatchResultTy ResTy = parseVOPD(Operands); 5678 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5679 isToken(AsmToken::EndOfStatement)) 5680 return ResTy; 5681 5682 // Try to parse with a custom parser 5683 ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5684 5685 // If we successfully parsed the operand or if there was an error parsing, 5686 // we are done. 5687 // 5688 // If we are parsing after we reach EndOfStatement then this means we 5689 // are appending default values to the Operands list. This is only done 5690 // by a custom parser, so we shouldn't continue on to the generic parsing. 5691 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5692 isToken(AsmToken::EndOfStatement)) 5693 return ResTy; 5694 5695 SMLoc RBraceLoc; 5696 SMLoc LBraceLoc = getLoc(); 5697 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5698 unsigned Prefix = Operands.size(); 5699 5700 for (;;) { 5701 auto Loc = getLoc(); 5702 ResTy = parseReg(Operands); 5703 if (ResTy == MatchOperand_NoMatch) 5704 Error(Loc, "expected a register"); 5705 if (ResTy != MatchOperand_Success) 5706 return MatchOperand_ParseFail; 5707 5708 RBraceLoc = getLoc(); 5709 if (trySkipToken(AsmToken::RBrac)) 5710 break; 5711 5712 if (!skipToken(AsmToken::Comma, 5713 "expected a comma or a closing square bracket")) { 5714 return MatchOperand_ParseFail; 5715 } 5716 } 5717 5718 if (Operands.size() - Prefix > 1) { 5719 Operands.insert(Operands.begin() + Prefix, 5720 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5721 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5722 } 5723 5724 return MatchOperand_Success; 5725 } 5726 5727 return parseRegOrImm(Operands); 5728 } 5729 5730 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5731 // Clear any forced encodings from the previous instruction.
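  // For example, "v_add_f32_e64" is reduced below to "v_add_f32" with a
  // forced 64-bit (VOP3) encoding, and "v_mov_b32_sdwa" to "v_mov_b32" with
  // SDWA forced (mnemonics are illustrative).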
5732 setForcedEncodingSize(0); 5733 setForcedDPP(false); 5734 setForcedSDWA(false); 5735 5736 if (Name.endswith("_e64_dpp")) { 5737 setForcedDPP(true); 5738 setForcedEncodingSize(64); 5739 return Name.substr(0, Name.size() - 8); 5740 } else if (Name.endswith("_e64")) { 5741 setForcedEncodingSize(64); 5742 return Name.substr(0, Name.size() - 4); 5743 } else if (Name.endswith("_e32")) { 5744 setForcedEncodingSize(32); 5745 return Name.substr(0, Name.size() - 4); 5746 } else if (Name.endswith("_dpp")) { 5747 setForcedDPP(true); 5748 return Name.substr(0, Name.size() - 4); 5749 } else if (Name.endswith("_sdwa")) { 5750 setForcedSDWA(true); 5751 return Name.substr(0, Name.size() - 5); 5752 } 5753 return Name; 5754 } 5755 5756 static void applyMnemonicAliases(StringRef &Mnemonic, 5757 const FeatureBitset &Features, 5758 unsigned VariantID); 5759 5760 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5761 StringRef Name, 5762 SMLoc NameLoc, OperandVector &Operands) { 5763 // Add the instruction mnemonic 5764 Name = parseMnemonicSuffix(Name); 5765 5766 // If the target architecture uses MnemonicAlias, call it here to parse 5767 // operands correctly. 5768 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5769 5770 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5771 5772 bool IsMIMG = Name.startswith("image_"); 5773 5774 while (!trySkipToken(AsmToken::EndOfStatement)) { 5775 OperandMode Mode = OperandMode_Default; 5776 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5777 Mode = OperandMode_NSA; 5778 CPolSeen = 0; 5779 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5780 5781 if (Res != MatchOperand_Success) { 5782 checkUnsupportedInstruction(Name, NameLoc); 5783 if (!Parser.hasPendingError()) { 5784 // FIXME: use real operand location rather than the current location. 5785 StringRef Msg = 5786 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5787 "not a valid operand."; 5788 Error(getLoc(), Msg); 5789 } 5790 while (!trySkipToken(AsmToken::EndOfStatement)) { 5791 lex(); 5792 } 5793 return true; 5794 } 5795 5796 // Eat the comma or space if there is one. 5797 trySkipToken(AsmToken::Comma); 5798 } 5799 5800 return false; 5801 } 5802 5803 //===----------------------------------------------------------------------===// 5804 // Utility functions 5805 //===----------------------------------------------------------------------===// 5806 5807 OperandMatchResultTy 5808 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5809 5810 if (!trySkipId(Prefix, AsmToken::Colon)) 5811 return MatchOperand_NoMatch; 5812 5813 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 5814 } 5815 5816 OperandMatchResultTy 5817 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5818 AMDGPUOperand::ImmTy ImmTy, 5819 bool (*ConvertResult)(int64_t&)) { 5820 SMLoc S = getLoc(); 5821 int64_t Value = 0; 5822 5823 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5824 if (Res != MatchOperand_Success) 5825 return Res; 5826 5827 if (ConvertResult && !ConvertResult(Value)) { 5828 Error(S, "invalid " + StringRef(Prefix) + " value."); 5829 } 5830 5831 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5832 return MatchOperand_Success; 5833 } 5834 5835 OperandMatchResultTy 5836 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5837 OperandVector &Operands, 5838 AMDGPUOperand::ImmTy ImmTy, 5839 bool (*ConvertResult)(int64_t&)) { 5840 SMLoc S = getLoc(); 5841 if (!trySkipId(Prefix, AsmToken::Colon)) 5842 return MatchOperand_NoMatch; 5843 5844 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5845 return MatchOperand_ParseFail; 5846 5847 unsigned Val = 0; 5848 const unsigned MaxSize = 4; 5849 5850 // FIXME: How to verify the number of elements matches the number of src 5851 // operands? 5852 for (int I = 0; ; ++I) { 5853 int64_t Op; 5854 SMLoc Loc = getLoc(); 5855 if (!parseExpr(Op)) 5856 return MatchOperand_ParseFail; 5857 5858 if (Op != 0 && Op != 1) { 5859 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5860 return MatchOperand_ParseFail; 5861 } 5862 5863 Val |= (Op << I); 5864 5865 if (trySkipToken(AsmToken::RBrac)) 5866 break; 5867 5868 if (I + 1 == MaxSize) { 5869 Error(getLoc(), "expected a closing square bracket"); 5870 return MatchOperand_ParseFail; 5871 } 5872 5873 if (!skipToken(AsmToken::Comma, "expected a comma")) 5874 return MatchOperand_ParseFail; 5875 } 5876 5877 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5878 return MatchOperand_Success; 5879 } 5880 5881 OperandMatchResultTy 5882 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5883 AMDGPUOperand::ImmTy ImmTy) { 5884 int64_t Bit; 5885 SMLoc S = getLoc(); 5886 5887 if (trySkipId(Name)) { 5888 Bit = 1; 5889 } else if (trySkipId("no", Name)) { 5890 Bit = 0; 5891 } else { 5892 return MatchOperand_NoMatch; 5893 } 5894 5895 if (Name == "r128" && !hasMIMG_R128()) { 5896 Error(S, "r128 modifier is not supported on this GPU"); 5897 return MatchOperand_ParseFail; 5898 } 5899 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5900 Error(S, "a16 modifier is not supported on this GPU"); 5901 return MatchOperand_ParseFail; 5902 } 5903 5904 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5905 ImmTy = AMDGPUOperand::ImmTyR128A16; 5906 5907 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5908 return MatchOperand_Success; 5909 } 5910 5911 OperandMatchResultTy 5912 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5913 unsigned CPolOn = 0; 5914 unsigned CPolOff = 0; 5915 SMLoc S = getLoc(); 5916 5917 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5918 if (isGFX940() && !Mnemo.startswith("s_")) { 5919 if (trySkipId("sc0")) 5920 CPolOn = AMDGPU::CPol::SC0; 5921 else if (trySkipId("nosc0")) 5922 CPolOff = AMDGPU::CPol::SC0; 5923 else if (trySkipId("nt")) 5924 CPolOn = AMDGPU::CPol::NT; 5925 else if (trySkipId("nont")) 5926 CPolOff = AMDGPU::CPol::NT; 5927 else if (trySkipId("sc1")) 5928 CPolOn = AMDGPU::CPol::SC1; 5929 else if (trySkipId("nosc1")) 5930 CPolOff = AMDGPU::CPol::SC1; 5931 else 5932 return 
MatchOperand_NoMatch; 5933 } 5934 else if (trySkipId("glc")) 5935 CPolOn = AMDGPU::CPol::GLC; 5936 else if (trySkipId("noglc")) 5937 CPolOff = AMDGPU::CPol::GLC; 5938 else if (trySkipId("slc")) 5939 CPolOn = AMDGPU::CPol::SLC; 5940 else if (trySkipId("noslc")) 5941 CPolOff = AMDGPU::CPol::SLC; 5942 else if (trySkipId("dlc")) 5943 CPolOn = AMDGPU::CPol::DLC; 5944 else if (trySkipId("nodlc")) 5945 CPolOff = AMDGPU::CPol::DLC; 5946 else if (trySkipId("scc")) 5947 CPolOn = AMDGPU::CPol::SCC; 5948 else if (trySkipId("noscc")) 5949 CPolOff = AMDGPU::CPol::SCC; 5950 else 5951 return MatchOperand_NoMatch; 5952 5953 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5954 Error(S, "dlc modifier is not supported on this GPU"); 5955 return MatchOperand_ParseFail; 5956 } 5957 5958 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5959 Error(S, "scc modifier is not supported on this GPU"); 5960 return MatchOperand_ParseFail; 5961 } 5962 5963 if (CPolSeen & (CPolOn | CPolOff)) { 5964 Error(S, "duplicate cache policy modifier"); 5965 return MatchOperand_ParseFail; 5966 } 5967 5968 CPolSeen |= (CPolOn | CPolOff); 5969 5970 for (unsigned I = 1; I != Operands.size(); ++I) { 5971 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5972 if (Op.isCPol()) { 5973 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5974 return MatchOperand_Success; 5975 } 5976 } 5977 5978 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5979 AMDGPUOperand::ImmTyCPol)); 5980 5981 return MatchOperand_Success; 5982 } 5983 5984 static void addOptionalImmOperand( 5985 MCInst& Inst, const OperandVector& Operands, 5986 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5987 AMDGPUOperand::ImmTy ImmT, 5988 int64_t Default = 0) { 5989 auto i = OptionalIdx.find(ImmT); 5990 if (i != OptionalIdx.end()) { 5991 unsigned Idx = i->second; 5992 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5993 } else { 5994 Inst.addOperand(MCOperand::createImm(Default)); 5995 } 5996 } 5997 5998 OperandMatchResultTy 5999 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 6000 StringRef &Value, 6001 SMLoc &StringLoc) { 6002 if (!trySkipId(Prefix, AsmToken::Colon)) 6003 return MatchOperand_NoMatch; 6004 6005 StringLoc = getLoc(); 6006 return parseId(Value, "expected an identifier") ? MatchOperand_Success 6007 : MatchOperand_ParseFail; 6008 } 6009 6010 //===----------------------------------------------------------------------===// 6011 // MTBUF format 6012 //===----------------------------------------------------------------------===// 6013 6014 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 6015 int64_t MaxVal, 6016 int64_t &Fmt) { 6017 int64_t Val; 6018 SMLoc Loc = getLoc(); 6019 6020 auto Res = parseIntWithPrefix(Pref, Val); 6021 if (Res == MatchOperand_ParseFail) 6022 return false; 6023 if (Res == MatchOperand_NoMatch) 6024 return true; 6025 6026 if (Val < 0 || Val > MaxVal) { 6027 Error(Loc, Twine("out of range ", StringRef(Pref))); 6028 return false; 6029 } 6030 6031 Fmt = Val; 6032 return true; 6033 } 6034 6035 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 6036 // values to live in a joint format operand in the MCInst encoding. 6037 OperandMatchResultTy 6038 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 6039 using namespace llvm::AMDGPU::MTBUFFormat; 6040 6041 int64_t Dfmt = DFMT_UNDEF; 6042 int64_t Nfmt = NFMT_UNDEF; 6043 6044 // dfmt and nfmt can appear in either order, and each is optional. 
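  // For illustration (field values assumed), "dfmt:4, nfmt:7",
  // "nfmt:7, dfmt:4" and "dfmt:4 nfmt:7" are all accepted here and yield the
  // same joint encoding.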
6045 for (int I = 0; I < 2; ++I) { 6046 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 6047 return MatchOperand_ParseFail; 6048 6049 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 6050 return MatchOperand_ParseFail; 6051 } 6052 // Skip optional comma between dfmt/nfmt 6053 // but guard against 2 commas following each other. 6054 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 6055 !peekToken().is(AsmToken::Comma)) { 6056 trySkipToken(AsmToken::Comma); 6057 } 6058 } 6059 6060 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 6061 return MatchOperand_NoMatch; 6062 6063 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6064 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 6065 6066 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6067 return MatchOperand_Success; 6068 } 6069 6070 OperandMatchResultTy 6071 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 6072 using namespace llvm::AMDGPU::MTBUFFormat; 6073 6074 int64_t Fmt = UFMT_UNDEF; 6075 6076 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 6077 return MatchOperand_ParseFail; 6078 6079 if (Fmt == UFMT_UNDEF) 6080 return MatchOperand_NoMatch; 6081 6082 Format = Fmt; 6083 return MatchOperand_Success; 6084 } 6085 6086 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6087 int64_t &Nfmt, 6088 StringRef FormatStr, 6089 SMLoc Loc) { 6090 using namespace llvm::AMDGPU::MTBUFFormat; 6091 int64_t Format; 6092 6093 Format = getDfmt(FormatStr); 6094 if (Format != DFMT_UNDEF) { 6095 Dfmt = Format; 6096 return true; 6097 } 6098 6099 Format = getNfmt(FormatStr, getSTI()); 6100 if (Format != NFMT_UNDEF) { 6101 Nfmt = Format; 6102 return true; 6103 } 6104 6105 Error(Loc, "unsupported format"); 6106 return false; 6107 } 6108 6109 OperandMatchResultTy 6110 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6111 SMLoc FormatLoc, 6112 int64_t &Format) { 6113 using namespace llvm::AMDGPU::MTBUFFormat; 6114 6115 int64_t Dfmt = DFMT_UNDEF; 6116 int64_t Nfmt = NFMT_UNDEF; 6117 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6118 return MatchOperand_ParseFail; 6119 6120 if (trySkipToken(AsmToken::Comma)) { 6121 StringRef Str; 6122 SMLoc Loc = getLoc(); 6123 if (!parseId(Str, "expected a format string") || 6124 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6125 return MatchOperand_ParseFail; 6126 } 6127 if (Dfmt == DFMT_UNDEF) { 6128 Error(Loc, "duplicate numeric format"); 6129 return MatchOperand_ParseFail; 6130 } else if (Nfmt == NFMT_UNDEF) { 6131 Error(Loc, "duplicate data format"); 6132 return MatchOperand_ParseFail; 6133 } 6134 } 6135 6136 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6137 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6138 6139 if (isGFX10Plus()) { 6140 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6141 if (Ufmt == UFMT_UNDEF) { 6142 Error(FormatLoc, "unsupported format"); 6143 return MatchOperand_ParseFail; 6144 } 6145 Format = Ufmt; 6146 } else { 6147 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6148 } 6149 6150 return MatchOperand_Success; 6151 } 6152 6153 OperandMatchResultTy 6154 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6155 SMLoc Loc, 6156 int64_t &Format) { 6157 using namespace llvm::AMDGPU::MTBUFFormat; 6158 6159 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6160 if (Id == UFMT_UNDEF) 6161 return MatchOperand_NoMatch; 6162 6163 if (!isGFX10Plus()) { 6164 Error(Loc, "unified format is not supported on this GPU"); 6165 return MatchOperand_ParseFail; 6166 } 6167 6168 Format = Id; 6169 return MatchOperand_Success; 6170 } 6171 6172 OperandMatchResultTy 6173 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6174 using namespace llvm::AMDGPU::MTBUFFormat; 6175 SMLoc Loc = getLoc(); 6176 6177 if (!parseExpr(Format)) 6178 return MatchOperand_ParseFail; 6179 if (!isValidFormatEncoding(Format, getSTI())) { 6180 Error(Loc, "out of range format"); 6181 return MatchOperand_ParseFail; 6182 } 6183 6184 return MatchOperand_Success; 6185 } 6186 6187 OperandMatchResultTy 6188 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6189 using namespace llvm::AMDGPU::MTBUFFormat; 6190 6191 if (!trySkipId("format", AsmToken::Colon)) 6192 return MatchOperand_NoMatch; 6193 6194 if (trySkipToken(AsmToken::LBrac)) { 6195 StringRef FormatStr; 6196 SMLoc Loc = getLoc(); 6197 if (!parseId(FormatStr, "expected a format string")) 6198 return MatchOperand_ParseFail; 6199 6200 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6201 if (Res == MatchOperand_NoMatch) 6202 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6203 if (Res != MatchOperand_Success) 6204 return Res; 6205 6206 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6207 return MatchOperand_ParseFail; 6208 6209 return MatchOperand_Success; 6210 } 6211 6212 return parseNumericFormat(Format); 6213 } 6214 6215 OperandMatchResultTy 6216 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6217 using namespace llvm::AMDGPU::MTBUFFormat; 6218 6219 int64_t Format = getDefaultFormatEncoding(getSTI()); 6220 OperandMatchResultTy Res; 6221 SMLoc Loc = getLoc(); 6222 6223 // Parse legacy format syntax. 6224 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6225 if (Res == MatchOperand_ParseFail) 6226 return Res; 6227 6228 bool FormatFound = (Res == MatchOperand_Success); 6229 6230 Operands.push_back( 6231 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6232 6233 if (FormatFound) 6234 trySkipToken(AsmToken::Comma); 6235 6236 if (isToken(AsmToken::EndOfStatement)) { 6237 // We are expecting an soffset operand, 6238 // but let matcher handle the error. 6239 return MatchOperand_Success; 6240 } 6241 6242 // Parse soffset. 
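  // For illustration (operands assumed), in "tbuffer_load_format_x v4, off,
  // s[0:3], 0" the trailing "0" parsed below is the soffset; on GFX10+ a
  // symbolic format such as "format:[BUF_FMT_32_FLOAT]" may follow it and is
  // patched into the FORMAT operand created above.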
6243 Res = parseRegOrImm(Operands); 6244 if (Res != MatchOperand_Success) 6245 return Res; 6246 6247 trySkipToken(AsmToken::Comma); 6248 6249 if (!FormatFound) { 6250 Res = parseSymbolicOrNumericFormat(Format); 6251 if (Res == MatchOperand_ParseFail) 6252 return Res; 6253 if (Res == MatchOperand_Success) { 6254 auto Size = Operands.size(); 6255 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6256 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6257 Op.setImm(Format); 6258 } 6259 return MatchOperand_Success; 6260 } 6261 6262 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6263 Error(getLoc(), "duplicate format"); 6264 return MatchOperand_ParseFail; 6265 } 6266 return MatchOperand_Success; 6267 } 6268 6269 //===----------------------------------------------------------------------===// 6270 // ds 6271 //===----------------------------------------------------------------------===// 6272 6273 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6274 const OperandVector &Operands) { 6275 OptionalImmIndexMap OptionalIdx; 6276 6277 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6278 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6279 6280 // Add the register arguments 6281 if (Op.isReg()) { 6282 Op.addRegOperands(Inst, 1); 6283 continue; 6284 } 6285 6286 // Handle optional arguments 6287 OptionalIdx[Op.getImmTy()] = i; 6288 } 6289 6290 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6291 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6292 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6293 6294 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6295 } 6296 6297 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6298 bool IsGdsHardcoded) { 6299 OptionalImmIndexMap OptionalIdx; 6300 AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset; 6301 6302 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6303 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6304 6305 // Add the register arguments 6306 if (Op.isReg()) { 6307 Op.addRegOperands(Inst, 1); 6308 continue; 6309 } 6310 6311 if (Op.isToken() && Op.getToken() == "gds") { 6312 IsGdsHardcoded = true; 6313 continue; 6314 } 6315 6316 // Handle optional arguments 6317 OptionalIdx[Op.getImmTy()] = i; 6318 6319 if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle) 6320 OffsetType = AMDGPUOperand::ImmTySwizzle; 6321 } 6322 6323 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6324 6325 if (!IsGdsHardcoded) { 6326 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6327 } 6328 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6329 } 6330 6331 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6332 OptionalImmIndexMap OptionalIdx; 6333 6334 unsigned OperandIdx[4]; 6335 unsigned EnMask = 0; 6336 int SrcIdx = 0; 6337 6338 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6339 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6340 6341 // Add the register arguments 6342 if (Op.isReg()) { 6343 assert(SrcIdx < 4); 6344 OperandIdx[SrcIdx] = Inst.size(); 6345 Op.addRegOperands(Inst, 1); 6346 ++SrcIdx; 6347 continue; 6348 } 6349 6350 if (Op.isOff()) { 6351 assert(SrcIdx < 4); 6352 OperandIdx[SrcIdx] = Inst.size(); 6353 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6354 ++SrcIdx; 6355 continue; 6356 } 6357 6358 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) 
{ 6359 Op.addImmOperands(Inst, 1); 6360 continue; 6361 } 6362 6363 if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en")) 6364 continue; 6365 6366 // Handle optional arguments 6367 OptionalIdx[Op.getImmTy()] = i; 6368 } 6369 6370 assert(SrcIdx == 4); 6371 6372 bool Compr = false; 6373 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6374 Compr = true; 6375 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6376 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6377 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6378 } 6379 6380 for (auto i = 0; i < SrcIdx; ++i) { 6381 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6382 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 6383 } 6384 } 6385 6386 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6387 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6388 6389 Inst.addOperand(MCOperand::createImm(EnMask)); 6390 } 6391 6392 //===----------------------------------------------------------------------===// 6393 // s_waitcnt 6394 //===----------------------------------------------------------------------===// 6395 6396 static bool 6397 encodeCnt( 6398 const AMDGPU::IsaVersion ISA, 6399 int64_t &IntVal, 6400 int64_t CntVal, 6401 bool Saturate, 6402 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6403 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6404 { 6405 bool Failed = false; 6406 6407 IntVal = encode(ISA, IntVal, CntVal); 6408 if (CntVal != decode(ISA, IntVal)) { 6409 if (Saturate) { 6410 IntVal = encode(ISA, IntVal, -1); 6411 } else { 6412 Failed = true; 6413 } 6414 } 6415 return Failed; 6416 } 6417 6418 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6419 6420 SMLoc CntLoc = getLoc(); 6421 StringRef CntName = getTokenStr(); 6422 6423 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6424 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6425 return false; 6426 6427 int64_t CntVal; 6428 SMLoc ValLoc = getLoc(); 6429 if (!parseExpr(CntVal)) 6430 return false; 6431 6432 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6433 6434 bool Failed = true; 6435 bool Sat = CntName.endswith("_sat"); 6436 6437 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6438 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6439 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6440 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6441 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6442 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6443 } else { 6444 Error(CntLoc, "invalid counter name " + CntName); 6445 return false; 6446 } 6447 6448 if (Failed) { 6449 Error(ValLoc, "too large value for " + CntName); 6450 return false; 6451 } 6452 6453 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6454 return false; 6455 6456 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6457 if (isToken(AsmToken::EndOfStatement)) { 6458 Error(getLoc(), "expected a counter name"); 6459 return false; 6460 } 6461 } 6462 6463 return true; 6464 } 6465 6466 OperandMatchResultTy 6467 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6468 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6469 int64_t Waitcnt = getWaitcntBitMask(ISA); 6470 SMLoc S = getLoc(); 6471 6472 if (isToken(AsmToken::Identifier) 
&& peekToken().is(AsmToken::LParen)) { 6473 while (!isToken(AsmToken::EndOfStatement)) { 6474 if (!parseCnt(Waitcnt)) 6475 return MatchOperand_ParseFail; 6476 } 6477 } else { 6478 if (!parseExpr(Waitcnt)) 6479 return MatchOperand_ParseFail; 6480 } 6481 6482 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6483 return MatchOperand_Success; 6484 } 6485 6486 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6487 SMLoc FieldLoc = getLoc(); 6488 StringRef FieldName = getTokenStr(); 6489 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6490 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6491 return false; 6492 6493 SMLoc ValueLoc = getLoc(); 6494 StringRef ValueName = getTokenStr(); 6495 if (!skipToken(AsmToken::Identifier, "expected a value name") || 6496 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6497 return false; 6498 6499 unsigned Shift; 6500 if (FieldName == "instid0") { 6501 Shift = 0; 6502 } else if (FieldName == "instskip") { 6503 Shift = 4; 6504 } else if (FieldName == "instid1") { 6505 Shift = 7; 6506 } else { 6507 Error(FieldLoc, "invalid field name " + FieldName); 6508 return false; 6509 } 6510 6511 int Value; 6512 if (Shift == 4) { 6513 // Parse values for instskip. 6514 Value = StringSwitch<int>(ValueName) 6515 .Case("SAME", 0) 6516 .Case("NEXT", 1) 6517 .Case("SKIP_1", 2) 6518 .Case("SKIP_2", 3) 6519 .Case("SKIP_3", 4) 6520 .Case("SKIP_4", 5) 6521 .Default(-1); 6522 } else { 6523 // Parse values for instid0 and instid1. 6524 Value = StringSwitch<int>(ValueName) 6525 .Case("NO_DEP", 0) 6526 .Case("VALU_DEP_1", 1) 6527 .Case("VALU_DEP_2", 2) 6528 .Case("VALU_DEP_3", 3) 6529 .Case("VALU_DEP_4", 4) 6530 .Case("TRANS32_DEP_1", 5) 6531 .Case("TRANS32_DEP_2", 6) 6532 .Case("TRANS32_DEP_3", 7) 6533 .Case("FMA_ACCUM_CYCLE_1", 8) 6534 .Case("SALU_CYCLE_1", 9) 6535 .Case("SALU_CYCLE_2", 10) 6536 .Case("SALU_CYCLE_3", 11) 6537 .Default(-1); 6538 } 6539 if (Value < 0) { 6540 Error(ValueLoc, "invalid value name " + ValueName); 6541 return false; 6542 } 6543 6544 Delay |= Value << Shift; 6545 return true; 6546 } 6547 6548 OperandMatchResultTy 6549 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6550 int64_t Delay = 0; 6551 SMLoc S = getLoc(); 6552 6553 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6554 do { 6555 if (!parseDelay(Delay)) 6556 return MatchOperand_ParseFail; 6557 } while (trySkipToken(AsmToken::Pipe)); 6558 } else { 6559 if (!parseExpr(Delay)) 6560 return MatchOperand_ParseFail; 6561 } 6562 6563 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6564 return MatchOperand_Success; 6565 } 6566 6567 bool 6568 AMDGPUOperand::isSWaitCnt() const { 6569 return isImm(); 6570 } 6571 6572 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6573 6574 //===----------------------------------------------------------------------===// 6575 // DepCtr 6576 //===----------------------------------------------------------------------===// 6577 6578 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6579 StringRef DepCtrName) { 6580 switch (ErrorId) { 6581 case OPR_ID_UNKNOWN: 6582 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6583 return; 6584 case OPR_ID_UNSUPPORTED: 6585 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6586 return; 6587 case OPR_ID_DUPLICATE: 6588 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6589 return; 6590 case OPR_VAL_INVALID: 6591 Error(Loc, Twine("invalid value for ", DepCtrName)); 6592 return; 6593 default: 
6594 assert(false); 6595 } 6596 } 6597 6598 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6599 6600 using namespace llvm::AMDGPU::DepCtr; 6601 6602 SMLoc DepCtrLoc = getLoc(); 6603 StringRef DepCtrName = getTokenStr(); 6604 6605 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6606 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6607 return false; 6608 6609 int64_t ExprVal; 6610 if (!parseExpr(ExprVal)) 6611 return false; 6612 6613 unsigned PrevOprMask = UsedOprMask; 6614 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6615 6616 if (CntVal < 0) { 6617 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6618 return false; 6619 } 6620 6621 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6622 return false; 6623 6624 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6625 if (isToken(AsmToken::EndOfStatement)) { 6626 Error(getLoc(), "expected a counter name"); 6627 return false; 6628 } 6629 } 6630 6631 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6632 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6633 return true; 6634 } 6635 6636 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6637 using namespace llvm::AMDGPU::DepCtr; 6638 6639 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6640 SMLoc Loc = getLoc(); 6641 6642 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6643 unsigned UsedOprMask = 0; 6644 while (!isToken(AsmToken::EndOfStatement)) { 6645 if (!parseDepCtr(DepCtr, UsedOprMask)) 6646 return MatchOperand_ParseFail; 6647 } 6648 } else { 6649 if (!parseExpr(DepCtr)) 6650 return MatchOperand_ParseFail; 6651 } 6652 6653 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6654 return MatchOperand_Success; 6655 } 6656 6657 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6658 6659 //===----------------------------------------------------------------------===// 6660 // hwreg 6661 //===----------------------------------------------------------------------===// 6662 6663 bool 6664 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6665 OperandInfoTy &Offset, 6666 OperandInfoTy &Width) { 6667 using namespace llvm::AMDGPU::Hwreg; 6668 6669 // The register may be specified by name or using a numeric code 6670 HwReg.Loc = getLoc(); 6671 if (isToken(AsmToken::Identifier) && 6672 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6673 HwReg.IsSymbolic = true; 6674 lex(); // skip register name 6675 } else if (!parseExpr(HwReg.Id, "a register name")) { 6676 return false; 6677 } 6678 6679 if (trySkipToken(AsmToken::RParen)) 6680 return true; 6681 6682 // parse optional params 6683 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6684 return false; 6685 6686 Offset.Loc = getLoc(); 6687 if (!parseExpr(Offset.Id)) 6688 return false; 6689 6690 if (!skipToken(AsmToken::Comma, "expected a comma")) 6691 return false; 6692 6693 Width.Loc = getLoc(); 6694 return parseExpr(Width.Id) && 6695 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6696 } 6697 6698 bool 6699 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6700 const OperandInfoTy &Offset, 6701 const OperandInfoTy &Width) { 6702 6703 using namespace llvm::AMDGPU::Hwreg; 6704 6705 if (HwReg.IsSymbolic) { 6706 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6707 Error(HwReg.Loc, 6708 "specified hardware register is not supported on this GPU"); 6709 return false; 6710 } 6711 } else { 6712 if 
(!isValidHwreg(HwReg.Id)) { 6713 Error(HwReg.Loc, 6714 "invalid code of hardware register: only 6-bit values are legal"); 6715 return false; 6716 } 6717 } 6718 if (!isValidHwregOffset(Offset.Id)) { 6719 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6720 return false; 6721 } 6722 if (!isValidHwregWidth(Width.Id)) { 6723 Error(Width.Loc, 6724 "invalid bitfield width: only values from 1 to 32 are legal"); 6725 return false; 6726 } 6727 return true; 6728 } 6729 6730 OperandMatchResultTy 6731 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6732 using namespace llvm::AMDGPU::Hwreg; 6733 6734 int64_t ImmVal = 0; 6735 SMLoc Loc = getLoc(); 6736 6737 if (trySkipId("hwreg", AsmToken::LParen)) { 6738 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6739 OperandInfoTy Offset(OFFSET_DEFAULT_); 6740 OperandInfoTy Width(WIDTH_DEFAULT_); 6741 if (parseHwregBody(HwReg, Offset, Width) && 6742 validateHwreg(HwReg, Offset, Width)) { 6743 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6744 } else { 6745 return MatchOperand_ParseFail; 6746 } 6747 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6748 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6749 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6750 return MatchOperand_ParseFail; 6751 } 6752 } else { 6753 return MatchOperand_ParseFail; 6754 } 6755 6756 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6757 return MatchOperand_Success; 6758 } 6759 6760 bool AMDGPUOperand::isHwreg() const { 6761 return isImmTy(ImmTyHwreg); 6762 } 6763 6764 //===----------------------------------------------------------------------===// 6765 // sendmsg 6766 //===----------------------------------------------------------------------===// 6767 6768 bool 6769 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6770 OperandInfoTy &Op, 6771 OperandInfoTy &Stream) { 6772 using namespace llvm::AMDGPU::SendMsg; 6773 6774 Msg.Loc = getLoc(); 6775 if (isToken(AsmToken::Identifier) && 6776 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6777 Msg.IsSymbolic = true; 6778 lex(); // skip message name 6779 } else if (!parseExpr(Msg.Id, "a message name")) { 6780 return false; 6781 } 6782 6783 if (trySkipToken(AsmToken::Comma)) { 6784 Op.IsDefined = true; 6785 Op.Loc = getLoc(); 6786 if (isToken(AsmToken::Identifier) && 6787 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6788 lex(); // skip operation name 6789 } else if (!parseExpr(Op.Id, "an operation name")) { 6790 return false; 6791 } 6792 6793 if (trySkipToken(AsmToken::Comma)) { 6794 Stream.IsDefined = true; 6795 Stream.Loc = getLoc(); 6796 if (!parseExpr(Stream.Id)) 6797 return false; 6798 } 6799 } 6800 6801 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6802 } 6803 6804 bool 6805 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6806 const OperandInfoTy &Op, 6807 const OperandInfoTy &Stream) { 6808 using namespace llvm::AMDGPU::SendMsg; 6809 6810 // Validation strictness depends on whether the message is specified 6811 // in a symbolic or in a numeric form. In the latter case 6812 // only the encoding possibility is checked.
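  // For illustration (operands assumed), "sendmsg(MSG_GS_DONE, GS_OP_NOP)"
  // is checked against the known message/operation tables, while a numeric
  // form such as "sendmsg(3)" only has to fit the encoding.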
6813 bool Strict = Msg.IsSymbolic; 6814 6815 if (Strict) { 6816 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6817 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6818 return false; 6819 } 6820 } else { 6821 if (!isValidMsgId(Msg.Id, getSTI())) { 6822 Error(Msg.Loc, "invalid message id"); 6823 return false; 6824 } 6825 } 6826 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6827 if (Op.IsDefined) { 6828 Error(Op.Loc, "message does not support operations"); 6829 } else { 6830 Error(Msg.Loc, "missing message operation"); 6831 } 6832 return false; 6833 } 6834 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6835 Error(Op.Loc, "invalid operation id"); 6836 return false; 6837 } 6838 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6839 Stream.IsDefined) { 6840 Error(Stream.Loc, "message operation does not support streams"); 6841 return false; 6842 } 6843 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6844 Error(Stream.Loc, "invalid message stream id"); 6845 return false; 6846 } 6847 return true; 6848 } 6849 6850 OperandMatchResultTy 6851 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6852 using namespace llvm::AMDGPU::SendMsg; 6853 6854 int64_t ImmVal = 0; 6855 SMLoc Loc = getLoc(); 6856 6857 if (trySkipId("sendmsg", AsmToken::LParen)) { 6858 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6859 OperandInfoTy Op(OP_NONE_); 6860 OperandInfoTy Stream(STREAM_ID_NONE_); 6861 if (parseSendMsgBody(Msg, Op, Stream) && 6862 validateSendMsg(Msg, Op, Stream)) { 6863 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6864 } else { 6865 return MatchOperand_ParseFail; 6866 } 6867 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6868 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6869 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6870 return MatchOperand_ParseFail; 6871 } 6872 } else { 6873 return MatchOperand_ParseFail; 6874 } 6875 6876 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6877 return MatchOperand_Success; 6878 } 6879 6880 bool AMDGPUOperand::isSendMsg() const { 6881 return isImmTy(ImmTySendMsg); 6882 } 6883 6884 //===----------------------------------------------------------------------===// 6885 // v_interp 6886 //===----------------------------------------------------------------------===// 6887 6888 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6889 StringRef Str; 6890 SMLoc S = getLoc(); 6891 6892 if (!parseId(Str)) 6893 return MatchOperand_NoMatch; 6894 6895 int Slot = StringSwitch<int>(Str) 6896 .Case("p10", 0) 6897 .Case("p20", 1) 6898 .Case("p0", 2) 6899 .Default(-1); 6900 6901 if (Slot == -1) { 6902 Error(S, "invalid interpolation slot"); 6903 return MatchOperand_ParseFail; 6904 } 6905 6906 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6907 AMDGPUOperand::ImmTyInterpSlot)); 6908 return MatchOperand_Success; 6909 } 6910 6911 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6912 StringRef Str; 6913 SMLoc S = getLoc(); 6914 6915 if (!parseId(Str)) 6916 return MatchOperand_NoMatch; 6917 6918 if (!Str.startswith("attr")) { 6919 Error(S, "invalid interpolation attribute"); 6920 return MatchOperand_ParseFail; 6921 } 6922 6923 StringRef Chan = Str.take_back(2); 6924 int AttrChan = StringSwitch<int>(Chan) 6925 .Case(".x", 0) 6926 .Case(".y", 1) 6927 .Case(".z", 2) 6928 .Case(".w", 3) 6929 .Default(-1); 6930 if (AttrChan == -1) { 6931 Error(S, "invalid or missing interpolation attribute channel"); 
6932 return MatchOperand_ParseFail; 6933 } 6934 6935 Str = Str.drop_back(2).drop_front(4); 6936 6937 uint8_t Attr; 6938 if (Str.getAsInteger(10, Attr)) { 6939 Error(S, "invalid or missing interpolation attribute number"); 6940 return MatchOperand_ParseFail; 6941 } 6942 6943 if (Attr > 63) { 6944 Error(S, "out of bounds interpolation attribute number"); 6945 return MatchOperand_ParseFail; 6946 } 6947 6948 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6949 6950 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6951 AMDGPUOperand::ImmTyInterpAttr)); 6952 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6953 AMDGPUOperand::ImmTyAttrChan)); 6954 return MatchOperand_Success; 6955 } 6956 6957 //===----------------------------------------------------------------------===// 6958 // exp 6959 //===----------------------------------------------------------------------===// 6960 6961 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6962 using namespace llvm::AMDGPU::Exp; 6963 6964 StringRef Str; 6965 SMLoc S = getLoc(); 6966 6967 if (!parseId(Str)) 6968 return MatchOperand_NoMatch; 6969 6970 unsigned Id = getTgtId(Str); 6971 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6972 Error(S, (Id == ET_INVALID) ? 6973 "invalid exp target" : 6974 "exp target is not supported on this GPU"); 6975 return MatchOperand_ParseFail; 6976 } 6977 6978 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6979 AMDGPUOperand::ImmTyExpTgt)); 6980 return MatchOperand_Success; 6981 } 6982 6983 //===----------------------------------------------------------------------===// 6984 // parser helpers 6985 //===----------------------------------------------------------------------===// 6986 6987 bool 6988 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6989 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6990 } 6991 6992 bool 6993 AMDGPUAsmParser::isId(const StringRef Id) const { 6994 return isId(getToken(), Id); 6995 } 6996 6997 bool 6998 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6999 return getTokenKind() == Kind; 7000 } 7001 7002 bool 7003 AMDGPUAsmParser::trySkipId(const StringRef Id) { 7004 if (isId(Id)) { 7005 lex(); 7006 return true; 7007 } 7008 return false; 7009 } 7010 7011 bool 7012 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 7013 if (isToken(AsmToken::Identifier)) { 7014 StringRef Tok = getTokenStr(); 7015 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 7016 lex(); 7017 return true; 7018 } 7019 } 7020 return false; 7021 } 7022 7023 bool 7024 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 7025 if (isId(Id) && peekToken().is(Kind)) { 7026 lex(); 7027 lex(); 7028 return true; 7029 } 7030 return false; 7031 } 7032 7033 bool 7034 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 7035 if (isToken(Kind)) { 7036 lex(); 7037 return true; 7038 } 7039 return false; 7040 } 7041 7042 bool 7043 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 7044 const StringRef ErrMsg) { 7045 if (!trySkipToken(Kind)) { 7046 Error(getLoc(), ErrMsg); 7047 return false; 7048 } 7049 return true; 7050 } 7051 7052 bool 7053 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 7054 SMLoc S = getLoc(); 7055 7056 const MCExpr *Expr; 7057 if (Parser.parseExpression(Expr)) 7058 return false; 7059 7060 if (Expr->evaluateAsAbsolute(Imm)) 7061 return true; 7062 7063 if (Expected.empty()) { 7064 Error(S, "expected 
absolute expression"); 7065 } else { 7066 Error(S, Twine("expected ", Expected) + 7067 Twine(" or an absolute expression")); 7068 } 7069 return false; 7070 } 7071 7072 bool 7073 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 7074 SMLoc S = getLoc(); 7075 7076 const MCExpr *Expr; 7077 if (Parser.parseExpression(Expr)) 7078 return false; 7079 7080 int64_t IntVal; 7081 if (Expr->evaluateAsAbsolute(IntVal)) { 7082 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7083 } else { 7084 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7085 } 7086 return true; 7087 } 7088 7089 bool 7090 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7091 if (isToken(AsmToken::String)) { 7092 Val = getToken().getStringContents(); 7093 lex(); 7094 return true; 7095 } else { 7096 Error(getLoc(), ErrMsg); 7097 return false; 7098 } 7099 } 7100 7101 bool 7102 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7103 if (isToken(AsmToken::Identifier)) { 7104 Val = getTokenStr(); 7105 lex(); 7106 return true; 7107 } else { 7108 if (!ErrMsg.empty()) 7109 Error(getLoc(), ErrMsg); 7110 return false; 7111 } 7112 } 7113 7114 AsmToken 7115 AMDGPUAsmParser::getToken() const { 7116 return Parser.getTok(); 7117 } 7118 7119 AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) { 7120 return isToken(AsmToken::EndOfStatement) 7121 ? getToken() 7122 : getLexer().peekTok(ShouldSkipSpace); 7123 } 7124 7125 void 7126 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7127 auto TokCount = getLexer().peekTokens(Tokens); 7128 7129 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7130 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7131 } 7132 7133 AsmToken::TokenKind 7134 AMDGPUAsmParser::getTokenKind() const { 7135 return getLexer().getKind(); 7136 } 7137 7138 SMLoc 7139 AMDGPUAsmParser::getLoc() const { 7140 return getToken().getLoc(); 7141 } 7142 7143 StringRef 7144 AMDGPUAsmParser::getTokenStr() const { 7145 return getToken().getString(); 7146 } 7147 7148 void 7149 AMDGPUAsmParser::lex() { 7150 Parser.Lex(); 7151 } 7152 7153 SMLoc 7154 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7155 const OperandVector &Operands) const { 7156 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7157 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7158 if (Test(Op)) 7159 return Op.getStartLoc(); 7160 } 7161 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7162 } 7163 7164 SMLoc 7165 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7166 const OperandVector &Operands) const { 7167 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7168 return getOperandLoc(Test, Operands); 7169 } 7170 7171 SMLoc 7172 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7173 const OperandVector &Operands) const { 7174 auto Test = [=](const AMDGPUOperand& Op) { 7175 return Op.isRegKind() && Op.getReg() == Reg; 7176 }; 7177 return getOperandLoc(Test, Operands); 7178 } 7179 7180 SMLoc 7181 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7182 auto Test = [](const AMDGPUOperand& Op) { 7183 return Op.IsImmKindLiteral() || Op.isExpr(); 7184 }; 7185 return getOperandLoc(Test, Operands); 7186 } 7187 7188 SMLoc 7189 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7190 auto Test = [](const AMDGPUOperand& Op) { 7191 return Op.isImmKindConst(); 7192 }; 7193 return getOperandLoc(Test, Operands); 7194 } 7195 7196 //===----------------------------------------------------------------------===// 7197 // 
swizzle 7198 //===----------------------------------------------------------------------===// 7199 7200 LLVM_READNONE 7201 static unsigned 7202 encodeBitmaskPerm(const unsigned AndMask, 7203 const unsigned OrMask, 7204 const unsigned XorMask) { 7205 using namespace llvm::AMDGPU::Swizzle; 7206 7207 return BITMASK_PERM_ENC | 7208 (AndMask << BITMASK_AND_SHIFT) | 7209 (OrMask << BITMASK_OR_SHIFT) | 7210 (XorMask << BITMASK_XOR_SHIFT); 7211 } 7212 7213 bool 7214 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7215 const unsigned MinVal, 7216 const unsigned MaxVal, 7217 const StringRef ErrMsg, 7218 SMLoc &Loc) { 7219 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7220 return false; 7221 } 7222 Loc = getLoc(); 7223 if (!parseExpr(Op)) { 7224 return false; 7225 } 7226 if (Op < MinVal || Op > MaxVal) { 7227 Error(Loc, ErrMsg); 7228 return false; 7229 } 7230 7231 return true; 7232 } 7233 7234 bool 7235 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7236 const unsigned MinVal, 7237 const unsigned MaxVal, 7238 const StringRef ErrMsg) { 7239 SMLoc Loc; 7240 for (unsigned i = 0; i < OpNum; ++i) { 7241 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7242 return false; 7243 } 7244 7245 return true; 7246 } 7247 7248 bool 7249 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7250 using namespace llvm::AMDGPU::Swizzle; 7251 7252 int64_t Lane[LANE_NUM]; 7253 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7254 "expected a 2-bit lane id")) { 7255 Imm = QUAD_PERM_ENC; 7256 for (unsigned I = 0; I < LANE_NUM; ++I) { 7257 Imm |= Lane[I] << (LANE_SHIFT * I); 7258 } 7259 return true; 7260 } 7261 return false; 7262 } 7263 7264 bool 7265 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7266 using namespace llvm::AMDGPU::Swizzle; 7267 7268 SMLoc Loc; 7269 int64_t GroupSize; 7270 int64_t LaneIdx; 7271 7272 if (!parseSwizzleOperand(GroupSize, 7273 2, 32, 7274 "group size must be in the interval [2,32]", 7275 Loc)) { 7276 return false; 7277 } 7278 if (!isPowerOf2_64(GroupSize)) { 7279 Error(Loc, "group size must be a power of two"); 7280 return false; 7281 } 7282 if (parseSwizzleOperand(LaneIdx, 7283 0, GroupSize - 1, 7284 "lane id must be in the interval [0,group size - 1]", 7285 Loc)) { 7286 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7287 return true; 7288 } 7289 return false; 7290 } 7291 7292 bool 7293 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7294 using namespace llvm::AMDGPU::Swizzle; 7295 7296 SMLoc Loc; 7297 int64_t GroupSize; 7298 7299 if (!parseSwizzleOperand(GroupSize, 7300 2, 32, 7301 "group size must be in the interval [2,32]", 7302 Loc)) { 7303 return false; 7304 } 7305 if (!isPowerOf2_64(GroupSize)) { 7306 Error(Loc, "group size must be a power of two"); 7307 return false; 7308 } 7309 7310 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7311 return true; 7312 } 7313 7314 bool 7315 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7316 using namespace llvm::AMDGPU::Swizzle; 7317 7318 SMLoc Loc; 7319 int64_t GroupSize; 7320 7321 if (!parseSwizzleOperand(GroupSize, 7322 1, 16, 7323 "group size must be in the interval [1,16]", 7324 Loc)) { 7325 return false; 7326 } 7327 if (!isPowerOf2_64(GroupSize)) { 7328 Error(Loc, "group size must be a power of two"); 7329 return false; 7330 } 7331 7332 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7333 return true; 7334 } 7335 7336 bool 7337 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7338 using namespace llvm::AMDGPU::Swizzle; 7339 7340 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7341 return false; 7342 } 7343 7344 StringRef Ctl; 7345 SMLoc StrLoc = getLoc(); 7346 if (!parseString(Ctl)) { 7347 return false; 7348 } 7349 if (Ctl.size() != BITMASK_WIDTH) { 7350 Error(StrLoc, "expected a 5-character mask"); 7351 return false; 7352 } 7353 7354 unsigned AndMask = 0; 7355 unsigned OrMask = 0; 7356 unsigned XorMask = 0; 7357 7358 for (size_t i = 0; i < Ctl.size(); ++i) { 7359 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7360 switch(Ctl[i]) { 7361 default: 7362 Error(StrLoc, "invalid mask"); 7363 return false; 7364 case '0': 7365 break; 7366 case '1': 7367 OrMask |= Mask; 7368 break; 7369 case 'p': 7370 AndMask |= Mask; 7371 break; 7372 case 'i': 7373 AndMask |= Mask; 7374 XorMask |= Mask; 7375 break; 7376 } 7377 } 7378 7379 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7380 return true; 7381 } 7382 7383 bool 7384 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7385 7386 SMLoc OffsetLoc = getLoc(); 7387 7388 if (!parseExpr(Imm, "a swizzle macro")) { 7389 return false; 7390 } 7391 if (!isUInt<16>(Imm)) { 7392 Error(OffsetLoc, "expected a 16-bit offset"); 7393 return false; 7394 } 7395 return true; 7396 } 7397 7398 bool 7399 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7400 using namespace llvm::AMDGPU::Swizzle; 7401 7402 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 7403 7404 SMLoc ModeLoc = getLoc(); 7405 bool Ok = false; 7406 7407 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7408 Ok = parseSwizzleQuadPerm(Imm); 7409 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7410 Ok = parseSwizzleBitmaskPerm(Imm); 7411 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7412 Ok = parseSwizzleBroadcast(Imm); 7413 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7414 Ok = parseSwizzleSwap(Imm); 7415 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7416 Ok = parseSwizzleReverse(Imm); 7417 } else { 7418 Error(ModeLoc, "expected a swizzle mode"); 7419 } 7420 7421 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7422 } 7423 7424 return false; 7425 } 7426 7427 OperandMatchResultTy 7428 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7429 SMLoc S = getLoc(); 7430 int64_t Imm = 0; 7431 7432 if (trySkipId("offset")) { 7433 7434 bool Ok = false; 7435 if (skipToken(AsmToken::Colon, "expected a colon")) { 7436 if (trySkipId("swizzle")) { 7437 Ok = parseSwizzleMacro(Imm); 7438 } else { 7439 Ok = parseSwizzleOffset(Imm); 7440 } 7441 } 7442 7443 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7444 7445 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7446 } else { 7447 // Swizzle "offset" operand is optional. 7448 // If it is omitted, try parsing other optional operands.
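// Editor's note - an illustrative sketch, not part of the original file.
// Examples of the swizzle syntax handled by parseSwizzleOp/parseSwizzleMacro
// above (sp3-style macro names, assuming a ds_swizzle_b32 instruction):
//   ds_swizzle_b32 v5, v1 offset:0xffff                          // raw 16-bit offset
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)  // identity quad perm
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "00p11")
// For the "00p11" mask (MSB first, one character per lane-id bit): 'p'
// preserves the bit, '0'/'1' force it, 'i' inverts it. The loop above thus
// produces AndMask = 0b00100, OrMask = 0b00011, XorMask = 0, i.e. each lane
// reads from lane (lane_id & 0b00100) | 0b00011.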
7449 return parseOptionalOpr(Operands); 7450 } 7451 } 7452 7453 bool 7454 AMDGPUOperand::isSwizzle() const { 7455 return isImmTy(ImmTySwizzle); 7456 } 7457 7458 //===----------------------------------------------------------------------===// 7459 // VGPR Index Mode 7460 //===----------------------------------------------------------------------===// 7461 7462 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7463 7464 using namespace llvm::AMDGPU::VGPRIndexMode; 7465 7466 if (trySkipToken(AsmToken::RParen)) { 7467 return OFF; 7468 } 7469 7470 int64_t Imm = 0; 7471 7472 while (true) { 7473 unsigned Mode = 0; 7474 SMLoc S = getLoc(); 7475 7476 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7477 if (trySkipId(IdSymbolic[ModeId])) { 7478 Mode = 1 << ModeId; 7479 break; 7480 } 7481 } 7482 7483 if (Mode == 0) { 7484 Error(S, (Imm == 0)? 7485 "expected a VGPR index mode or a closing parenthesis" : 7486 "expected a VGPR index mode"); 7487 return UNDEF; 7488 } 7489 7490 if (Imm & Mode) { 7491 Error(S, "duplicate VGPR index mode"); 7492 return UNDEF; 7493 } 7494 Imm |= Mode; 7495 7496 if (trySkipToken(AsmToken::RParen)) 7497 break; 7498 if (!skipToken(AsmToken::Comma, 7499 "expected a comma or a closing parenthesis")) 7500 return UNDEF; 7501 } 7502 7503 return Imm; 7504 } 7505 7506 OperandMatchResultTy 7507 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7508 7509 using namespace llvm::AMDGPU::VGPRIndexMode; 7510 7511 int64_t Imm = 0; 7512 SMLoc S = getLoc(); 7513 7514 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7515 Imm = parseGPRIdxMacro(); 7516 if (Imm == UNDEF) 7517 return MatchOperand_ParseFail; 7518 } else { 7519 if (getParser().parseAbsoluteExpression(Imm)) 7520 return MatchOperand_ParseFail; 7521 if (Imm < 0 || !isUInt<4>(Imm)) { 7522 Error(S, "invalid immediate: only 4-bit values are legal"); 7523 return MatchOperand_ParseFail; 7524 } 7525 } 7526 7527 Operands.push_back( 7528 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7529 return MatchOperand_Success; 7530 } 7531 7532 bool AMDGPUOperand::isGPRIdxMode() const { 7533 return isImmTy(ImmTyGprIdxMode); 7534 } 7535 7536 //===----------------------------------------------------------------------===// 7537 // sopp branch targets 7538 //===----------------------------------------------------------------------===// 7539 7540 OperandMatchResultTy 7541 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7542 7543 // Make sure we are not parsing something 7544 // that looks like a label or an expression but is not. 7545 // This will improve error messages. 7546 if (isRegister() || isModifier()) 7547 return MatchOperand_NoMatch; 7548 7549 if (!parseExpr(Operands)) 7550 return MatchOperand_ParseFail; 7551 7552 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7553 assert(Opr.isImm() || Opr.isExpr()); 7554 SMLoc Loc = Opr.getStartLoc(); 7555 7556 // Currently we do not support arbitrary expressions as branch targets. 7557 // Only labels and absolute expressions are accepted. 
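// Editor's note - illustrative examples, not part of the original file
// (BB0 is a hypothetical label name):
//   s_branch BB0       // a label: accepted
//   s_branch 0x3fff    // an absolute 16-bit expression: accepted
//   s_branch BB0+4     // a relocatable expression: rejected below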
7558 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7559 Error(Loc, "expected an absolute expression or a label"); 7560 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7561 Error(Loc, "expected a 16-bit signed jump offset"); 7562 } 7563 7564 return MatchOperand_Success; 7565 } 7566 7567 //===----------------------------------------------------------------------===// 7568 // Boolean holding registers 7569 //===----------------------------------------------------------------------===// 7570 7571 OperandMatchResultTy 7572 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7573 return parseReg(Operands); 7574 } 7575 7576 //===----------------------------------------------------------------------===// 7577 // mubuf 7578 //===----------------------------------------------------------------------===// 7579 7580 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7581 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7582 } 7583 7584 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7585 const OperandVector &Operands, 7586 bool IsAtomic, 7587 bool IsLds) { 7588 OptionalImmIndexMap OptionalIdx; 7589 unsigned FirstOperandIdx = 1; 7590 bool IsAtomicReturn = false; 7591 7592 if (IsAtomic) { 7593 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7594 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7595 if (!Op.isCPol()) 7596 continue; 7597 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7598 break; 7599 } 7600 7601 if (!IsAtomicReturn) { 7602 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7603 if (NewOpc != -1) 7604 Inst.setOpcode(NewOpc); 7605 } 7606 7607 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7608 SIInstrFlags::IsAtomicRet; 7609 } 7610 7611 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7612 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7613 7614 // Add the register arguments 7615 if (Op.isReg()) { 7616 Op.addRegOperands(Inst, 1); 7617 // Insert a tied src for atomic return dst. 7618 // This cannot be postponed as subsequent calls to 7619 // addImmOperands rely on correct number of MC operands. 7620 if (IsAtomicReturn && i == FirstOperandIdx) 7621 Op.addRegOperands(Inst, 1); 7622 continue; 7623 } 7624 7625 // Handle the case where soffset is an immediate 7626 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7627 Op.addImmOperands(Inst, 1); 7628 continue; 7629 } 7630 7631 // Handle tokens like 'offen' which are sometimes hard-coded into the 7632 // asm string. There are no MCInst operands for these. 
7633 if (Op.isToken()) { 7634 continue; 7635 } 7636 assert(Op.isImm()); 7637 7638 // Handle optional arguments 7639 OptionalIdx[Op.getImmTy()] = i; 7640 } 7641 7642 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7643 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7644 7645 if (!IsLds) { // tfe is not legal with lds opcodes 7646 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7647 } 7648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7649 } 7650 7651 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7652 OptionalImmIndexMap OptionalIdx; 7653 7654 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7655 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7656 7657 // Add the register arguments 7658 if (Op.isReg()) { 7659 Op.addRegOperands(Inst, 1); 7660 continue; 7661 } 7662 7663 // Handle the case where soffset is an immediate 7664 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7665 Op.addImmOperands(Inst, 1); 7666 continue; 7667 } 7668 7669 // Handle tokens like 'offen' which are sometimes hard-coded into the 7670 // asm string. There are no MCInst operands for these. 7671 if (Op.isToken()) { 7672 continue; 7673 } 7674 assert(Op.isImm()); 7675 7676 // Handle optional arguments 7677 OptionalIdx[Op.getImmTy()] = i; 7678 } 7679 7680 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7681 AMDGPUOperand::ImmTyOffset); 7682 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7683 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7684 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7685 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7686 } 7687 7688 //===----------------------------------------------------------------------===// 7689 // mimg 7690 //===----------------------------------------------------------------------===// 7691 7692 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7693 bool IsAtomic) { 7694 unsigned I = 1; 7695 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7696 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7697 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7698 } 7699 7700 if (IsAtomic) { 7701 // Add src, same as dst 7702 assert(Desc.getNumDefs() == 1); 7703 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7704 } 7705 7706 OptionalImmIndexMap OptionalIdx; 7707 7708 for (unsigned E = Operands.size(); I != E; ++I) { 7709 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7710 7711 // Add the register arguments 7712 if (Op.isReg()) { 7713 Op.addRegOperands(Inst, 1); 7714 } else if (Op.isImmModifier()) { 7715 OptionalIdx[Op.getImmTy()] = I; 7716 } else if (!Op.isToken()) { 7717 llvm_unreachable("unexpected operand type"); 7718 } 7719 } 7720 7721 bool IsGFX10Plus = isGFX10Plus(); 7722 7723 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7724 if (IsGFX10Plus) 7725 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7726 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7728 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7729 if (IsGFX10Plus) 7730 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7731 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7732 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7733 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7734 if (!IsGFX10Plus) 7735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7736 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7737 } 7738 7739 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7740 cvtMIMG(Inst, Operands, true); 7741 } 7742 7743 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7744 OptionalImmIndexMap OptionalIdx; 7745 bool IsAtomicReturn = false; 7746 7747 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7748 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7749 if (!Op.isCPol()) 7750 continue; 7751 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7752 break; 7753 } 7754 7755 if (!IsAtomicReturn) { 7756 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7757 if (NewOpc != -1) 7758 Inst.setOpcode(NewOpc); 7759 } 7760 7761 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7762 SIInstrFlags::IsAtomicRet; 7763 7764 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7765 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7766 7767 // Add the register arguments 7768 if (Op.isReg()) { 7769 Op.addRegOperands(Inst, 1); 7770 if (IsAtomicReturn && i == 1) 7771 Op.addRegOperands(Inst, 1); 7772 continue; 7773 } 7774 7775 // Handle the case where soffset is an immediate 7776 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7777 Op.addImmOperands(Inst, 1); 7778 continue; 7779 } 7780 7781 // Handle tokens like 'offen' which are sometimes hard-coded into the 7782 // asm string. There are no MCInst operands for these. 7783 if (Op.isToken()) { 7784 continue; 7785 } 7786 assert(Op.isImm()); 7787 7788 // Handle optional arguments 7789 OptionalIdx[Op.getImmTy()] = i; 7790 } 7791 7792 if ((int)Inst.getNumOperands() <= 7793 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7794 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7796 } 7797 7798 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7799 const OperandVector &Operands) { 7800 for (unsigned I = 1; I < Operands.size(); ++I) { 7801 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7802 if (Operand.isReg()) 7803 Operand.addRegOperands(Inst, 1); 7804 } 7805 7806 Inst.addOperand(MCOperand::createImm(1)); // a16 7807 } 7808 7809 //===----------------------------------------------------------------------===// 7810 // smrd 7811 //===----------------------------------------------------------------------===// 7812 7813 bool AMDGPUOperand::isSMRDOffset8() const { 7814 return isImm() && isUInt<8>(getImm()); 7815 } 7816 7817 bool AMDGPUOperand::isSMEMOffset() const { 7818 return isImmTy(ImmTyNone) || 7819 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7820 } 7821 7822 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7823 // 32-bit literals are only supported on CI and we only want to use them 7824 // when the offset is > 8-bits. 
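// Editor's note - an illustrative example, not part of the original file:
// an offset of 0xff still fits the 8-bit SMRD offset field (isSMRDOffset8),
// while 0x100 does not and is matched below as a CI 32-bit literal offset.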
7825 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7826 } 7827 7828 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7829 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7830 } 7831 7832 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7833 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7834 } 7835 7836 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7837 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7838 } 7839 7840 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7841 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7842 } 7843 7844 //===----------------------------------------------------------------------===// 7845 // vop3 7846 //===----------------------------------------------------------------------===// 7847 7848 static bool ConvertOmodMul(int64_t &Mul) { 7849 if (Mul != 1 && Mul != 2 && Mul != 4) 7850 return false; 7851 7852 Mul >>= 1; 7853 return true; 7854 } 7855 7856 static bool ConvertOmodDiv(int64_t &Div) { 7857 if (Div == 1) { 7858 Div = 0; 7859 return true; 7860 } 7861 7862 if (Div == 2) { 7863 Div = 3; 7864 return true; 7865 } 7866 7867 return false; 7868 } 7869 7870 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7871 // This is intentional and ensures compatibility with sp3. 7872 // See bug 35397 for details. 7873 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7874 if (BoundCtrl == 0 || BoundCtrl == 1) { 7875 BoundCtrl = 1; 7876 return true; 7877 } 7878 return false; 7879 } 7880 7881 // Note: the order in this table matches the order of operands in AsmString. 7882 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7883 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7884 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7885 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7886 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7887 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7888 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7889 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7890 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7891 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7892 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7893 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7894 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7895 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7896 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7897 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7898 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7899 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7900 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7901 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7902 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7903 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7904 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7905 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7906 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7907 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7908 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7909 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7910 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7911 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7912 {"vm", AMDGPUOperand::ImmTyExpVM, 
true, nullptr}, 7913 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7914 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7915 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7916 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7917 {"dpp8", AMDGPUOperand::ImmTyDPP8, false, nullptr}, 7918 {"dpp_ctrl", AMDGPUOperand::ImmTyDppCtrl, false, nullptr}, 7919 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7920 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7921 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7922 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7923 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7924 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7925 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7926 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr}, 7927 {"wait_exp", AMDGPUOperand::ImmTyWaitEXP, false, nullptr} 7928 }; 7929 7930 void AMDGPUAsmParser::onBeginOfFile() { 7931 if (!getParser().getStreamer().getTargetStreamer() || 7932 getSTI().getTargetTriple().getArch() == Triple::r600) 7933 return; 7934 7935 if (!getTargetStreamer().getTargetID()) 7936 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7937 7938 if (isHsaAbiVersion3AndAbove(&getSTI())) 7939 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7940 } 7941 7942 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7943 7944 OperandMatchResultTy res = parseOptionalOpr(Operands); 7945 7946 // This is a hack to enable hardcoded mandatory operands which follow 7947 // optional operands. 7948 // 7949 // The current design assumes that all operands after the first optional operand 7950 // are also optional. However, the implementation of some instructions violates 7951 // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands). 7952 // 7953 // To alleviate this problem, we have to (implicitly) parse extra operands 7954 // to make sure the autogenerated parser of custom operands never hits hardcoded 7955 // mandatory operands.
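// Editor's note - an illustrative example, not part of the original file:
// for a flat/global atomic such as "flat_atomic_swap v0, v[1:2], v2 offset:16 glc",
// 'offset' is optional but 'glc' is hardcoded in the AsmString. The bounded
// loop below keeps consuming trailing optional operands (skipping an optional
// comma, up to MAX_OPR_LOOKAHEAD of them) so the autogenerated matcher is
// never invoked in the middle of that optional run.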
7956 7957 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7958 if (res != MatchOperand_Success || 7959 isToken(AsmToken::EndOfStatement)) 7960 break; 7961 7962 trySkipToken(AsmToken::Comma); 7963 res = parseOptionalOpr(Operands); 7964 } 7965 7966 return res; 7967 } 7968 7969 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7970 OperandMatchResultTy res; 7971 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7972 // try to parse any optional operand here 7973 if (Op.IsBit) { 7974 res = parseNamedBit(Op.Name, Operands, Op.Type); 7975 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7976 res = parseOModOperand(Operands); 7977 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7978 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7979 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7980 res = parseSDWASel(Operands, Op.Name, Op.Type); 7981 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7982 res = parseSDWADstUnused(Operands); 7983 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7984 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7985 Op.Type == AMDGPUOperand::ImmTyNegLo || 7986 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7987 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7988 Op.ConvertResult); 7989 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7990 res = parseDim(Operands); 7991 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7992 res = parseCPol(Operands); 7993 } else if (Op.Type == AMDGPUOperand::ImmTyDPP8) { 7994 res = parseDPP8(Operands); 7995 } else if (Op.Type == AMDGPUOperand::ImmTyDppCtrl) { 7996 res = parseDPPCtrl(Operands); 7997 } else { 7998 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7999 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 8000 res = parseOperandArrayWithPrefix("neg", Operands, 8001 AMDGPUOperand::ImmTyBLGP, 8002 nullptr); 8003 } 8004 } 8005 if (res != MatchOperand_NoMatch) { 8006 return res; 8007 } 8008 } 8009 return MatchOperand_NoMatch; 8010 } 8011 8012 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 8013 StringRef Name = getTokenStr(); 8014 if (Name == "mul") { 8015 return parseIntWithPrefix("mul", Operands, 8016 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 8017 } 8018 8019 if (Name == "div") { 8020 return parseIntWithPrefix("div", Operands, 8021 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 8022 } 8023 8024 return MatchOperand_NoMatch; 8025 } 8026 8027 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 8028 cvtVOP3P(Inst, Operands); 8029 8030 int Opc = Inst.getOpcode(); 8031 8032 int SrcNum; 8033 const int Ops[] = { AMDGPU::OpName::src0, 8034 AMDGPU::OpName::src1, 8035 AMDGPU::OpName::src2 }; 8036 for (SrcNum = 0; 8037 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 8038 ++SrcNum); 8039 assert(SrcNum > 0); 8040 8041 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8042 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8043 8044 if ((OpSel & (1 << SrcNum)) != 0) { 8045 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 8046 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8047 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 8048 } 8049 } 8050 8051 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 8052 // 1. This operand is input modifiers 8053 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 8054 // 2. 
This is not last operand 8055 && Desc.NumOperands > (OpNum + 1) 8056 // 3. Next operand is register class 8057 && Desc.OpInfo[OpNum + 1].RegClass != -1 8058 // 4. Next register is not tied to any other operand 8059 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 8060 } 8061 8062 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 8063 { 8064 OptionalImmIndexMap OptionalIdx; 8065 unsigned Opc = Inst.getOpcode(); 8066 8067 unsigned I = 1; 8068 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8069 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8070 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8071 } 8072 8073 for (unsigned E = Operands.size(); I != E; ++I) { 8074 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8075 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8076 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8077 } else if (Op.isInterpSlot() || 8078 Op.isInterpAttr() || 8079 Op.isAttrChan()) { 8080 Inst.addOperand(MCOperand::createImm(Op.getImm())); 8081 } else if (Op.isImmModifier()) { 8082 OptionalIdx[Op.getImmTy()] = I; 8083 } else { 8084 llvm_unreachable("unhandled operand type"); 8085 } 8086 } 8087 8088 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8089 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8090 } 8091 8092 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8093 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8094 } 8095 8096 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8097 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8098 } 8099 } 8100 8101 void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands) 8102 { 8103 OptionalImmIndexMap OptionalIdx; 8104 unsigned Opc = Inst.getOpcode(); 8105 8106 unsigned I = 1; 8107 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8108 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8109 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8110 } 8111 8112 for (unsigned E = Operands.size(); I != E; ++I) { 8113 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8114 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8115 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8116 } else if (Op.isImmModifier()) { 8117 OptionalIdx[Op.getImmTy()] = I; 8118 } else { 8119 llvm_unreachable("unhandled operand type"); 8120 } 8121 } 8122 8123 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8124 8125 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8126 if (OpSelIdx != -1) 8127 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8128 8129 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP); 8130 8131 if (OpSelIdx == -1) 8132 return; 8133 8134 const int Ops[] = { AMDGPU::OpName::src0, 8135 AMDGPU::OpName::src1, 8136 AMDGPU::OpName::src2 }; 8137 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8138 AMDGPU::OpName::src1_modifiers, 8139 AMDGPU::OpName::src2_modifiers }; 8140 8141 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 8142 8143 for (int J = 0; J < 3; ++J) { 8144 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8145 if (OpIdx == -1) 8146 break; 8147 8148 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8149 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 8150 8151 if ((OpSel & (1 << J)) != 0) 8152 ModVal |= 
SISrcMods::OP_SEL_0; 8153 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && 8154 (OpSel & (1 << 3)) != 0) 8155 ModVal |= SISrcMods::DST_OP_SEL; 8156 8157 Inst.getOperand(ModIdx).setImm(ModVal); 8158 } 8159 } 8160 8161 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8162 OptionalImmIndexMap &OptionalIdx) { 8163 unsigned Opc = Inst.getOpcode(); 8164 8165 unsigned I = 1; 8166 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8167 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8168 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8169 } 8170 8171 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8172 // This instruction has src modifiers 8173 for (unsigned E = Operands.size(); I != E; ++I) { 8174 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8175 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8176 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8177 } else if (Op.isImmModifier()) { 8178 OptionalIdx[Op.getImmTy()] = I; 8179 } else if (Op.isRegOrImm()) { 8180 Op.addRegOrImmOperands(Inst, 1); 8181 } else { 8182 llvm_unreachable("unhandled operand type"); 8183 } 8184 } 8185 } else { 8186 // No src modifiers 8187 for (unsigned E = Operands.size(); I != E; ++I) { 8188 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8189 if (Op.isMod()) { 8190 OptionalIdx[Op.getImmTy()] = I; 8191 } else { 8192 Op.addRegOrImmOperands(Inst, 1); 8193 } 8194 } 8195 } 8196 8197 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8199 } 8200 8201 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8202 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8203 } 8204 8205 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8206 // these instructions have a src2 register operand that is tied to the dst operand. 8207 // We don't allow modifiers for this operand in the assembler, so src2_modifiers 8208 // should be 0. 8209 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8210 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8211 Opc == AMDGPU::V_MAC_F32_e64_vi || 8212 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8213 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8214 Opc == AMDGPU::V_MAC_F16_e64_vi || 8215 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8216 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8217 Opc == AMDGPU::V_FMAC_F32_e64_gfx11 || 8218 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8219 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8220 Opc == AMDGPU::V_FMAC_DX9_ZERO_F32_e64_gfx11 || 8221 Opc == AMDGPU::V_FMAC_F16_e64_gfx10 || 8222 Opc == AMDGPU::V_FMAC_F16_e64_gfx11) { 8223 auto it = Inst.begin(); 8224 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8225 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8226 ++it; 8227 // Copy the operand to ensure it's not invalidated when Inst grows.
8228 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8229 } 8230 } 8231 8232 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8233 OptionalImmIndexMap OptionalIdx; 8234 cvtVOP3(Inst, Operands, OptionalIdx); 8235 } 8236 8237 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8238 OptionalImmIndexMap &OptIdx) { 8239 const int Opc = Inst.getOpcode(); 8240 const MCInstrDesc &Desc = MII.get(Opc); 8241 8242 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8243 8244 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8245 assert(!IsPacked); 8246 Inst.addOperand(Inst.getOperand(0)); 8247 } 8248 8249 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8250 // instruction, and then figure out where to actually put the modifiers 8251 8252 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8253 if (OpSelIdx != -1) { 8254 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8255 } 8256 8257 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8258 if (OpSelHiIdx != -1) { 8259 int DefaultVal = IsPacked ? -1 : 0; 8260 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8261 DefaultVal); 8262 } 8263 8264 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8265 if (NegLoIdx != -1) { 8266 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8267 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8268 } 8269 8270 const int Ops[] = { AMDGPU::OpName::src0, 8271 AMDGPU::OpName::src1, 8272 AMDGPU::OpName::src2 }; 8273 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8274 AMDGPU::OpName::src1_modifiers, 8275 AMDGPU::OpName::src2_modifiers }; 8276 8277 unsigned OpSel = 0; 8278 unsigned OpSelHi = 0; 8279 unsigned NegLo = 0; 8280 unsigned NegHi = 0; 8281 8282 if (OpSelIdx != -1) 8283 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8284 8285 if (OpSelHiIdx != -1) 8286 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8287 8288 if (NegLoIdx != -1) { 8289 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8290 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8291 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8292 } 8293 8294 for (int J = 0; J < 3; ++J) { 8295 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8296 if (OpIdx == -1) 8297 break; 8298 8299 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8300 8301 if (ModIdx == -1) 8302 continue; 8303 8304 uint32_t ModVal = 0; 8305 8306 if ((OpSel & (1 << J)) != 0) 8307 ModVal |= SISrcMods::OP_SEL_0; 8308 8309 if ((OpSelHi & (1 << J)) != 0) 8310 ModVal |= SISrcMods::OP_SEL_1; 8311 8312 if ((NegLo & (1 << J)) != 0) 8313 ModVal |= SISrcMods::NEG; 8314 8315 if ((NegHi & (1 << J)) != 0) 8316 ModVal |= SISrcMods::NEG_HI; 8317 8318 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8319 } 8320 } 8321 8322 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8323 OptionalImmIndexMap OptIdx; 8324 cvtVOP3(Inst, Operands, OptIdx); 8325 cvtVOP3P(Inst, Operands, OptIdx); 8326 } 8327 8328 //===----------------------------------------------------------------------===// 8329 // VOPD 8330 //===----------------------------------------------------------------------===// 8331 8332 OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) { 8333 if (!hasVOPD(getSTI())) 8334 return MatchOperand_NoMatch; 8335 8336 if 
(isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) { 8337 SMLoc S = getLoc(); 8338 lex(); 8339 lex(); 8340 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S)); 8341 const MCExpr *Expr; 8342 if (isToken(AsmToken::Identifier) && !Parser.parseExpression(Expr)) { 8343 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 8344 return MatchOperand_Success; 8345 } 8346 Error(S, "invalid VOPD :: usage"); 8347 return MatchOperand_ParseFail; 8348 } 8349 return MatchOperand_NoMatch; 8350 } 8351 8352 // Create VOPD MCInst operands using parsed assembler operands. 8353 // Parsed VOPD operands are ordered as follows: 8354 // OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::' 8355 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8356 // If both OpX and OpY have an imm, the first imm has a different name: 8357 // OpXMnemo dstX src0X [vsrc1X|immDeferred vsrc1X|vsrc1X immDeferred] '::' 8358 // OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm] 8359 // MCInst operands have the following order: 8360 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands] 8361 void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) { 8362 auto addOp = [&](uint16_t i) { // NOLINT:function pointer 8363 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 8364 if (Op.isReg()) { 8365 Op.addRegOperands(Inst, 1); 8366 return; 8367 } 8368 if (Op.isImm()) { 8369 Op.addImmOperands(Inst, 1); 8370 return; 8371 } 8372 // Handle tokens like 'offen' which are sometimes hard-coded into the 8373 // asm string. There are no MCInst operands for these. 8374 if (Op.isToken()) { 8375 return; 8376 } 8377 llvm_unreachable("Unhandled operand type in cvtVOPD"); 8378 }; 8379 8380 // Indices into MCInst.Operands 8381 const auto FmamkOpXImmMCIndex = 3; // dstX, dstY, src0X, imm, ... 8382 const auto FmaakOpXImmMCIndex = 4; // dstX, dstY, src0X, src1X, imm, ... 8383 const auto MinOpYImmMCIndex = 4; // dstX, dstY, src0X, src0Y, imm, ... 
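// Editor's note - a worked example, not part of the original file, assuming
// the dual-issue pair "v_dual_add_f32 v0, v1, v2 :: v_dual_mul_f32 v3, v4, v5":
// the parsed operands are [mnemonic, v0, v1, v2, "::", OpYMnemo, v3, v4, v5].
// With no immediates, OpXOperandsSize = 2 + 0 + 1 = 3 and
// OpYFirstOperandOffset = 1 + 3 + 2 = 6, so the addOp calls below emit MC
// operands as v0 (dstX), v3 (dstY), v1 (src0X), v2 (vsrc1X), v4 (src0Y),
// v5 (vsrc1Y).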
8384 8385 unsigned Opc = Inst.getOpcode(); 8386 bool HasVsrc1X = 8387 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1X) != -1; 8388 bool HasImmX = 8389 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8390 (HasVsrc1X && (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8391 FmamkOpXImmMCIndex || 8392 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) == 8393 FmaakOpXImmMCIndex)); 8394 8395 bool HasVsrc1Y = 8396 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vsrc1Y) != -1; 8397 bool HasImmY = 8398 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::immDeferred) != -1 || 8399 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::imm) >= 8400 MinOpYImmMCIndex + HasVsrc1X; 8401 8402 // Indices of parsed operands relative to dst 8403 const auto DstIdx = 0; 8404 const auto Src0Idx = 1; 8405 const auto Vsrc1OrImmIdx = 2; 8406 8407 const auto OpXOperandsSize = 2 + HasImmX + HasVsrc1X; 8408 const auto BridgeTokensSize = 2; // Special VOPD tokens ('::' and OpYMnemo) 8409 8410 // Offsets into parsed operands 8411 const auto OpXFirstOperandOffset = 1; 8412 const auto OpYFirstOperandOffset = 8413 OpXFirstOperandOffset + OpXOperandsSize + BridgeTokensSize; 8414 8415 // Order of addOp calls determines MC operand order 8416 addOp(OpXFirstOperandOffset + DstIdx); // vdstX 8417 addOp(OpYFirstOperandOffset + DstIdx); // vdstY 8418 8419 addOp(OpXFirstOperandOffset + Src0Idx); // src0X 8420 if (HasImmX) { 8421 // immX then vsrc1X for fmamk, vsrc1X then immX for fmaak 8422 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); 8423 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx + 1); 8424 } else { 8425 if (HasVsrc1X) // all except v_mov 8426 addOp(OpXFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1X 8427 } 8428 8429 addOp(OpYFirstOperandOffset + Src0Idx); // src0Y 8430 if (HasImmY) { 8431 // immY then vsrc1Y for fmamk, vsrc1Y then immY for fmaak 8432 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); 8433 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx + 1); 8434 } else { 8435 if (HasVsrc1Y) // all except v_mov 8436 addOp(OpYFirstOperandOffset + Vsrc1OrImmIdx); // vsrc1Y 8437 } 8438 } 8439 8440 //===----------------------------------------------------------------------===// 8441 // dpp 8442 //===----------------------------------------------------------------------===// 8443 8444 bool AMDGPUOperand::isDPP8() const { 8445 return isImmTy(ImmTyDPP8); 8446 } 8447 8448 bool AMDGPUOperand::isDPPCtrl() const { 8449 using namespace AMDGPU::DPP; 8450 8451 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8452 if (result) { 8453 int64_t Imm = getImm(); 8454 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8455 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8456 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8457 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8458 (Imm == DppCtrl::WAVE_SHL1) || 8459 (Imm == DppCtrl::WAVE_ROL1) || 8460 (Imm == DppCtrl::WAVE_SHR1) || 8461 (Imm == DppCtrl::WAVE_ROR1) || 8462 (Imm == DppCtrl::ROW_MIRROR) || 8463 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8464 (Imm == DppCtrl::BCAST15) || 8465 (Imm == DppCtrl::BCAST31) || 8466 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8467 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8468 } 8469 return false; 8470 } 8471 8472 //===----------------------------------------------------------------------===// 8473 // mAI 8474 
//===----------------------------------------------------------------------===// 8475 8476 bool AMDGPUOperand::isBLGP() const { 8477 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8478 } 8479 8480 bool AMDGPUOperand::isCBSZ() const { 8481 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8482 } 8483 8484 bool AMDGPUOperand::isABID() const { 8485 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8486 } 8487 8488 bool AMDGPUOperand::isS16Imm() const { 8489 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8490 } 8491 8492 bool AMDGPUOperand::isU16Imm() const { 8493 return isImm() && isUInt<16>(getImm()); 8494 } 8495 8496 //===----------------------------------------------------------------------===// 8497 // dim 8498 //===----------------------------------------------------------------------===// 8499 8500 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8501 // We want to allow "dim:1D" etc., 8502 // but the initial 1 is tokenized as an integer. 8503 std::string Token; 8504 if (isToken(AsmToken::Integer)) { 8505 SMLoc Loc = getToken().getEndLoc(); 8506 Token = std::string(getTokenStr()); 8507 lex(); 8508 if (getLoc() != Loc) 8509 return false; 8510 } 8511 8512 StringRef Suffix; 8513 if (!parseId(Suffix)) 8514 return false; 8515 Token += Suffix; 8516 8517 StringRef DimId = Token; 8518 if (DimId.startswith("SQ_RSRC_IMG_")) 8519 DimId = DimId.drop_front(12); 8520 8521 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8522 if (!DimInfo) 8523 return false; 8524 8525 Encoding = DimInfo->Encoding; 8526 return true; 8527 } 8528 8529 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8530 if (!isGFX10Plus()) 8531 return MatchOperand_NoMatch; 8532 8533 SMLoc S = getLoc(); 8534 8535 if (!trySkipId("dim", AsmToken::Colon)) 8536 return MatchOperand_NoMatch; 8537 8538 unsigned Encoding; 8539 SMLoc Loc = getLoc(); 8540 if (!parseDimId(Encoding)) { 8541 Error(Loc, "invalid dim value"); 8542 return MatchOperand_ParseFail; 8543 } 8544 8545 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8546 AMDGPUOperand::ImmTyDim)); 8547 return MatchOperand_Success; 8548 } 8549 8550 //===----------------------------------------------------------------------===// 8551 // dpp 8552 //===----------------------------------------------------------------------===// 8553 8554 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8555 SMLoc S = getLoc(); 8556 8557 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8558 return MatchOperand_NoMatch; 8559 8560 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8561 8562 int64_t Sels[8]; 8563 8564 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8565 return MatchOperand_ParseFail; 8566 8567 for (size_t i = 0; i < 8; ++i) { 8568 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8569 return MatchOperand_ParseFail; 8570 8571 SMLoc Loc = getLoc(); 8572 if (getParser().parseAbsoluteExpression(Sels[i])) 8573 return MatchOperand_ParseFail; 8574 if (0 > Sels[i] || 7 < Sels[i]) { 8575 Error(Loc, "expected a 3-bit value"); 8576 return MatchOperand_ParseFail; 8577 } 8578 } 8579 8580 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8581 return MatchOperand_ParseFail; 8582 8583 unsigned DPP8 = 0; 8584 for (size_t i = 0; i < 8; ++i) 8585 DPP8 |= (Sels[i] << (i * 3)); 8586 8587 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8588 return MatchOperand_Success; 
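// Editor's note - an illustrative example, not part of the original file:
// each 3-bit selector is packed at bit offset 3*i, so
// "dpp8:[7,6,5,4,3,2,1,0]" (a lane mirror within each group of eight)
// encodes as DPP8 = 7 | 6<<3 | 5<<6 | 4<<9 | 3<<12 | 2<<15 | 1<<18 | 0<<21.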
8589 } 8590 8591 bool 8592 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8593 const OperandVector &Operands) { 8594 if (Ctrl == "row_newbcast") 8595 return isGFX90A(); 8596 8597 if (Ctrl == "row_share" || 8598 Ctrl == "row_xmask") 8599 return isGFX10Plus(); 8600 8601 if (Ctrl == "wave_shl" || 8602 Ctrl == "wave_shr" || 8603 Ctrl == "wave_rol" || 8604 Ctrl == "wave_ror" || 8605 Ctrl == "row_bcast") 8606 return isVI() || isGFX9(); 8607 8608 return Ctrl == "row_mirror" || 8609 Ctrl == "row_half_mirror" || 8610 Ctrl == "quad_perm" || 8611 Ctrl == "row_shl" || 8612 Ctrl == "row_shr" || 8613 Ctrl == "row_ror"; 8614 } 8615 8616 int64_t 8617 AMDGPUAsmParser::parseDPPCtrlPerm() { 8618 // quad_perm:[%d,%d,%d,%d] 8619 8620 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8621 return -1; 8622 8623 int64_t Val = 0; 8624 for (int i = 0; i < 4; ++i) { 8625 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8626 return -1; 8627 8628 int64_t Temp; 8629 SMLoc Loc = getLoc(); 8630 if (getParser().parseAbsoluteExpression(Temp)) 8631 return -1; 8632 if (Temp < 0 || Temp > 3) { 8633 Error(Loc, "expected a 2-bit value"); 8634 return -1; 8635 } 8636 8637 Val += (Temp << i * 2); 8638 } 8639 8640 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8641 return -1; 8642 8643 return Val; 8644 } 8645 8646 int64_t 8647 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8648 using namespace AMDGPU::DPP; 8649 8650 // sel:%d 8651 8652 int64_t Val; 8653 SMLoc Loc = getLoc(); 8654 8655 if (getParser().parseAbsoluteExpression(Val)) 8656 return -1; 8657 8658 struct DppCtrlCheck { 8659 int64_t Ctrl; 8660 int Lo; 8661 int Hi; 8662 }; 8663 8664 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8665 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8666 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8667 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8668 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8669 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8670 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8671 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8672 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8673 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8674 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8675 .Default({-1, 0, 0}); 8676 8677 bool Valid; 8678 if (Check.Ctrl == -1) { 8679 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8680 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8681 } else { 8682 Valid = Check.Lo <= Val && Val <= Check.Hi; 8683 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8684 } 8685 8686 if (!Valid) { 8687 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8688 return -1; 8689 } 8690 8691 return Val; 8692 } 8693 8694 OperandMatchResultTy 8695 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8696 using namespace AMDGPU::DPP; 8697 8698 if (!isToken(AsmToken::Identifier) || 8699 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8700 return MatchOperand_NoMatch; 8701 8702 SMLoc S = getLoc(); 8703 int64_t Val = -1; 8704 StringRef Ctrl; 8705 8706 parseId(Ctrl); 8707 8708 if (Ctrl == "row_mirror") { 8709 Val = DppCtrl::ROW_MIRROR; 8710 } else if (Ctrl == "row_half_mirror") { 8711 Val = DppCtrl::ROW_HALF_MIRROR; 8712 } else { 8713 if (skipToken(AsmToken::Colon, "expected a colon")) { 8714 if (Ctrl == "quad_perm") { 8715 Val = parseDPPCtrlPerm(); 8716 } else { 8717 Val = parseDPPCtrlSel(Ctrl); 8718 } 8719 } 8720 } 8721 8722 if (Val == -1) 8723 return MatchOperand_ParseFail; 8724 8725 Operands.push_back( 8726 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8727 return MatchOperand_Success; 8728 } 8729 8730 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8731 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8732 } 8733 8734 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8735 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8736 } 8737 8738 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8739 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8740 } 8741 8742 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8743 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8744 } 8745 8746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8747 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8748 } 8749 8750 // Add dummy $old operand 8751 void AMDGPUAsmParser::cvtVOPC64NoDstDPP(MCInst &Inst, 8752 const OperandVector &Operands, 8753 bool IsDPP8) { 8754 Inst.addOperand(MCOperand::createReg(0)); 8755 cvtVOP3DPP(Inst, Operands, IsDPP8); 8756 } 8757 8758 void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8759 OptionalImmIndexMap OptionalIdx; 8760 unsigned Opc = Inst.getOpcode(); 8761 bool HasModifiers = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8762 unsigned I = 1; 8763 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8764 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8765 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8766 } 8767 8768 int Fi = 0; 8769 for (unsigned E = Operands.size(); I != E; ++I) { 8770 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8771 MCOI::TIED_TO); 8772 if (TiedTo != -1) { 8773 assert((unsigned)TiedTo < Inst.getNumOperands()); 8774 // handle tied old or src2 for MAC instructions 8775 Inst.addOperand(Inst.getOperand(TiedTo)); 8776 } 8777 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8778 // Add the register arguments 8779 if (IsDPP8 && Op.isFI()) { 8780 Fi = Op.getImm(); 8781 } else if (HasModifiers && 8782 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8783 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8784 } else if (Op.isReg()) { 8785 Op.addRegOperands(Inst, 1); 8786 } else if (Op.isImm() && 8787 Desc.OpInfo[Inst.getNumOperands()].RegClass != -1) { 8788 assert(!HasModifiers && "Case should be unreachable with modifiers"); 8789 
assert(!Op.IsImmKindLiteral() && "Cannot use literal with DPP"); 8790 Op.addImmOperands(Inst, 1); 8791 } else if (Op.isImm()) { 8792 OptionalIdx[Op.getImmTy()] = I; 8793 } else { 8794 llvm_unreachable("unhandled operand type"); 8795 } 8796 } 8797 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8798 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8799 } 8800 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8801 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8802 } 8803 if (Desc.TSFlags & SIInstrFlags::VOP3P) 8804 cvtVOP3P(Inst, Operands, OptionalIdx); 8805 else if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel) != -1) { 8806 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel); 8807 } 8808 8809 if (IsDPP8) { 8810 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8); 8811 using namespace llvm::AMDGPU::DPP; 8812 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8813 } else { 8814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4); 8815 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8817 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8818 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8819 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8820 } 8821 } 8822 } 8823 8824 // Add dummy $old operand 8825 void AMDGPUAsmParser::cvtVOPCNoDstDPP(MCInst &Inst, 8826 const OperandVector &Operands, 8827 bool IsDPP8) { 8828 Inst.addOperand(MCOperand::createReg(0)); 8829 cvtDPP(Inst, Operands, IsDPP8); 8830 } 8831 8832 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8833 OptionalImmIndexMap OptionalIdx; 8834 8835 unsigned Opc = Inst.getOpcode(); 8836 bool HasModifiers = 8837 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8838 unsigned I = 1; 8839 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8840 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8841 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8842 } 8843 8844 int Fi = 0; 8845 for (unsigned E = Operands.size(); I != E; ++I) { 8846 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8847 MCOI::TIED_TO); 8848 if (TiedTo != -1) { 8849 assert((unsigned)TiedTo < Inst.getNumOperands()); 8850 // handle tied old or src2 for MAC instructions 8851 Inst.addOperand(Inst.getOperand(TiedTo)); 8852 } 8853 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8854 // Add the register arguments 8855 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8856 // VOP2b instructions (v_add_u32, v_sub_u32, ...) in DPP form use a "vcc" token. 8857 // Skip it.
8858 continue; 8859 } 8860 8861 if (IsDPP8) { 8862 if (Op.isDPP8()) { 8863 Op.addImmOperands(Inst, 1); 8864 } else if (HasModifiers && 8865 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8866 Op.addRegWithFPInputModsOperands(Inst, 2); 8867 } else if (Op.isFI()) { 8868 Fi = Op.getImm(); 8869 } else if (Op.isReg()) { 8870 Op.addRegOperands(Inst, 1); 8871 } else { 8872 llvm_unreachable("Invalid operand type"); 8873 } 8874 } else { 8875 if (HasModifiers && 8876 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8877 Op.addRegWithFPInputModsOperands(Inst, 2); 8878 } else if (Op.isReg()) { 8879 Op.addRegOperands(Inst, 1); 8880 } else if (Op.isDPPCtrl()) { 8881 Op.addImmOperands(Inst, 1); 8882 } else if (Op.isImm()) { 8883 // Handle optional arguments 8884 OptionalIdx[Op.getImmTy()] = I; 8885 } else { 8886 llvm_unreachable("Invalid operand type"); 8887 } 8888 } 8889 } 8890 8891 if (IsDPP8) { 8892 using namespace llvm::AMDGPU::DPP; 8893 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8894 } else { 8895 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8896 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8897 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8898 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8899 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8900 } 8901 } 8902 } 8903 8904 //===----------------------------------------------------------------------===// 8905 // sdwa 8906 //===----------------------------------------------------------------------===// 8907 8908 OperandMatchResultTy 8909 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8910 AMDGPUOperand::ImmTy Type) { 8911 using namespace llvm::AMDGPU::SDWA; 8912 8913 SMLoc S = getLoc(); 8914 StringRef Value; 8915 OperandMatchResultTy res; 8916 8917 SMLoc StringLoc; 8918 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8919 if (res != MatchOperand_Success) { 8920 return res; 8921 } 8922 8923 int64_t Int; 8924 Int = StringSwitch<int64_t>(Value) 8925 .Case("BYTE_0", SdwaSel::BYTE_0) 8926 .Case("BYTE_1", SdwaSel::BYTE_1) 8927 .Case("BYTE_2", SdwaSel::BYTE_2) 8928 .Case("BYTE_3", SdwaSel::BYTE_3) 8929 .Case("WORD_0", SdwaSel::WORD_0) 8930 .Case("WORD_1", SdwaSel::WORD_1) 8931 .Case("DWORD", SdwaSel::DWORD) 8932 .Default(0xffffffff); 8933 8934 if (Int == 0xffffffff) { 8935 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8936 return MatchOperand_ParseFail; 8937 } 8938 8939 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8940 return MatchOperand_Success; 8941 } 8942 8943 OperandMatchResultTy 8944 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8945 using namespace llvm::AMDGPU::SDWA; 8946 8947 SMLoc S = getLoc(); 8948 StringRef Value; 8949 OperandMatchResultTy res; 8950 8951 SMLoc StringLoc; 8952 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8953 if (res != MatchOperand_Success) { 8954 return res; 8955 } 8956 8957 int64_t Int; 8958 Int = StringSwitch<int64_t>(Value) 8959 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8960 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8961 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8962 .Default(0xffffffff); 8963 8964 if (Int == 0xffffffff) { 8965 Error(StringLoc, "invalid dst_unused value"); 8966 return MatchOperand_ParseFail; 8967 } 8968 8969 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
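// A worked example of the vcc-skipping logic in cvtSDWA below (illustrative;
// the counts follow from each source contributing a modifiers+register pair):
//
//   v_addc_u32_sdwa v1, vcc, v2, v3, vcc
//
// After the defs loop, Inst holds 1 operand (v1), so the first "vcc" is
// skipped as the carry-out dst. Once src0 and src1 have been added, Inst
// holds 5 operands (v1 plus 2 per source), so the trailing "vcc" is skipped
// as the carry-in src. These are the counts tested against 1 and 5 below.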
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa forms use the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstUnused,
                            DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaDstUnused,
                            DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                     AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16,f32}: it has a src2 register operand that is
  // tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
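// For example, given plain "v_add_f32_sdwa v0, v1, v2" with every SDWA
// modifier omitted (an illustrative trace, not generated output), cvtSDWA
// above appends clamp:0, dst_sel:DWORD, dst_unused:UNUSED_PRESERVE,
// src0_sel:DWORD and src1_sel:DWORD as the VOP2 defaults, plus omod:0 where
// the opcode has an omod operand.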
Only VOP1, VOP2 and VOPC allowed"); 9075 } 9076 } 9077 9078 // special case v_mac_{f16, f32}: 9079 // it has src2 register operand that is tied to dst operand 9080 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 9081 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 9082 auto it = Inst.begin(); 9083 std::advance( 9084 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 9085 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 9086 } 9087 } 9088 9089 //===----------------------------------------------------------------------===// 9090 // mAI 9091 //===----------------------------------------------------------------------===// 9092 9093 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 9094 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 9095 } 9096 9097 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 9098 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 9099 } 9100 9101 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 9102 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 9103 } 9104 9105 /// Force static initialization. 9106 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 9107 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 9108 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 9109 } 9110 9111 #define GET_REGISTER_MATCHER 9112 #define GET_MATCHER_IMPLEMENTATION 9113 #define GET_MNEMONIC_SPELL_CHECKER 9114 #define GET_MNEMONIC_CHECKER 9115 #include "AMDGPUGenAsmMatcher.inc" 9116 9117 // This function should be defined after auto-generated include so that we have 9118 // MatchClassKind enum defined 9119 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 9120 unsigned Kind) { 9121 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 9122 // But MatchInstructionImpl() expects to meet token and fails to validate 9123 // operand. This method checks if we are given immediate operand but expect to 9124 // get corresponding token. 9125 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 9126 switch (Kind) { 9127 case MCK_addr64: 9128 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 9129 case MCK_gds: 9130 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 9131 case MCK_lds: 9132 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 9133 case MCK_idxen: 9134 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 9135 case MCK_offen: 9136 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 9137 case MCK_SSrcB32: 9138 // When operands have expression values, they will return true for isToken, 9139 // because it is not possible to distinguish between a token and an 9140 // expression at parse time. MatchInstructionImpl() will always try to 9141 // match an operand as a token, when isToken returns true, and when the 9142 // name of the expression is not a valid token, the match will fail, 9143 // so we need to handle it here. 9144 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 9145 case MCK_SSrcF32: 9146 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 9147 case MCK_SoppBrTarget: 9148 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 9149 case MCK_VReg32OrOff: 9150 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 9151 case MCK_InterpSlot: 9152 return Operand.isInterpSlot() ? 
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }

//===----------------------------------------------------------------------===//
// LDSDIR
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(),
                                  AMDGPUOperand::ImmTyWaitVDST);
}

bool AMDGPUOperand::isWaitVDST() const {
  return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}

//===----------------------------------------------------------------------===//
// VINTERP
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(),
                                  AMDGPUOperand::ImmTyWaitEXP);
}

bool AMDGPUOperand::isWaitEXP() const {
  return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}
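// Illustrative uses of the two wait operands validated above, in gfx11-style
// syntax (hand-written examples matching the isUInt<4>/isUInt<3> range checks,
// not generated output):
//
//   lds_direct_load v1 wait_vdst:15              // wait_vdst is a 4-bit field
//   v_interp_p2_f32 v0, v1, v2, v3 wait_exp:7    // wait_exp is a 3-bit field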