//===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "TargetInfo/X86TargetInfo.h"
#include "X86AsmParserCommon.h"
#include "X86Operand.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <memory>

using namespace llvm;

static cl::opt<bool> LVIInlineAsmHardening(
    "x86-experimental-lvi-inline-asm-hardening",
    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
             " Injection (LVI). This feature is experimental."), cl::Hidden);

// Return true (and set ErrMsg) if Scale is not a legal x86 SIB scale factor.
static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
  if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
    ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
    return true;
  }
  return false;
}

namespace {

// Operator precedence table, indexed by InfixCalculatorTok (declared below).
// The two must be kept in the same order.
static const char OpPrecedence[] = {
    0,  // IC_OR
    1,  // IC_XOR
    2,  // IC_AND
    4,  // IC_LSHIFT
    4,  // IC_RSHIFT
    5,  // IC_PLUS
    5,  // IC_MINUS
    6,  // IC_MULTIPLY
    6,  // IC_DIVIDE
    6,  // IC_MOD
    7,  // IC_NOT
    8,  // IC_NEG
    9,  // IC_RPAREN
    10, // IC_LPAREN
    0,  // IC_IMM
    0,  // IC_REGISTER
    3,  // IC_EQ
    3,  // IC_NE
    3,  // IC_LT
    3,  // IC_LE
    3,  // IC_GT
    3   // IC_GE
};

class X86AsmParser : public MCTargetAsmParser {
  ParseInstructionInfo *InstInfo;
  bool Code16GCC;
  unsigned ForcedDataPrefix = 0;

  enum VEXEncoding {
    VEXEncoding_Default,
    VEXEncoding_VEX,
    VEXEncoding_VEX2,
    VEXEncoding_VEX3,
    VEXEncoding_EVEX,
  };

  VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;

  enum DispEncoding {
    DispEncoding_Default,
    DispEncoding_Disp8,
    DispEncoding_Disp32,
  };

  DispEncoding ForcedDispEncoding = DispEncoding_Default;

  // Does this instruction use an APX extended register?
  bool UseApxExtendedReg = false;

private:
  // Consume the current token and return its source location.
  SMLoc consumeToken() {
    MCAsmParser &Parser = getParser();
    SMLoc Result = Parser.getTok().getLoc();
    Parser.Lex();
    return Result;
  }

  X86TargetStreamer &getTargetStreamer() {
    assert(getParser().getStreamer().getTargetStreamer() &&
           "do not have a target streamer");
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<X86TargetStreamer &>(TS);
  }

  unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
                            uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
                            bool matchingInlineAsm, unsigned VariantID = 0) {
    // In Code16GCC mode, match as 32-bit.
    if (Code16GCC)
      SwitchMode(X86::Is32Bit);
    unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
                                       MissingFeatures, matchingInlineAsm,
                                       VariantID);
    if (Code16GCC)
      SwitchMode(X86::Is16Bit);
    return rv;
  }

  // Token kinds for the infix constant-expression calculator. Must stay in
  // sync with the OpPrecedence table above.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER,
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };

  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };

  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };

  // Evaluates constant integer expressions: operators/operands are pushed in
  // infix order, converted to postfix by operator precedence, and evaluated
  // by execute().
  class InfixCalculator {
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    SmallVector<ICToken, 4> PostfixStack;

    bool isUnaryOperator(InfixCalculatorTok Op) const {
      return Op == IC_NEG || Op == IC_NOT;
    }

  public:
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }

    void popOperator() { InfixOperatorStack.pop_back(); }
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parentheses.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.
      unsigned ParenCount = 0;
      while (true) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parentheses,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }

    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      // Standard postfix evaluation: operands are pushed, each operator pops
      // its arguments and pushes the result as an immediate.
      SmallVector<ICToken, 16> OperandStack;
      for (const ICToken &Op : PostfixStack) {
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          // Comparison operators yield the MASM convention: -1 for true,
          // 0 for false.
          case IC_EQ:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Equals operation with an immediate and a register!");
            Val = (Op1.second == Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_NE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Not-equals operation with an immediate and a register!");
            Val = (Op1.second != Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than operation with an immediate and a register!");
            Val = (Op1.second < Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second <= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than operation with an immediate and a register!");
            Val = (Op1.second > Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second >= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
  };

  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_ERROR
  };

  // State machine driven by the Intel-syntax expression parser. Each on*()
  // callback advances State according to the previous token and accumulates
  // the memory-operand components (BaseReg, IndexReg, Scale, displacement in
  // IC, and an optional symbol reference).
  class IntelExprStateMachine {
    IntelExprState State = IES_INIT, PrevState = IES_ERROR;
    unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
    int64_t Imm = 0;
    const MCExpr *Sym = nullptr;
    StringRef SymName;
    InfixCalculator IC;
    InlineAsmIdentifierInfo Info;
    short BracCount = 0;
    bool MemExpr = false;
    bool BracketUsed = false;
    bool OffsetOperator = false;
    bool AttachToOperandIdx = false;
    bool IsPIC = false;
    SMLoc OffsetOperatorLoc;
    AsmTypeInfo CurType;

    // At most one symbol reference may appear in a memory operand.
    bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
      if (Sym) {
        ErrMsg = "cannot use more than one symbol in memory operand";
        return true;
      }
      Sym = Val;
      SymName = ID;
      return false;
    }

  public:
    IntelExprStateMachine() = default;

    void addImm(int64_t imm) { Imm += imm; }
    short getBracCount() const { return BracCount; }
    bool isMemExpr() const { return MemExpr; }
    bool isBracketUsed() const { return BracketUsed; }
    bool isOffsetOperator() const { return OffsetOperator; }
    SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
    unsigned getBaseReg() const { return BaseReg; }
    unsigned getIndexReg() const { return IndexReg; }
    unsigned getScale() const { return Scale; }
    const MCExpr *getSym() const { return Sym; }
    StringRef getSymName() const { return SymName; }
    StringRef getType() const { return CurType.Name; }
    unsigned getSize() const { return CurType.Size; }
    unsigned getElementSize() const { return CurType.ElementSize; }
    unsigned getLength() const { return CurType.Length; }
    int64_t getImm() { return Imm + IC.execute(); }
    bool isValidEndState() const {
      return State == IES_RBRAC || State == IES_INTEGER;
    }

    // Is the intel expression appended after an operand index.
    // [OperandIdx][Intel Expression]
    // This is necessary for checking if it is an independent
    // intel expression at back end when parse inline asm.
    void setAppendAfterOperand() { AttachToOperandIdx = true; }

    bool isPIC() const { return IsPIC; }
    void setPIC() { IsPIC = true; }

    bool hadError() const { return State == IES_ERROR; }
    const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }

    // Always returns true; sets ErrMsg for the "no register slot left" error.
    bool regsUseUpError(StringRef &ErrMsg) {
      // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg]
      // cannot introduce an additional register in inline asm in PIC model.
      if (IsPIC && AttachToOperandIdx)
        ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
      else
        ErrMsg = "BaseReg/IndexReg already set!";
      return true;
    }

    void onOr() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_OR;
        IC.pushOperator(IC_OR);
        break;
      }
      PrevState = CurrState;
    }
    void onXor() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_XOR;
        IC.pushOperator(IC_XOR);
        break;
      }
      PrevState = CurrState;
    }
    void onAnd() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_AND;
        IC.pushOperator(IC_AND);
        break;
      }
      PrevState = CurrState;
    }
    void onEq() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_EQ;
        IC.pushOperator(IC_EQ);
        break;
      }
      PrevState = CurrState;
    }
    void onNE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_NE;
        IC.pushOperator(IC_NE);
        break;
      }
      PrevState = CurrState;
    }
    void onLT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LT;
        IC.pushOperator(IC_LT);
        break;
      }
      PrevState = CurrState;
    }
    void onLE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LE;
        IC.pushOperator(IC_LE);
        break;
      }
      PrevState = CurrState;
    }
    void onGT() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GT;
        IC.pushOperator(IC_GT);
        break;
      }
      PrevState = CurrState;
    }
    void onGE() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_GE;
        IC.pushOperator(IC_GE);
        break;
      }
      PrevState = CurrState;
    }
    void onLShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_LSHIFT;
        IC.pushOperator(IC_LSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    void onRShift() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
        State = IES_RSHIFT;
        IC.pushOperator(IC_RSHIFT);
        break;
      }
      PrevState = CurrState;
    }
    bool onPlus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
      case IES_OFFSET:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    bool onMinus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_INIT:
      case IES_OFFSET:
        State = IES_MINUS;
        // push minus operator if it is not a negate operator
        if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
            CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
            CurrState == IES_OFFSET)
          IC.pushOperator(IC_MINUS);
        else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // We have negate operator for Scale: it's illegal
          ErrMsg = "Scale can't be negative";
          return true;
        } else
          IC.pushOperator(IC_NEG);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onNot() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_LBRAC:
      case IES_INIT:
        State = IES_NOT;
        IC.pushOperator(IC_NOT);
        break;
      }
      PrevState = CurrState;
    }
    bool onRegister(unsigned Reg, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
      case IES_LBRAC:
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
          Scale = IC.popOperand();
          if (checkScale(Scale, ErrMsg))
            return true;
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
                          const InlineAsmIdentifierInfo &IDInfo,
                          const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
                          StringRef &ErrMsg) {
      // InlineAsm: Treat an enum value as an integer
      if (ParsingMSInlineAsm)
        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
      // Treat a symbolic constant like an integer
      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
        return onInteger(CE->getValue(), ErrMsg);
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_CAST:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_INIT:
      case IES_LBRAC:
      case IES_LPAREN:
        if (setSymRef(SymRef, SymRefName, ErrMsg))
          return true;
        MemExpr = true;
        State = IES_INTEGER;
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm)
          Info = IDInfo;
        setTypeInfo(Type);
        break;
      }
      return false;
    }
    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_MULTIPLY:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          IndexReg = TmpReg;
          Scale = TmpInt;
          if (checkScale(Scale, ErrMsg))
            return true;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onStar() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_RPAREN:
        State = IES_MULTIPLY;
        IC.pushOperator(IC_MULTIPLY);
        break;
      }
    }
    void onDivide() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_DIVIDE;
        IC.pushOperator(IC_DIVIDE);
        break;
      }
    }
    void onMod() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_MOD;
        IC.pushOperator(IC_MOD);
        break;
      }
    }
    bool onLBrac() {
      // Nested brackets are rejected here; only one level may be open.
      if (BracCount)
        return true;
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_RPAREN:
        // e.g. Arr[1][2]: adjacent bracket expressions are added together.
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        CurType.Length = 1;
        CurType.Size = CurType.ElementSize;
        break;
      case IES_INIT:
      case IES_CAST:
        assert(!BracCount && "BracCount should be zero on parsing's start");
        State = IES_LBRAC;
        break;
      }
      MemExpr = true;
      BracketUsed = true;
      BracCount++;
      return false;
    }
    bool onRBrac(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RPAREN:
        if (BracCount-- != 1) {
          ErrMsg = "unexpected bracket encountered";
          return true;
        }
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg with
          // no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    void onRParen() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RBRAC:
      case IES_RPAREN:
        State = IES_RPAREN;
        IC.pushOperator(IC_RPAREN);
        break;
      }
    }
    bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
                  const InlineAsmIdentifierInfo &IDInfo,
                  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
      PrevState = State;
      switch (State) {
      default:
        ErrMsg = "unexpected offset operator expression";
        return true;
      case IES_PLUS:
      case IES_INIT:
      case IES_LBRAC:
        if (setSymRef(Val, ID, ErrMsg))
          return true;
        OffsetOperator = true;
        OffsetOperatorLoc = OffsetLoc;
        State = IES_OFFSET;
        // As we cannot yet resolve the actual value (offset), we retain
        // the requested semantics by pushing a '0' to the operands stack
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm) {
          Info = IDInfo;
        }
        break;
      }
      return false;
    }
    void onCast(AsmTypeInfo Info) {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_LPAREN:
        setTypeInfo(Info);
        State = IES_CAST;
        break;
      }
    }
    void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
  };

  // Emit a diagnostic unless we are matching MS inline asm, in which case the
  // statement is consumed and no error is reported (the caller retries).
  bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
             bool MatchingInlineAsm = false) {
    MCAsmParser &Parser = getParser();
    if (MatchingInlineAsm) {
      if (!getLexer().isAtStartOfStatement())
        Parser.eatToEndOfStatement();
      return false;
    }
    return Parser.Error(L, Msg, Range);
  }

  bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
                           SMLoc EndLoc);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);

  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
  bool IsSIReg(unsigned Reg);
  unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
  void
  AddDefaultSrcDestOperands(OperandVector &Operands,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
                               OperandVector &FinalOperands);
  bool parseOperand(OperandVector &Operands, StringRef Name);
  bool parseATTOperand(OperandVector &Operands);
  bool parseIntelOperand(OperandVector &Operands, StringRef Name);
  bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                InlineAsmIdentifierInfo &Info, SMLoc &End);
  bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
  unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
  unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
  unsigned IdentifyMasmOperator(StringRef Name);
  bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
  bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
  bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                               bool &ParseError, SMLoc &End);
  bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                              bool &ParseError, SMLoc &End);
  void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
                              SMLoc End);
  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                     InlineAsmIdentifierInfo &Info,
                                     bool IsUnevaluatedOperand, SMLoc &End,
                                     bool IsParsingOffsetOperator = false);
  void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
                          IntelExprStateMachine &SM);

  bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
                       SMLoc EndLoc, OperandVector &Operands);

  X86::CondCode ParseConditionCode(StringRef CCode);

  bool ParseIntelMemoryOperandSize(unsigned &Size);
  bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
                               unsigned BaseReg, unsigned IndexReg,
                               unsigned Scale, bool NonAbsMem, SMLoc Start,
                               SMLoc End, unsigned Size, StringRef Identifier,
                               const InlineAsmIdentifierInfo &Info,
                               OperandVector &Operands);

  bool parseDirectiveArch();
  bool parseDirectiveNops(SMLoc L);
  bool parseDirectiveEven(SMLoc L);
  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);

  /// CodeView FPO data directives.
  bool parseDirectiveFPOProc(SMLoc L);
  bool parseDirectiveFPOSetFrame(SMLoc L);
  bool parseDirectiveFPOPushReg(SMLoc L);
  bool parseDirectiveFPOStackAlloc(SMLoc L);
  bool parseDirectiveFPOStackAlign(SMLoc L);
  bool parseDirectiveFPOEndPrologue(SMLoc L);
  bool parseDirectiveFPOEndProc(SMLoc L);

  /// SEH directives.
  bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
  bool parseDirectiveSEHPushReg(SMLoc);
  bool parseDirectiveSEHSetFrame(SMLoc);
  bool parseDirectiveSEHSaveReg(SMLoc);
  bool parseDirectiveSEHSaveXMM(SMLoc);
  bool parseDirectiveSEHPushFrame(SMLoc);

  unsigned checkTargetMatchPredicate(MCInst &Inst) override;

  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
  bool processInstruction(MCInst &Inst, const OperandVector &Ops);

  // Load Value Injection (LVI) Mitigations for machine code
  void emitWarningForSpecialLVIInstruction(SMLoc Loc);
  void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
  void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);

  /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
  /// instrumentation around Inst.
  void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);

  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;

  void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
                         MCStreamer &Out, bool MatchingInlineAsm);

  bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
                           bool MatchingInlineAsm);

  bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                  OperandVector &Operands, MCStreamer &Out,
                                  uint64_t &ErrorInfo,
                                  bool MatchingInlineAsm);

  bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                    OperandVector &Operands, MCStreamer &Out,
                                    uint64_t &ErrorInfo,
                                    bool MatchingInlineAsm);

  bool OmitRegisterFromClobberLists(unsigned RegNo) override;

  /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>},
  /// {z}) and memory broadcasting ({1to<NUM>}) primitives, updating Operands
  /// vector if required.
  /// return false if no parsing errors occurred, true otherwise.
  bool HandleAVX512Operand(OperandVector &Operands);

  bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);

  bool is64BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is64Bit);
  }
  bool is32BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is32Bit);
  }
  bool is16BitMode() const {
    // FIXME: Can tablegen auto-generate this?
    return getSTI().hasFeature(X86::Is16Bit);
  }
  void SwitchMode(unsigned mode) {
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }

  unsigned getPointerWidth() {
    if (is16BitMode()) return 16;
    if (is32BitMode()) return 32;
    if (is64BitMode()) return 64;
    llvm_unreachable("invalid mode");
  }

  bool isParsingIntelSyntax() {
    return getParser().getAssemblerDialect();
  }

  /// @name Auto-generated Matcher Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "X86GenAsmMatcher.inc"

  /// }

public:
  enum X86MatchResultTy {
    Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "X86GenAsmMatcher.inc"
  };

  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }

  bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
  ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                               SMLoc &EndLoc) override;

  bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;

  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;

  bool ParseDirective(AsmToken DirectiveID) override;
};
} // end anonymous namespace

#define GET_REGISTER_MATCHER
#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"

static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
                                            unsigned Scale, bool Is64BitMode,
                                            StringRef &ErrMsg) {
  // If we have both a base register and an index register make sure they are
  // both 64-bit or 32-bit registers.
  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
  // Reject base registers that are not 16/32/64-bit GPRs (RIP/EIP are the
  // only non-GPR bases accepted).
  if (BaseReg != 0 &&
      !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // The index may additionally be a vector register (VSIB addressing) or one
  // of the pseudo "index zero" registers EIZ/RIZ.
  if (IndexReg != 0 &&
      !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // RIP/EIP-relative addressing never takes an index register, and
  // ESP/RSP/EIP/RIP can never themselves be used as an index.
  if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
      IndexReg == X86::EIP || IndexReg == X86::RIP ||
      IndexReg == X86::ESP || IndexReg == X86::RSP) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
  // and then only in non-64-bit modes.
  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
      (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
                       BaseReg != X86::SI && BaseReg != X86::DI))) {
    ErrMsg = "invalid 16-bit base register";
    return true;
  }

  // 16-bit addressing cannot encode an index without a base.
  if (BaseReg == 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
    ErrMsg = "16-bit memory operand may not include only index register";
    return true;
  }

  // When both registers are present, their widths must agree (EIZ counts as
  // 32-bit, RIZ as 64-bit).
  if (BaseReg != 0 && IndexReg != 0) {
    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
         IndexReg == X86::EIZ)) {
      ErrMsg = "base register is 64-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
         IndexReg == X86::RIZ)) {
      ErrMsg = "base register is 32-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
      if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
        ErrMsg = "base register is 16-bit, but index register is not";
        return true;
      }
      // 16-bit addressing only encodes BX/BP as base with SI/DI as index.
      if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
          (IndexReg != X86::SI && IndexReg != X86::DI)) {
        ErrMsg = "invalid 16-bit base/index register combination";
        return true;
      }
    }
  }

  // RIP/EIP-relative addressing is only supported in 64-bit mode.
  if (!Is64BitMode && BaseReg != 0 &&
      (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
    ErrMsg = "IP-relative addressing requires 64-bit mode";
    return true;
  }

  // Finally validate the scale factor (must be 1, 2, 4 or 8).
  return checkScale(Scale, ErrMsg);
}

// Resolve RegName to an MCRegister, applying mode checks and the db->dr
// alias. Returns true on error; in Intel syntax an unknown name fails
// quietly (no diagnostic) so the caller can re-parse it as an identifier.
bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
                                       SMLoc StartLoc, SMLoc EndLoc) {
  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  RegName.consume_front("%");

  RegNo = MatchRegisterName(RegName);

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(RegName.lower());

  // The "flags" and "mxcsr" registers cannot be referenced directly.
  // Treat it as an identifier instead.
  if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
      (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
    RegNo = 0;

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo)) {
      return Error(StartLoc,
                   "register %" + RegName + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
    }
  }

  // Remember that an APX extended register was used, so later matching can
  // diagnose encodings that cannot reach these registers.
  if (X86II::isApxExtendedReg(RegNo))
    UseApxExtendedReg = true;

  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  // "db0".."db15" are accepted as aliases for the debug registers DR0..DR15.
  if (RegNo == 0 && RegName.starts_with("db")) {
    if (RegName.size() == 3) {
      switch (RegName[2]) {
      case '0':
        RegNo = X86::DR0;
        break;
      case '1':
        RegNo = X86::DR1;
        break;
      case '2':
        RegNo = X86::DR2;
        break;
      case '3':
        RegNo = X86::DR3;
        break;
      case '4':
        RegNo = X86::DR4;
        break;
      case '5':
        RegNo = X86::DR5;
        break;
      case '6':
        RegNo = X86::DR6;
        break;
      case '7':
        RegNo = X86::DR7;
        break;
      case '8':
        RegNo = X86::DR8;
        break;
      case '9':
        RegNo = X86::DR9;
        break;
      }
    } else if (RegName.size() == 4 && RegName[2] == '1') {
      switch (RegName[3]) {
      case '0':
        RegNo = X86::DR10;
        break;
      case '1':
        RegNo = X86::DR11;
        break;
      case '2':
        RegNo = X86::DR12;
        break;
      case '3':
        RegNo = X86::DR13;
        break;
      case '4':
        RegNo = X86::DR14;
        break;
      case '5':
        RegNo = X86::DR15;
        break;
      }
    }
  }

  if (RegNo == 0) {
    // In Intel syntax a bare identifier may legitimately be something other
    // than a register, so fail without emitting a diagnostic.
    if (isParsingIntelSyntax())
      return true;
    return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
  }
  return false;
}

// Parse a register reference from the token stream. When RestoreOnFailure is
// set, every consumed token is pushed back to the lexer on failure so the
// caller can re-parse the same text (used by tryParseRegister).
bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  RegNo = 0;

  // Tokens consumed so far; replayed (in reverse) via UnLex on failure when
  // RestoreOnFailure is set.
  SmallVector<AsmToken, 5> Tokens;
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (RegNo == 0) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}

// MCTargetAsmParser hook: parse a register, emitting diagnostics on failure.
bool X86AsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                 SMLoc &EndLoc) {
  return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

// MCTargetAsmParser hook: speculative register parse. Restores the token
// stream and suppresses pending diagnostics so NoMatch is non-destructive.
ParseStatus X86AsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
                                           SMLoc &EndLoc) {
  bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return ParseStatus::Failure;
  if (Result)
    return ParseStatus::NoMatch;
  return ParseStatus::Success;
}

// Build the implicit (%rsi)/(%esi)/(%si) source operand used by string
// instructions, sized for the current parsing mode (Code16GCC forces
// 32-bit registers while in 16-bit mode).
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

// Build the implicit (%rdi)/(%edi)/(%di) destination operand used by string
// instructions; see DefaultMemSIOperand for the mode selection rules.
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
  bool Parse32 = is32BitMode() || Code16GCC;
  unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
  const MCExpr *Disp = MCConstantExpr::create(0, getContext());
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
                               Loc, Loc, 0);
}

// Return true for (R|E)SI, false for (R|E)DI; any other register is a
// programming error.
bool X86AsmParser::IsSIReg(unsigned Reg) {
  switch (Reg) {
  default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
  case X86::RSI:
  case X86::ESI:
  case X86::SI:
    return true;
  case X86::RDI:
  case X86::EDI:
  case X86::DI:
    return false;
  }
}

// Map (RegClassID, IsSIReg) to the SI or DI register of that width.
unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
                                          bool IsSIReg) {
  switch (RegClassID) {
  default: llvm_unreachable("Unexpected register class");
  case X86::GR64RegClassID:
    return IsSIReg ? X86::RSI : X86::RDI;
  case X86::GR32RegClassID:
    return IsSIReg ? X86::ESI : X86::EDI;
  case X86::GR16RegClassID:
    return IsSIReg ? X86::SI : X86::DI;
  }
}

// Append the implicit source/destination operands in dialect order:
// Intel syntax lists destination first, AT&T lists source first.
void X86AsmParser::AddDefaultSrcDestOperands(
    OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
    std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
  if (isParsingIntelSyntax()) {
    Operands.push_back(std::move(Dst));
    Operands.push_back(std::move(Src));
  }
  else {
    Operands.push_back(std::move(Src));
    Operands.push_back(std::move(Dst));
  }
}

// Reconcile the operands the user wrote (OrigOperands, including the mnemonic
// at index 0) with the canonical implicit operands of a string instruction
// (FinalOperands), warning when the written memory operand differs from the
// location actually used. Returns true on a hard mismatch error.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    // Collected lazily so we only warn if every operand adjusts cleanly.
    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all
        // register bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        // Canonicalize the implicit base to the SI/DI register of the width
        // the user wrote, then warn if that differs from what was written.
        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - prevent
    // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (auto &Op : FinalOperands)
    OrigOperands.push_back(std::move(Op));

  return false;
}

// Dispatch operand parsing to the dialect-specific implementation.
bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
  if (isParsingIntelSyntax())
    return parseIntelOperand(Operands, Name);

  return parseATTOperand(Operands);
}

// Construct the memory operand for an identifier seen in MS inline assembly,
// using the frontend-provided identifier Info to pick between a label
// reference, a global-variable reference, and a plain memory operand.
// Always returns false (never fails); the operand is appended to Operands.
bool X86AsmParser::CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
                                           unsigned BaseReg, unsigned IndexReg,
                                           unsigned Scale, bool NonAbsMem,
                                           SMLoc Start, SMLoc End,
                                           unsigned Size, StringRef Identifier,
                                           const InlineAsmIdentifierInfo &Info,
                                           OperandVector &Operands) {
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
    // Create an absolute memory reference in order to match against
    // instructions taking a PC relative operand.
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier,
                                             Info.Label.Decl));
    return false;
  }
  // We either have a direct symbol reference, or an offset from a symbol. The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // It is widely common for MS InlineAsm to use a global variable and one/two
  // registers in a memory expression, even though inaccessible via rip/eip.
  if (IsGlobalLV) {
    if (BaseReg || IndexReg) {
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                               End, Size, Identifier, Decl, 0,
                                               BaseReg && IndexReg));
      return false;
    }
    if (NonAbsMem)
      BaseReg = 1; // Make isAbsMem() false
  }
  Operands.push_back(X86Operand::CreateMem(
      getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
      Size,
      /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
  return false;
}

// Some binary bitwise operators have a named synonym.
// Query a candidate string for being such a named operator
// and if so - invoke the appropriate handler
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix...
  // except in MASM, which uses full case-insensitivity.
  if (Name.compare(Name.lower()) && Name.compare(Name.upper()) &&
      !getParser().isParsingMasm())
    return false;
  if (Name.equals_insensitive("not")) {
    SM.onNot();
  } else if (Name.equals_insensitive("or")) {
    SM.onOr();
  } else if (Name.equals_insensitive("shl")) {
    SM.onLShift();
  } else if (Name.equals_insensitive("shr")) {
    SM.onRShift();
  } else if (Name.equals_insensitive("xor")) {
    SM.onXor();
  } else if (Name.equals_insensitive("and")) {
    SM.onAnd();
  } else if (Name.equals_insensitive("mod")) {
    SM.onMod();
  } else if (Name.equals_insensitive("offset")) {
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    InlineAsmIdentifierInfo Info;
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  // "offset" already consumed its operand (and updated End) above; all other
  // named operators still need their token eaten here.
  if (!Name.equals_insensitive("offset"))
    End = consumeToken();
  return true;
}

// Recognize MASM's named comparison operators (EQ/NE/LT/LE/GT/GE) and feed
// them to the expression state machine. Returns false if Name is not one of
// them; ParseError is never set here.
bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
                                          IntelExprStateMachine &SM,
                                          bool &ParseError, SMLoc &End) {
  if (Name.equals_insensitive("eq")) {
    SM.onEq();
  } else if (Name.equals_insensitive("ne")) {
    SM.onNE();
  } else if (Name.equals_insensitive("lt")) {
    SM.onLT();
  } else if (Name.equals_insensitive("le")) {
    SM.onLE();
  } else if (Name.equals_insensitive("gt")) {
    SM.onGT();
  } else if (Name.equals_insensitive("ge")) {
    SM.onGE();
  } else {
    return false;
  }
  End = consumeToken();
  return true;
}

// Check if the current intel expression is appended after an operand.
// Like: [Operand][Intel Expression]
void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
                                      IntelExprStateMachine &SM) {
  if (PrevTK != AsmToken::RBrac)
    return;

  SM.setAppendAfterOperand();
}

// Drive the Intel-syntax expression state machine over the token stream until
// a valid end state (or EndOfStatement) is reached. End receives the location
// just past the expression. Returns true on error.
bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
  MCAsmParser &Parser = getParser();
  StringRef ErrMsg;

  AsmToken::TokenKind PrevTK = AsmToken::Error;

  if (getContext().getObjectFileInfo()->isPositionIndependent())
    SM.setPIC();

  bool Done = false;
  while (!Done) {
    // Get a fresh reference on each loop iteration in case the previous
    // iteration moved the token storage during UnLex().
    const AsmToken &Tok = Parser.getTok();

    bool UpdateLocLex = true;
    AsmToken::TokenKind TK = getLexer().getKind();

    switch (TK) {
    default:
      if ((Done = SM.isValidEndState()))
        break;
      return Error(Tok.getLoc(), "unknown token in expression");
    case AsmToken::Error:
      return Error(getLexer().getErrLoc(), getLexer().getErr());
      break;
    case AsmToken::EndOfStatement:
      Done = true;
      break;
    case AsmToken::Real:
      // DotOperator: [ebx].0
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dot:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      // MASM allows spaces around the dot operator (e.g., "var . x")
      Lex();
      UpdateLocLex = false;
      if (ParseIntelDotOperator(SM, End))
        return true;
      break;
    case AsmToken::Dollar:
      if (!Parser.isParsingMasm()) {
        if ((Done = SM.isValidEndState()))
          break;
        return Error(Tok.getLoc(), "unknown token in expression");
      }
      [[fallthrough]];
    case AsmToken::String: {
      if (Parser.isParsingMasm()) {
        // MASM parsers handle strings in expressions as constants.
        SMLoc ValueLoc = Tok.getLoc();
        int64_t Res;
        const MCExpr *Val;
        if (Parser.parsePrimaryExpr(Val, End, nullptr))
          return true;
        UpdateLocLex = false;
        if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
          return Error(ValueLoc, "expected absolute value");
        if (SM.onInteger(Res, ErrMsg))
          return Error(ValueLoc, ErrMsg);
        break;
      }
      [[fallthrough]];
    }
    case AsmToken::At:
    case AsmToken::Identifier: {
      SMLoc IdentLoc = Tok.getLoc();
      StringRef Identifier = Tok.getString();
      UpdateLocLex = false;
      if (Parser.isParsingMasm()) {
        // MASM lexes "a.b" as one identifier; split it back into
        // identifier/dot/identifier tokens and re-process.
        size_t DotOffset = Identifier.find_first_of('.');
        if (DotOffset != StringRef::npos) {
          consumeToken();
          StringRef LHS = Identifier.slice(0, DotOffset);
          StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
          StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
          if (!RHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
          }
          getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
          if (!LHS.empty()) {
            getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
          }
          break;
        }
      }
      // (MASM only) <TYPE> PTR operator
      if (Parser.isParsingMasm()) {
        const AsmToken &NextTok = getLexer().peekTok();
        if (NextTok.is(AsmToken::Identifier) &&
            NextTok.getIdentifier().equals_insensitive("ptr")) {
          AsmTypeInfo Info;
          if (Parser.lookUpType(Identifier, Info))
            return Error(Tok.getLoc(), "unknown type");
1972 SM.onCast(Info); 1973 // Eat type and PTR. 1974 consumeToken(); 1975 End = consumeToken(); 1976 break; 1977 } 1978 } 1979 // Register, or (MASM only) <register>.<field> 1980 MCRegister Reg; 1981 if (Tok.is(AsmToken::Identifier)) { 1982 if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) { 1983 if (SM.onRegister(Reg, ErrMsg)) 1984 return Error(IdentLoc, ErrMsg); 1985 break; 1986 } 1987 if (Parser.isParsingMasm()) { 1988 const std::pair<StringRef, StringRef> IDField = 1989 Tok.getString().split('.'); 1990 const StringRef ID = IDField.first, Field = IDField.second; 1991 SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size()); 1992 if (!Field.empty() && 1993 !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) { 1994 if (SM.onRegister(Reg, ErrMsg)) 1995 return Error(IdentLoc, ErrMsg); 1996 1997 AsmFieldInfo Info; 1998 SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data()); 1999 if (Parser.lookUpField(Field, Info)) 2000 return Error(FieldStartLoc, "unknown offset"); 2001 else if (SM.onPlus(ErrMsg)) 2002 return Error(getTok().getLoc(), ErrMsg); 2003 else if (SM.onInteger(Info.Offset, ErrMsg)) 2004 return Error(IdentLoc, ErrMsg); 2005 SM.setTypeInfo(Info.Type); 2006 2007 End = consumeToken(); 2008 break; 2009 } 2010 } 2011 } 2012 // Operator synonymous ("not", "or" etc.) 
2013 bool ParseError = false; 2014 if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) { 2015 if (ParseError) 2016 return true; 2017 break; 2018 } 2019 if (Parser.isParsingMasm() && 2020 ParseMasmNamedOperator(Identifier, SM, ParseError, End)) { 2021 if (ParseError) 2022 return true; 2023 break; 2024 } 2025 // Symbol reference, when parsing assembly content 2026 InlineAsmIdentifierInfo Info; 2027 AsmFieldInfo FieldInfo; 2028 const MCExpr *Val; 2029 if (isParsingMSInlineAsm() || Parser.isParsingMasm()) { 2030 // MS Dot Operator expression 2031 if (Identifier.count('.') && 2032 (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) { 2033 if (ParseIntelDotOperator(SM, End)) 2034 return true; 2035 break; 2036 } 2037 } 2038 if (isParsingMSInlineAsm()) { 2039 // MS InlineAsm operators (TYPE/LENGTH/SIZE) 2040 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) { 2041 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) { 2042 if (SM.onInteger(Val, ErrMsg)) 2043 return Error(IdentLoc, ErrMsg); 2044 } else { 2045 return true; 2046 } 2047 break; 2048 } 2049 // MS InlineAsm identifier 2050 // Call parseIdentifier() to combine @ with the identifier behind it. 
2051 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier)) 2052 return Error(IdentLoc, "expected identifier"); 2053 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End)) 2054 return true; 2055 else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type, 2056 true, ErrMsg)) 2057 return Error(IdentLoc, ErrMsg); 2058 break; 2059 } 2060 if (Parser.isParsingMasm()) { 2061 if (unsigned OpKind = IdentifyMasmOperator(Identifier)) { 2062 int64_t Val; 2063 if (ParseMasmOperator(OpKind, Val)) 2064 return true; 2065 if (SM.onInteger(Val, ErrMsg)) 2066 return Error(IdentLoc, ErrMsg); 2067 break; 2068 } 2069 if (!getParser().lookUpType(Identifier, FieldInfo.Type)) { 2070 // Field offset immediate; <TYPE>.<field specification> 2071 Lex(); // eat type 2072 bool EndDot = parseOptionalToken(AsmToken::Dot); 2073 while (EndDot || (getTok().is(AsmToken::Identifier) && 2074 getTok().getString().starts_with("."))) { 2075 getParser().parseIdentifier(Identifier); 2076 if (!EndDot) 2077 Identifier.consume_front("."); 2078 EndDot = Identifier.consume_back("."); 2079 if (getParser().lookUpField(FieldInfo.Type.Name, Identifier, 2080 FieldInfo)) { 2081 SMLoc IDEnd = 2082 SMLoc::getFromPointer(Identifier.data() + Identifier.size()); 2083 return Error(IdentLoc, "Unable to lookup field reference!", 2084 SMRange(IdentLoc, IDEnd)); 2085 } 2086 if (!EndDot) 2087 EndDot = parseOptionalToken(AsmToken::Dot); 2088 } 2089 if (SM.onInteger(FieldInfo.Offset, ErrMsg)) 2090 return Error(IdentLoc, ErrMsg); 2091 break; 2092 } 2093 } 2094 if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) { 2095 return Error(Tok.getLoc(), "Unexpected identifier!"); 2096 } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type, 2097 false, ErrMsg)) { 2098 return Error(IdentLoc, ErrMsg); 2099 } 2100 break; 2101 } 2102 case AsmToken::Integer: { 2103 // Look for 'b' or 'f' following an Integer as a directional label 2104 SMLoc Loc = getTok().getLoc(); 2105 int64_t IntVal = 
getTok().getIntVal(); 2106 End = consumeToken(); 2107 UpdateLocLex = false; 2108 if (getLexer().getKind() == AsmToken::Identifier) { 2109 StringRef IDVal = getTok().getString(); 2110 if (IDVal == "f" || IDVal == "b") { 2111 MCSymbol *Sym = 2112 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b"); 2113 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 2114 const MCExpr *Val = 2115 MCSymbolRefExpr::create(Sym, Variant, getContext()); 2116 if (IDVal == "b" && Sym->isUndefined()) 2117 return Error(Loc, "invalid reference to undefined symbol"); 2118 StringRef Identifier = Sym->getName(); 2119 InlineAsmIdentifierInfo Info; 2120 AsmTypeInfo Type; 2121 if (SM.onIdentifierExpr(Val, Identifier, Info, Type, 2122 isParsingMSInlineAsm(), ErrMsg)) 2123 return Error(Loc, ErrMsg); 2124 End = consumeToken(); 2125 } else { 2126 if (SM.onInteger(IntVal, ErrMsg)) 2127 return Error(Loc, ErrMsg); 2128 } 2129 } else { 2130 if (SM.onInteger(IntVal, ErrMsg)) 2131 return Error(Loc, ErrMsg); 2132 } 2133 break; 2134 } 2135 case AsmToken::Plus: 2136 if (SM.onPlus(ErrMsg)) 2137 return Error(getTok().getLoc(), ErrMsg); 2138 break; 2139 case AsmToken::Minus: 2140 if (SM.onMinus(ErrMsg)) 2141 return Error(getTok().getLoc(), ErrMsg); 2142 break; 2143 case AsmToken::Tilde: SM.onNot(); break; 2144 case AsmToken::Star: SM.onStar(); break; 2145 case AsmToken::Slash: SM.onDivide(); break; 2146 case AsmToken::Percent: SM.onMod(); break; 2147 case AsmToken::Pipe: SM.onOr(); break; 2148 case AsmToken::Caret: SM.onXor(); break; 2149 case AsmToken::Amp: SM.onAnd(); break; 2150 case AsmToken::LessLess: 2151 SM.onLShift(); break; 2152 case AsmToken::GreaterGreater: 2153 SM.onRShift(); break; 2154 case AsmToken::LBrac: 2155 if (SM.onLBrac()) 2156 return Error(Tok.getLoc(), "unexpected bracket encountered"); 2157 tryParseOperandIdx(PrevTK, SM); 2158 break; 2159 case AsmToken::RBrac: 2160 if (SM.onRBrac(ErrMsg)) { 2161 return Error(Tok.getLoc(), ErrMsg); 2162 } 2163 break; 2164 case 
AsmToken::LParen: SM.onLParen(); break;
    case AsmToken::RParen: SM.onRParen(); break;
    }
    if (SM.hadError())
      return Error(Tok.getLoc(), "unknown token in expression");

    // Only advance past the token if the state machine consumed it in place
    // (binary operators, parens); identifier/integer cases update End/lex
    // themselves and clear UpdateLocLex.
    if (!Done && UpdateLocLex)
      End = consumeToken();

    PrevTK = TK;
  }
  return false;
}

/// Record AsmRewrite actions for the MS inline-asm Intel expression occupying
/// the source range [Start, End): text before/after a symbol displacement is
/// marked AOK_Skip, and the base/index/scale/offset-name/immediate parts
/// gathered by the state machine are emitted as a single IntelExpr rewrite.
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  SMLoc Loc = Start;
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym() && !SM.isOffsetOperator()) {
    StringRef SymName = SM.getSymName();
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    // Re-anchor the rewrite window to just past the symbol name.
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol then there's no need for complex rewrite,
    // simply skip everything after it
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  StringRef OffsetNameStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  if (SM.isOffsetOperator())
    OffsetNameStr = SM.getSymName();
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
                 SM.getImm(), SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}

// Inline assembly may use variable names with namespace alias qualifiers.
2214 bool X86AsmParser::ParseIntelInlineAsmIdentifier( 2215 const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info, 2216 bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) { 2217 MCAsmParser &Parser = getParser(); 2218 assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly."); 2219 Val = nullptr; 2220 2221 StringRef LineBuf(Identifier.data()); 2222 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand); 2223 2224 const AsmToken &Tok = Parser.getTok(); 2225 SMLoc Loc = Tok.getLoc(); 2226 2227 // Advance the token stream until the end of the current token is 2228 // after the end of what the frontend claimed. 2229 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size(); 2230 do { 2231 End = Tok.getEndLoc(); 2232 getLexer().Lex(); 2233 } while (End.getPointer() < EndPtr); 2234 Identifier = LineBuf; 2235 2236 // The frontend should end parsing on an assembler token boundary, unless it 2237 // failed parsing. 2238 assert((End.getPointer() == EndPtr || 2239 Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) && 2240 "frontend claimed part of a token?"); 2241 2242 // If the identifier lookup was unsuccessful, assume that we are dealing with 2243 // a label. 2244 if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) { 2245 StringRef InternalName = 2246 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(), 2247 Loc, false); 2248 assert(InternalName.size() && "We should have an internal name here."); 2249 // Push a rewrite for replacing the identifier name with the internal name, 2250 // unless we are parsing the operand of an offset operator 2251 if (!IsParsingOffsetOperator) 2252 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(), 2253 InternalName); 2254 else 2255 Identifier = InternalName; 2256 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) 2257 return false; 2258 // Create the symbol reference. 
2259 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier); 2260 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; 2261 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext()); 2262 return false; 2263 } 2264 2265 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand 2266 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) { 2267 MCAsmParser &Parser = getParser(); 2268 const AsmToken &Tok = Parser.getTok(); 2269 // Eat "{" and mark the current place. 2270 const SMLoc consumedToken = consumeToken(); 2271 if (Tok.isNot(AsmToken::Identifier)) 2272 return Error(Tok.getLoc(), "Expected an identifier after {"); 2273 if (Tok.getIdentifier().starts_with("r")) { 2274 int rndMode = StringSwitch<int>(Tok.getIdentifier()) 2275 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT) 2276 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF) 2277 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF) 2278 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO) 2279 .Default(-1); 2280 if (-1 == rndMode) 2281 return Error(Tok.getLoc(), "Invalid rounding mode."); 2282 Parser.Lex(); // Eat "r*" of r*-sae 2283 if (!getLexer().is(AsmToken::Minus)) 2284 return Error(Tok.getLoc(), "Expected - at this point"); 2285 Parser.Lex(); // Eat "-" 2286 Parser.Lex(); // Eat the sae 2287 if (!getLexer().is(AsmToken::RCurly)) 2288 return Error(Tok.getLoc(), "Expected } at this point"); 2289 SMLoc End = Tok.getEndLoc(); 2290 Parser.Lex(); // Eat "}" 2291 const MCExpr *RndModeOp = 2292 MCConstantExpr::create(rndMode, Parser.getContext()); 2293 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End)); 2294 return false; 2295 } 2296 if(Tok.getIdentifier().equals("sae")){ 2297 Parser.Lex(); // Eat the sae 2298 if (!getLexer().is(AsmToken::RCurly)) 2299 return Error(Tok.getLoc(), "Expected } at this point"); 2300 Parser.Lex(); // Eat "}" 2301 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken)); 2302 return false; 2303 } 2304 return Error(Tok.getLoc(), 
"unknown token in expression"); 2305 } 2306 2307 /// Parse the '.' operator. 2308 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, 2309 SMLoc &End) { 2310 const AsmToken &Tok = getTok(); 2311 AsmFieldInfo Info; 2312 2313 // Drop the optional '.'. 2314 StringRef DotDispStr = Tok.getString(); 2315 if (DotDispStr.starts_with(".")) 2316 DotDispStr = DotDispStr.drop_front(1); 2317 StringRef TrailingDot; 2318 2319 // .Imm gets lexed as a real. 2320 if (Tok.is(AsmToken::Real)) { 2321 APInt DotDisp; 2322 if (DotDispStr.getAsInteger(10, DotDisp)) 2323 return Error(Tok.getLoc(), "Unexpected offset"); 2324 Info.Offset = DotDisp.getZExtValue(); 2325 } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) && 2326 Tok.is(AsmToken::Identifier)) { 2327 if (DotDispStr.ends_with(".")) { 2328 TrailingDot = DotDispStr.substr(DotDispStr.size() - 1); 2329 DotDispStr = DotDispStr.drop_back(1); 2330 } 2331 const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.'); 2332 const StringRef Base = BaseMember.first, Member = BaseMember.second; 2333 if (getParser().lookUpField(SM.getType(), DotDispStr, Info) && 2334 getParser().lookUpField(SM.getSymName(), DotDispStr, Info) && 2335 getParser().lookUpField(DotDispStr, Info) && 2336 (!SemaCallback || 2337 SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset))) 2338 return Error(Tok.getLoc(), "Unable to lookup field reference!"); 2339 } else { 2340 return Error(Tok.getLoc(), "Unexpected token type!"); 2341 } 2342 2343 // Eat the DotExpression and update End 2344 End = SMLoc::getFromPointer(DotDispStr.data()); 2345 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size(); 2346 while (Tok.getLoc().getPointer() < DotExprEndLoc) 2347 Lex(); 2348 if (!TrailingDot.empty()) 2349 getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot)); 2350 SM.addImm(Info.Offset); 2351 SM.setTypeInfo(Info.Type); 2352 return false; 2353 } 2354 2355 /// Parse the 'offset' operator. 
2356 /// This operator is used to specify the location of a given operand 2357 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID, 2358 InlineAsmIdentifierInfo &Info, 2359 SMLoc &End) { 2360 // Eat offset, mark start of identifier. 2361 SMLoc Start = Lex().getLoc(); 2362 ID = getTok().getString(); 2363 if (!isParsingMSInlineAsm()) { 2364 if ((getTok().isNot(AsmToken::Identifier) && 2365 getTok().isNot(AsmToken::String)) || 2366 getParser().parsePrimaryExpr(Val, End, nullptr)) 2367 return Error(Start, "unexpected token!"); 2368 } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) { 2369 return Error(Start, "unable to lookup expression"); 2370 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) { 2371 return Error(Start, "offset operator cannot yet handle constants"); 2372 } 2373 return false; 2374 } 2375 2376 // Query a candidate string for being an Intel assembly operator 2377 // Report back its kind, or IOK_INVALID if does not evaluated as a known one 2378 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) { 2379 return StringSwitch<unsigned>(Name) 2380 .Cases("TYPE","type",IOK_TYPE) 2381 .Cases("SIZE","size",IOK_SIZE) 2382 .Cases("LENGTH","length",IOK_LENGTH) 2383 .Default(IOK_INVALID); 2384 } 2385 2386 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator 2387 /// returns the number of elements in an array. It returns the value 1 for 2388 /// non-array variables. The SIZE operator returns the size of a C or C++ 2389 /// variable. A variable's size is the product of its LENGTH and TYPE. The 2390 /// TYPE operator returns the size of a C or C++ type or variable. If the 2391 /// variable is an array, TYPE returns the size of a single element. 2392 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) { 2393 MCAsmParser &Parser = getParser(); 2394 const AsmToken &Tok = Parser.getTok(); 2395 Parser.Lex(); // Eat operator. 
2396 2397 const MCExpr *Val = nullptr; 2398 InlineAsmIdentifierInfo Info; 2399 SMLoc Start = Tok.getLoc(), End; 2400 StringRef Identifier = Tok.getString(); 2401 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, 2402 /*IsUnevaluatedOperand=*/true, End)) 2403 return 0; 2404 2405 if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) { 2406 Error(Start, "unable to lookup expression"); 2407 return 0; 2408 } 2409 2410 unsigned CVal = 0; 2411 switch(OpKind) { 2412 default: llvm_unreachable("Unexpected operand kind!"); 2413 case IOK_LENGTH: CVal = Info.Var.Length; break; 2414 case IOK_SIZE: CVal = Info.Var.Size; break; 2415 case IOK_TYPE: CVal = Info.Var.Type; break; 2416 } 2417 2418 return CVal; 2419 } 2420 2421 // Query a candidate string for being an Intel assembly operator 2422 // Report back its kind, or IOK_INVALID if does not evaluated as a known one 2423 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) { 2424 return StringSwitch<unsigned>(Name.lower()) 2425 .Case("type", MOK_TYPE) 2426 .Cases("size", "sizeof", MOK_SIZEOF) 2427 .Cases("length", "lengthof", MOK_LENGTHOF) 2428 .Default(MOK_INVALID); 2429 } 2430 2431 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator 2432 /// returns the number of elements in an array. It returns the value 1 for 2433 /// non-array variables. The SIZEOF operator returns the size of a type or 2434 /// variable in bytes. A variable's size is the product of its LENGTH and TYPE. 2435 /// The TYPE operator returns the size of a variable. If the variable is an 2436 /// array, TYPE returns the size of a single element. 2437 bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) { 2438 MCAsmParser &Parser = getParser(); 2439 SMLoc OpLoc = Parser.getTok().getLoc(); 2440 Parser.Lex(); // Eat operator. 2441 2442 Val = 0; 2443 if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) { 2444 // Check for SIZEOF(<type>) and TYPE(<type>). 
2445 bool InParens = Parser.getTok().is(AsmToken::LParen); 2446 const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok(); 2447 AsmTypeInfo Type; 2448 if (IDTok.is(AsmToken::Identifier) && 2449 !Parser.lookUpType(IDTok.getIdentifier(), Type)) { 2450 Val = Type.Size; 2451 2452 // Eat tokens. 2453 if (InParens) 2454 parseToken(AsmToken::LParen); 2455 parseToken(AsmToken::Identifier); 2456 if (InParens) 2457 parseToken(AsmToken::RParen); 2458 } 2459 } 2460 2461 if (!Val) { 2462 IntelExprStateMachine SM; 2463 SMLoc End, Start = Parser.getTok().getLoc(); 2464 if (ParseIntelExpression(SM, End)) 2465 return true; 2466 2467 switch (OpKind) { 2468 default: 2469 llvm_unreachable("Unexpected operand kind!"); 2470 case MOK_SIZEOF: 2471 Val = SM.getSize(); 2472 break; 2473 case MOK_LENGTHOF: 2474 Val = SM.getLength(); 2475 break; 2476 case MOK_TYPE: 2477 Val = SM.getElementSize(); 2478 break; 2479 } 2480 2481 if (!Val) 2482 return Error(OpLoc, "expression has unknown type", SMRange(Start, End)); 2483 } 2484 2485 return false; 2486 } 2487 2488 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) { 2489 Size = StringSwitch<unsigned>(getTok().getString()) 2490 .Cases("BYTE", "byte", 8) 2491 .Cases("WORD", "word", 16) 2492 .Cases("DWORD", "dword", 32) 2493 .Cases("FLOAT", "float", 32) 2494 .Cases("LONG", "long", 32) 2495 .Cases("FWORD", "fword", 48) 2496 .Cases("DOUBLE", "double", 64) 2497 .Cases("QWORD", "qword", 64) 2498 .Cases("MMWORD","mmword", 64) 2499 .Cases("XWORD", "xword", 80) 2500 .Cases("TBYTE", "tbyte", 80) 2501 .Cases("XMMWORD", "xmmword", 128) 2502 .Cases("YMMWORD", "ymmword", 256) 2503 .Cases("ZMMWORD", "zmmword", 512) 2504 .Default(0); 2505 if (Size) { 2506 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word). 2507 if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr"))) 2508 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); 2509 Lex(); // Eat ptr. 
  }
  return false;
}

/// Parse one Intel-syntax operand: a rounding-mode clause, a plain register,
/// or an immediate/memory expression, optionally preceded by a "<size> ptr"
/// prefix and/or a "<segreg>:" override. Appends the resulting operand(s) to
/// \p Operands; \p Name is the instruction mnemonic (used to detect jmp/call).
/// Returns true on error.
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start, End;

  // Parse optional Size directive.
  unsigned Size;
  if (ParseIntelMemoryOperandSize(Size))
    return true;
  bool PtrInOperand = bool(Size);

  Start = Tok.getLoc();

  // Rounding mode operand.
  if (getLexer().is(AsmToken::LCurly))
    return ParseRoundingModeOp(Start, Operands);

  // Register operand.
  MCRegister RegNo;
  if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
    if (RegNo == X86::RIP)
      return Error(Start, "rip can only be used as a base register");
    // A Register followed by ':' is considered a segment override
    if (Tok.isNot(AsmToken::Colon)) {
      if (PtrInOperand)
        return Error(Start, "expected memory operand after 'ptr', "
                            "found register operand instead");
      Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
      return false;
    }
    // An alleged segment override. Check if we have a valid segment register
    if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
      return Error(Start, "invalid segment register");
    // Eat ':' and update Start location
    Start = Lex().getLoc();
  }

  // Immediates and Memory
  IntelExprStateMachine SM;
  if (ParseIntelExpression(SM, End))
    return true;

  if (isParsingMSInlineAsm())
    RewriteIntelExpression(SM, Start, Tok.getLoc());

  // Combine the symbolic and constant parts of the displacement.
  int64_t Imm = SM.getImm();
  const MCExpr *Disp = SM.getSym();
  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
  if (Disp && Imm)
    Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
  if (!Disp)
    Disp = ImmDisp;

  // RegNo != 0 specifies a valid segment register,
  // and we are parsing a segment override
  if (!SM.isMemExpr() && !RegNo) {
    if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
      const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
      if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
        // Disp includes the address of a variable; make sure this is recorded
        // for later handling.
        Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
                                                 SM.getSymName(), Info.Var.Decl,
                                                 Info.Var.IsGlobalLV));
        return false;
      }
    }

    // Pure immediate operand.
    Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
    return false;
  }

  StringRef ErrMsg;
  unsigned BaseReg = SM.getBaseReg();
  unsigned IndexReg = SM.getIndexReg();
  // RIP with an explicit index is dropped as a base (RIP-relative addressing
  // admits no index register).
  if (IndexReg && BaseReg == X86::RIP)
    BaseReg = 0;
  unsigned Scale = SM.getScale();
  if (!PtrInOperand)
    Size = SM.getElementSize() << 3; // element size is in bytes; Size in bits

  // ESP/RSP cannot be an index; with no explicit scale, swap it into the
  // base position when the other register can serve as index.
  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
      (IndexReg == X86::ESP || IndexReg == X86::RSP))
    std::swap(BaseReg, IndexReg);

  // If BaseReg is a vector register and IndexReg is not, swap them unless
  // Scale was specified in which case it would be an error.
  if (Scale == 0 &&
      !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
      (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
    std::swap(BaseReg, IndexReg);

  if (Scale != 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
    return Error(Start, "16-bit addresses cannot have a scale");

  // If there was no explicit scale specified, change it to 1.
  if (Scale == 0)
    Scale = 1;

  // If this is a 16-bit addressing mode with the base and index in the wrong
  // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
  // shared with att syntax where order matters.
  if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
      (IndexReg == X86::BX || IndexReg == X86::BP))
    std::swap(BaseReg, IndexReg);

  if ((BaseReg || IndexReg) &&
      CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(Start, ErrMsg);
  bool IsUnconditionalBranch =
      Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
  if (isParsingMSInlineAsm())
    return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale,
                                   IsUnconditionalBranch && is64BitMode(),
                                   Start, End, Size, SM.getSymName(),
                                   SM.getIdentifierInfo(), Operands);

  // When parsing x64 MS-style assembly, all non-absolute references to a named
  // variable default to RIP-relative.
  unsigned DefaultBaseReg = X86::NoRegister;
  bool MaybeDirectBranchDest = true;

  if (Parser.isParsingMasm()) {
    if (is64BitMode() && SM.getElementSize() > 0) {
      DefaultBaseReg = X86::RIP;
    }
    if (IsUnconditionalBranch) {
      if (PtrInOperand) {
        MaybeDirectBranchDest = false;
        if (is64BitMode())
          DefaultBaseReg = X86::RIP;
      } else if (!BaseReg && !IndexReg && Disp &&
                 Disp->getKind() == MCExpr::SymbolRef) {
        if (is64BitMode()) {
          if (SM.getSize() == 8) {
            MaybeDirectBranchDest = false;
            DefaultBaseReg = X86::RIP;
          }
        } else {
          if (SM.getSize() == 4 || SM.getSize() == 2)
            MaybeDirectBranchDest = false;
        }
      }
    }
  } else if (IsUnconditionalBranch) {
    // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
    if (!PtrInOperand && SM.isOffsetOperator())
      return Error(
          Start, "`OFFSET` operator cannot be used in an unconditional branch");
    if (PtrInOperand || SM.isBracketUsed())
      MaybeDirectBranchDest = false;
  }

  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
        /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
  else
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
        /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
        MaybeDirectBranchDest));
  return false;
}

/// Parse one AT&T-syntax operand: '$'-immediate, rounding-mode clause, or a
/// register/memory operand (disambiguated via X86MCExpr). Returns true on
/// error.
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  switch (getLexer().getKind()) {
  case AsmToken::Dollar: {
    // $42 or $ID -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    // This is an immediate, so we should not parse a register. Do a precheck
    // for '%' to supersede intra-register parse errors.
    SMLoc L = Parser.getTok().getLoc();
    if (check(getLexer().is(AsmToken::Percent), L,
              "expected immediate expression") ||
        getParser().parseExpression(Val, End) ||
        check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
      return true;
    Operands.push_back(X86Operand::CreateImm(Val, Start, End));
    return false;
  }
  case AsmToken::LCurly: {
    SMLoc Start = Parser.getTok().getLoc();
    return ParseRoundingModeOp(Start, Operands);
  }
  default: {
    // This is a memory operand or a register. We have some parsing
    // complications as a '(' may be part of an immediate expression or the
    // addressing mode block. This is complicated by the fact that an
    // assembler-level variable may refer either to a register or an
    // immediate expression.

    SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
    const MCExpr *Expr = nullptr;
    unsigned Reg = 0;
    if (getLexer().isNot(AsmToken::LParen)) {
      // No '(' so this is either a displacement expression or a register.
      if (Parser.parseExpression(Expr, EndLoc))
        return true;
      if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
        // Segment Register. Reset Expr and copy value to register.
        Expr = nullptr;
        Reg = RE->getRegNo();

        // Check the register.
        if (Reg == X86::EIZ || Reg == X86::RIZ)
          return Error(
              Loc, "%eiz and %riz can only be used as index registers",
              SMRange(Loc, EndLoc));
        if (Reg == X86::RIP)
          return Error(Loc, "%rip can only be used as a base register",
                       SMRange(Loc, EndLoc));
        // Return registers that are not segment prefixes immediately.
        if (!Parser.parseOptionalToken(AsmToken::Colon)) {
          Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
          return false;
        }
        if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
          return Error(Loc, "invalid segment register");
        // Accept a '*' absolute memory reference after the segment. Place it
        // before the full memory operand.
        if (getLexer().is(AsmToken::Star))
          Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
      }
    }
    // This is a Memory operand.
    return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
  }
  }
}

// X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
// otherwise the EFLAGS Condition Code enumerator.
X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
  return StringSwitch<X86::CondCode>(CC)
      .Case("o", X86::COND_O)          // Overflow
      .Case("no", X86::COND_NO)        // No Overflow
      .Cases("b", "nae", X86::COND_B)  // Below/Neither Above nor Equal
      .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
      .Cases("e", "z", X86::COND_E)    // Equal/Zero
      .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
      .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
      .Cases("a", "nbe", X86::COND_A)  // Above/Neither Below nor Equal
      .Case("s", X86::COND_S)          // Sign
      .Case("ns", X86::COND_NS)        // No Sign
      .Cases("p", "pe", X86::COND_P)   // Parity/Parity Even
      .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
      .Cases("l", "nge", X86::COND_L)  // Less/Neither Greater nor Equal
      .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
      .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
      .Cases("g", "nle", X86::COND_G)  // Greater/Neither Less nor Equal
      .Default(X86::COND_INVALID);
}

// true on failure, false otherwise
// If no {z} mark was found - Parser doesn't advance
bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
                          const SMLoc &StartLoc) {
  MCAsmParser &Parser = getParser();
  // Assuming we have just passed the '{' mark, query the next token.
  // If no "z" identifier follows, there is no {z} mark here. Return false,
  // as no parsing error was encountered.
  if (!(getLexer().is(AsmToken::Identifier) &&
        (getLexer().getTok().getIdentifier() == "z")))
    return false;
  Parser.Lex(); // Eat z
  // Query and eat the '}' mark
  if (!getLexer().is(AsmToken::RCurly))
    return Error(getLexer().getLoc(), "Expected } at this point");
  Parser.Lex(); // Eat '}'
  // Assign Z with the {z} mark operand
  Z = X86Operand::CreateToken("{z}", StartLoc);
  return false;
}

// true on failure, false otherwise
// Parses the optional AVX-512 decoration after an operand: either a memory
// broadcast ({1to<NUM>}) or a write-mask/zeroing combination ({k}, {z},
// {k}{z}, {z}{k}); appends the recognized pieces as token/register operands.
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}.
    if (getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      StringRef Prefix = getLexer().getTok().getString();
      Parser.Lex(); // Eat first token of 1to8
      if (!getLexer().is(AsmToken::Identifier))
        return TokError("Expected 1to<NUM> at this point");
      // Recognize only reasonable suffixes.
      SmallVector<char, 5> BroadcastVector;
      StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
                                      .toStringRef(BroadcastVector);
      if (!BroadcastString.starts_with("1to"))
        return TokError("Expected 1to<NUM> at this point");
      const char *BroadcastPrimitive =
          StringSwitch<const char *>(BroadcastString)
              .Case("1to2", "{1to2}")
              .Case("1to4", "{1to4}")
              .Case("1to8", "{1to8}")
              .Case("1to16", "{1to16}")
              .Case("1to32", "{1to32}")
              .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex(); // Eat trailing token of 1toN
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex(); // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z};
      // the last one has no meaning, but GCC accepts it.
      // Currently, we have just passed a '{' mark.
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Reaching here means that parsing of the allegedly '{z}' mark yielded
      // no errors.
      // Query for the need of further parsing for a {%k<NUM>} mark
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // Parse an op-mask register mark ({%k<NUM>}), which is now to be
        // expected
        MCRegister RegNo;
        SMLoc RegLoc;
        if (!parseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                       "Expected an op-mask register at this point");
        // {%k<NUM>} mark is found, inquire for {z}
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // If we found a parsing error, or found no (expected) {z} mark
          // - report an error
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // On the contrary - had it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}

/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
/// has already been parsed if present. disp may be provided as well.
bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
                                   SMLoc StartLoc, SMLoc EndLoc,
                                   OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  SMLoc Loc;
  // Based on the initial passed values, we may be in any of these cases, we are
  // in one of these cases (with current position (*)):

  //   1. seg : * disp  (base-index-scale-expr)
  //   2.
seg : *(disp) (base-index-scale-expr) 2899 // 3. seg : *(base-index-scale-expr) 2900 // 4. disp *(base-index-scale-expr) 2901 // 5. *(disp) (base-index-scale-expr) 2902 // 6. *(base-index-scale-expr) 2903 // 7. disp * 2904 // 8. *(disp) 2905 2906 // If we do not have an displacement yet, check if we're in cases 4 or 6 by 2907 // checking if the first object after the parenthesis is a register (or an 2908 // identifier referring to a register) and parse the displacement or default 2909 // to 0 as appropriate. 2910 auto isAtMemOperand = [this]() { 2911 if (this->getLexer().isNot(AsmToken::LParen)) 2912 return false; 2913 AsmToken Buf[2]; 2914 StringRef Id; 2915 auto TokCount = this->getLexer().peekTokens(Buf, true); 2916 if (TokCount == 0) 2917 return false; 2918 switch (Buf[0].getKind()) { 2919 case AsmToken::Percent: 2920 case AsmToken::Comma: 2921 return true; 2922 // These lower cases are doing a peekIdentifier. 2923 case AsmToken::At: 2924 case AsmToken::Dollar: 2925 if ((TokCount > 1) && 2926 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) && 2927 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer())) 2928 Id = StringRef(Buf[0].getLoc().getPointer(), 2929 Buf[1].getIdentifier().size() + 1); 2930 break; 2931 case AsmToken::Identifier: 2932 case AsmToken::String: 2933 Id = Buf[0].getIdentifier(); 2934 break; 2935 default: 2936 return false; 2937 } 2938 // We have an ID. Check if it is bound to a register. 2939 if (!Id.empty()) { 2940 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id); 2941 if (Sym->isVariable()) { 2942 auto V = Sym->getVariableValue(/*SetUsed*/ false); 2943 return isa<X86MCExpr>(V); 2944 } 2945 } 2946 return false; 2947 }; 2948 2949 if (!Disp) { 2950 // Parse immediate if we're not at a mem operand yet. 
2951 if (!isAtMemOperand()) { 2952 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc)) 2953 return true; 2954 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here."); 2955 } else { 2956 // Disp is implicitly zero if we haven't parsed it yet. 2957 Disp = MCConstantExpr::create(0, Parser.getContext()); 2958 } 2959 } 2960 2961 // We are now either at the end of the operand or at the '(' at the start of a 2962 // base-index-scale-expr. 2963 2964 if (!parseOptionalToken(AsmToken::LParen)) { 2965 if (SegReg == 0) 2966 Operands.push_back( 2967 X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc)); 2968 else 2969 Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 2970 0, 0, 1, StartLoc, EndLoc)); 2971 return false; 2972 } 2973 2974 // If we reached here, then eat the '(' and Process 2975 // the rest of the memory operand. 2976 unsigned BaseReg = 0, IndexReg = 0, Scale = 1; 2977 SMLoc BaseLoc = getLexer().getLoc(); 2978 const MCExpr *E; 2979 StringRef ErrMsg; 2980 2981 // Parse BaseReg if one is provided. 2982 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) { 2983 if (Parser.parseExpression(E, EndLoc) || 2984 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here")) 2985 return true; 2986 2987 // Check the register. 2988 BaseReg = cast<X86MCExpr>(E)->getRegNo(); 2989 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) 2990 return Error(BaseLoc, "eiz and riz can only be used as index registers", 2991 SMRange(BaseLoc, EndLoc)); 2992 } 2993 2994 if (parseOptionalToken(AsmToken::Comma)) { 2995 // Following the comma we should have either an index register, or a scale 2996 // value. We don't support the later form, but we want to parse it 2997 // correctly. 2998 // 2999 // Even though it would be completely consistent to support syntax like 3000 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this. 
3001 if (getLexer().isNot(AsmToken::RParen)) { 3002 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc)) 3003 return true; 3004 3005 if (!isa<X86MCExpr>(E)) { 3006 // We've parsed an unexpected Scale Value instead of an index 3007 // register. Interpret it as an absolute. 3008 int64_t ScaleVal; 3009 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr())) 3010 return Error(Loc, "expected absolute expression"); 3011 if (ScaleVal != 1) 3012 Warning(Loc, "scale factor without index register is ignored"); 3013 Scale = 1; 3014 } else { // IndexReg Found. 3015 IndexReg = cast<X86MCExpr>(E)->getRegNo(); 3016 3017 if (BaseReg == X86::RIP) 3018 return Error(Loc, 3019 "%rip as base register can not have an index register"); 3020 if (IndexReg == X86::RIP) 3021 return Error(Loc, "%rip is not allowed as an index register"); 3022 3023 if (parseOptionalToken(AsmToken::Comma)) { 3024 // Parse the scale amount: 3025 // ::= ',' [scale-expression] 3026 3027 // A scale amount without an index is ignored. 3028 if (getLexer().isNot(AsmToken::RParen)) { 3029 int64_t ScaleVal; 3030 if (Parser.parseTokenLoc(Loc) || 3031 Parser.parseAbsoluteExpression(ScaleVal)) 3032 return Error(Loc, "expected scale expression"); 3033 Scale = (unsigned)ScaleVal; 3034 // Validate the scale amount. 3035 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) && 3036 Scale != 1) 3037 return Error(Loc, "scale factor in 16-bit address must be 1"); 3038 if (checkScale(Scale, ErrMsg)) 3039 return Error(Loc, ErrMsg); 3040 } 3041 } 3042 } 3043 } 3044 } 3045 3046 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. 3047 if (parseToken(AsmToken::RParen, "unexpected token in memory operand")) 3048 return true; 3049 3050 // This is to support otherwise illegal operand (%dx) found in various 3051 // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now 3052 // be supported. Mark such DX variants separately fix only in special cases. 
3053 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 && 3054 isa<MCConstantExpr>(Disp) && 3055 cast<MCConstantExpr>(Disp)->getValue() == 0) { 3056 Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc)); 3057 return false; 3058 } 3059 3060 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(), 3061 ErrMsg)) 3062 return Error(BaseLoc, ErrMsg); 3063 3064 // If the displacement is a constant, check overflows. For 64-bit addressing, 3065 // gas requires isInt<32> and otherwise reports an error. For others, gas 3066 // reports a warning and allows a wider range. E.g. gas allows 3067 // [-0xffffffff,0xffffffff] for 32-bit addressing (e.g. Linux kernel uses 3068 // `leal -__PAGE_OFFSET(%ecx),%esp` where __PAGE_OFFSET is 0xc0000000). 3069 if (BaseReg || IndexReg) { 3070 if (auto CE = dyn_cast<MCConstantExpr>(Disp)) { 3071 auto Imm = CE->getValue(); 3072 bool Is64 = X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) || 3073 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg); 3074 bool Is16 = X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg); 3075 if (Is64) { 3076 if (!isInt<32>(Imm)) 3077 return Error(BaseLoc, "displacement " + Twine(Imm) + 3078 " is not within [-2147483648, 2147483647]"); 3079 } else if (!Is16) { 3080 if (!isUInt<32>(Imm < 0 ? -uint64_t(Imm) : uint64_t(Imm))) { 3081 Warning(BaseLoc, "displacement " + Twine(Imm) + 3082 " shortened to 32-bit signed " + 3083 Twine(static_cast<int32_t>(Imm))); 3084 } 3085 } else if (!isUInt<16>(Imm < 0 ? 
-uint64_t(Imm) : uint64_t(Imm))) {
      // 16-bit addressing: out-of-range displacement is truncated with a
      // warning (gas compatibility), not rejected.
      Warning(BaseLoc, "displacement " + Twine(Imm) +
                           " shortened to 16-bit signed " +
                           Twine(static_cast<int16_t>(Imm)));
    }
    }
  }

  if (SegReg || BaseReg || IndexReg)
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                             BaseReg, IndexReg, Scale, StartLoc,
                                             EndLoc));
  else
    Operands.push_back(
        X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
  return false;
}

// Parse either a standard primary expression or a register.
// A leading '%' (AT&T) or an identifier naming a register (Intel) is parsed
// as a register and wrapped in an X86MCExpr; anything else is delegated to
// the generic MCAsmParser primary-expression parser.
bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
  MCAsmParser &Parser = getParser();
  // See if this is a register first.
  if (getTok().is(AsmToken::Percent) ||
      (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
       MatchRegisterName(Parser.getTok().getString()))) {
    SMLoc StartLoc = Parser.getTok().getLoc();
    MCRegister RegNo;
    if (parseRegister(RegNo, StartLoc, EndLoc))
      return true;
    Res = X86MCExpr::create(RegNo, Parser.getContext());
    return false;
  }
  return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
}

// Top-level mnemonic + operand parser (both AT&T and Intel syntax).
// Responsibilities, in order:
//   1. Consume pseudo prefixes: "{vex}"/"{vex2}"/"{vex3}"/"{evex}",
//      "{disp8}"/"{disp32}", plus the MASM inline-asm spellings and the
//      ".d8"/".d32" mnemonic-suffix forms.
//   2. Patch mnemonics the matcher tables can't express directly: the
//      cmp<cc>{sh,ss,sd,ph,ps,pd}, vpcmp<cc>* and vpcom<cc>* families are
//      rewritten to their base mnemonic with an extracted immediate
//      predicate; "setneb" -> "setne"; Intel "jmp short" skipping.
//   3. Fold legacy prefixes (lock/rep/repe/repz/repne/repnz/notrack) into
//      IP_* flags, and special-case the data16/data32 hacks.
//   4. Parse the operand list, then append/verify default operands for the
//      string instructions (ins/outs/lods/stos/scas/cmps/movs) and apply
//      gas-compatibility rewrites (fsub->fsubp, mov-to-segreg narrowing,
//      in/out "(%dx)" forms, "xlat mem8" -> "xlatb").
// Returns true (after emitting a diagnostic) on a parse error, false on
// success with the parsed operands appended to \p Operands.
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                    SMLoc NameLoc, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  InstInfo = &Info;

  // Reset the forced VEX encoding.
  ForcedVEXEncoding = VEXEncoding_Default;
  ForcedDispEncoding = DispEncoding_Default;
  UseApxExtendedReg = false;

  // Parse pseudo prefixes.
  while (true) {
    if (Name == "{") {
      if (getLexer().isNot(AsmToken::Identifier))
        return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
      std::string Prefix = Parser.getTok().getString().lower();
      Parser.Lex(); // Eat identifier.
      if (getLexer().isNot(AsmToken::RCurly))
        return Error(Parser.getTok().getLoc(), "Expected '}'");
      Parser.Lex(); // Eat curly.

      if (Prefix == "vex")
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Prefix == "vex2")
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Prefix == "vex3")
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Prefix == "evex")
        ForcedVEXEncoding = VEXEncoding_EVEX;
      else if (Prefix == "disp8")
        ForcedDispEncoding = DispEncoding_Disp8;
      else if (Prefix == "disp32")
        ForcedDispEncoding = DispEncoding_Disp32;
      else
        return Error(NameLoc, "unknown prefix");

      NameLoc = Parser.getTok().getLoc();
      if (getLexer().is(AsmToken::LCurly)) {
        Parser.Lex();
        Name = "{";
      } else {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        Parser.Lex();
      }
      continue;
    }
    // Parse MASM style pseudo prefixes.
    if (isParsingMSInlineAsm()) {
      if (Name.equals_insensitive("vex"))
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Name.equals_insensitive("vex2"))
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Name.equals_insensitive("vex3"))
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Name.equals_insensitive("evex"))
        ForcedVEXEncoding = VEXEncoding_EVEX;

      if (ForcedVEXEncoding != VEXEncoding_Default) {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        NameLoc = Parser.getTok().getLoc();
        Parser.Lex();
      }
    }
    break;
  }

  // Support the suffix syntax for overriding displacement size as well.
  if (Name.consume_back(".d32")) {
    ForcedDispEncoding = DispEncoding_Disp32;
  } else if (Name.consume_back(".d8")) {
    ForcedDispEncoding = DispEncoding_Disp8;
  }

  StringRef PatchedName = Name;

  // Hack to skip "short" following Jcc.
  if (isParsingIntelSyntax() &&
      (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
       PatchedName == "jcxz" || PatchedName == "jecxz" ||
       (PatchedName.starts_with("j") &&
        ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
    StringRef NextTok = Parser.getTok().getString();
    if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
                               : NextTok == "short") {
      SMLoc NameEndLoc =
          NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
      // Eat the short keyword.
      Parser.Lex();
      // MS and GAS ignore the short keyword; they both determine the jmp type
      // based on the distance of the label. (NASM does emit different code with
      // and without "short," though.)
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
                                          NextTok.size() + 1);
    }
  }

  // FIXME: Hack to recognize setneb as setne.
  if (PatchedName.starts_with("set") && PatchedName.ends_with("b") &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  // Extracted comparison-predicate immediate for the cmp/vpcmp/vpcom
  // mnemonic families; ~0U means "no predicate encoded in the mnemonic".
  unsigned ComparisonPredicate = ~0U;

  // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
  if ((PatchedName.starts_with("cmp") || PatchedName.starts_with("vcmp")) &&
      (PatchedName.ends_with("ss") || PatchedName.ends_with("sd") ||
       PatchedName.ends_with("sh") || PatchedName.ends_with("ph") ||
       PatchedName.ends_with("ps") || PatchedName.ends_with("pd"))) {
    bool IsVCMP = PatchedName[0] == 'v';
    unsigned CCIdx = IsVCMP ? 4 : 3;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(CCIdx, PatchedName.size() - 2))
      .Case("eq", 0x00)
      .Case("eq_oq", 0x00)
      .Case("lt", 0x01)
      .Case("lt_os", 0x01)
      .Case("le", 0x02)
      .Case("le_os", 0x02)
      .Case("unord", 0x03)
      .Case("unord_q", 0x03)
      .Case("neq", 0x04)
      .Case("neq_uq", 0x04)
      .Case("nlt", 0x05)
      .Case("nlt_us", 0x05)
      .Case("nle", 0x06)
      .Case("nle_us", 0x06)
      .Case("ord", 0x07)
      .Case("ord_q", 0x07)
      /* AVX only from here */
      .Case("eq_uq", 0x08)
      .Case("nge", 0x09)
      .Case("nge_us", 0x09)
      .Case("ngt", 0x0A)
      .Case("ngt_us", 0x0A)
      .Case("false", 0x0B)
      .Case("false_oq", 0x0B)
      .Case("neq_oq", 0x0C)
      .Case("ge", 0x0D)
      .Case("ge_os", 0x0D)
      .Case("gt", 0x0E)
      .Case("gt_os", 0x0E)
      .Case("true", 0x0F)
      .Case("true_uq", 0x0F)
      .Case("eq_os", 0x10)
      .Case("lt_oq", 0x11)
      .Case("le_oq", 0x12)
      .Case("unord_s", 0x13)
      .Case("neq_us", 0x14)
      .Case("nlt_uq", 0x15)
      .Case("nle_uq", 0x16)
      .Case("ord_s", 0x17)
      .Case("eq_us", 0x18)
      .Case("nge_uq", 0x19)
      .Case("ngt_uq", 0x1A)
      .Case("false_os", 0x1B)
      .Case("neq_os", 0x1C)
      .Case("ge_oq", 0x1D)
      .Case("gt_oq", 0x1E)
      .Case("true_us", 0x1F)
      .Default(~0U);
    // SSE cmp only supports predicates 0-7; the FP16 (sh/ph) forms are
    // AVX512-FP16 and therefore VEX/EVEX-only.
    if (CC != ~0U && (IsVCMP || CC < 8) &&
        (IsVCMP || PatchedName.back() != 'h')) {
      if (PatchedName.ends_with("ss"))
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      else if (PatchedName.ends_with("sd"))
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      else if (PatchedName.ends_with("ps"))
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      else if (PatchedName.ends_with("pd"))
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      else if (PatchedName.ends_with("sh"))
        PatchedName = "vcmpsh";
      else if (PatchedName.ends_with("ph"))
        PatchedName = "vcmpph";
      else
        llvm_unreachable("Unexpected suffix!");

      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.starts_with("vpcmp") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
      .Case("lt",    0x1)
      .Case("le",    0x2)
      //.Case("false", 0x3) // Not a documented alias.
      .Case("neq",   0x4)
      .Case("nlt",   0x5)
      .Case("nle",   0x6)
      //.Case("true",  0x7) // Not a documented alias.
      .Default(~0U);
    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.starts_with("vpcom") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("lt",    0x0)
      .Case("le",    0x1)
      .Case("gt",    0x2)
      .Case("ge",    0x3)
      .Case("eq",    0x4)
      .Case("neq",   0x5)
      .Case("false", 0x6)
      .Case("true",  0x7)
      .Default(~0U);
    if (CC != ~0U) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // Determine whether this is an instruction prefix.
  // FIXME:
  // Enhance prefixes integrity robustness. for example, following forms
  // are currently tolerated:
  // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
  // lock addq %rax, %rbx ; Destination operand must be of memory type
  // xacquire <insn>      ; xacquire must be accompanied by 'lock'
  bool IsPrefix =
      StringSwitch<bool>(Name)
          .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
          .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
          .Cases("xacquire", "xrelease", true)
          .Cases("acquire", "release", isParsingIntelSyntax())
          .Default(false);

  auto isLockRepeatNtPrefix = [](StringRef N) {
    return StringSwitch<bool>(N)
        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
        .Default(false);
  };

  bool CurlyAsEndOfStatement = false;

  unsigned Flags = X86::IP_NO_PREFIX;
  while (isLockRepeatNtPrefix(Name.lower())) {
    unsigned Prefix =
        StringSwitch<unsigned>(Name)
            .Cases("lock", "lock", X86::IP_HAS_LOCK)
            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
            .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
    Flags |= Prefix;
    if (getLexer().is(AsmToken::EndOfStatement)) {
      // We don't have real instr with the given prefix
      // let's use the prefix as the instr.
      // TODO: there could be several prefixes one after another
      Flags = X86::IP_NO_PREFIX;
      break;
    }
    // FIXME: The mnemonic won't match correctly if it's not in lower case.
    Name = Parser.getTok().getString();
    Parser.Lex(); // eat the prefix
    // Hack: we could have something like "rep # some comment" or
    //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
    while (Name.starts_with(";") || Name.starts_with("\n") ||
           Name.starts_with("#") || Name.starts_with("\t") ||
           Name.starts_with("/")) {
      // FIXME: The mnemonic won't match correctly if it's not in lower case.
      Name = Parser.getTok().getString();
      Parser.Lex(); // go to next prefix or instr
    }
  }

  if (Flags)
    PatchedName = Name;

  // Hacks to handle 'data16' and 'data32'
  if (PatchedName == "data16" && is16BitMode()) {
    return Error(NameLoc, "redundant data16 prefix");
  }
  if (PatchedName == "data32") {
    if (is32BitMode())
      return Error(NameLoc, "redundant data32 prefix");
    if (is64BitMode())
      return Error(NameLoc, "'data32' is not supported in 64-bit mode");
    // Hack to 'data16' for the table lookup.
    PatchedName = "data16";

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      StringRef Next = Parser.getTok().getString();
      getLexer().Lex();
      // data32 effectively changes the instruction suffix.
      // TODO Generalize.
      if (Next == "callw")
        Next = "calll";
      if (Next == "ljmpw")
        Next = "ljmpl";

      Name = Next;
      PatchedName = Name;
      ForcedDataPrefix = X86::Is32Bit;
      IsPrefix = false;
    }
  }

  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  // Push the immediate if we extracted one from the mnemonic.
  // AT&T syntax puts it first; Intel syntax pushes it after the operands
  // (see below), matching each syntax's operand order.
  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star))
      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));

    // Read the operands.
    while (true) {
      if (parseOperand(Operands, Name))
        return true;
      if (HandleAVX512Operand(Operands))
        return true;

      // check for comma and eat it
      if (getLexer().is(AsmToken::Comma))
        Parser.Lex();
      else
        break;
    }

    // In MS inline asm curly braces mark the beginning/end of a block,
    // therefore they should be interpreted as end of statement
    CurlyAsEndOfStatement =
        isParsingIntelSyntax() && isParsingMSInlineAsm() &&
        (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
    if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
      return TokError("unexpected token in argument list");
  }

  // Push the immediate if we extracted one from the mnemonic.
  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Consume the EndOfStatement or the prefix separator Slash
  if (getLexer().is(AsmToken::EndOfStatement) ||
      (IsPrefix && getLexer().is(AsmToken::Slash)))
    Parser.Lex();
  else if (CurlyAsEndOfStatement)
    // Add an actual EndOfStatement before the curly brace
    Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
                                   getLexer().getTok().getLoc(), 0);

  // This is for gas compatibility and cannot be done in td.
  // Adding "p" for some floating point with no argument.
  // For example: fsub --> fsubp
  bool IsFp =
    Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
  if (IsFp && Operands.size() == 1) {
    const char *Repl = StringSwitch<const char *>(Name)
      .Case("fsub", "fsubp")
      .Case("fdiv", "fdivp")
      .Case("fsubr", "fsubrp")
      .Case("fdivr", "fdivrp");
    static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
  }

  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
      (Operands.size() == 3)) {
    X86Operand &Op1 = (X86Operand &)*Operands[1];
    X86Operand &Op2 = (X86Operand &)*Operands[2];
    SMLoc Loc = Op1.getEndLoc();
    // Moving a 32 or 16 bit value into a segment register has the same
    // behavior. Modify such instructions to always take shorter form.
    if (Op1.isReg() && Op2.isReg() &&
        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
            Op2.getReg()) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
      // Change instruction name to match new instruction.
      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
        Name = is16BitMode() ? "movw" : "movl";
        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
      }
      // Select the correct equivalent 16-/32-bit source register.
      MCRegister Reg =
          getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
    }
  }

  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
       Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands.back();
    if (Op.isDXReg())
      Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                              Op.getEndLoc());
  }
  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
       Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands[1];
    if (Op.isDXReg())
      Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                          Op.getEndLoc());
  }

  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
  bool HadVerifyError = false;

  // Append default arguments to "ins[bwld]"
  if (Name.starts_with("ins") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
       Name == "ins")) {

    AddDefaultSrcDestOperands(TmpOperands,
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Append default arguments to "outs[bwld]"
  if (Name.starts_with("outs") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
       Name == "outsd" || Name == "outs")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
  // values of $SIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("lods") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("stos") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.starts_with("scas") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "cmps[bwlq]".
  if (Name.starts_with("cmps") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
                              DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "movs[bwlq]".
  if (((Name.starts_with("movs") &&
        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
       (Name.starts_with("smov") &&
        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
      (Operands.size() == 1 || Operands.size() == 3)) {
    if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Check if we encountered an error for one of the string instructions
  if (HadVerifyError) {
    return HadVerifyError;
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}

// Post-process a successfully matched MCInst: apply encoding-size
// optimizations (VEX3->VEX2, shift/rotate-by-1 forms) and honor the
// {disp32} pseudo prefix and the "int $3" -> "int3" shrink.
// Returns true if the instruction was modified.
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  if (ForcedVEXEncoding != VEXEncoding_VEX3 &&
      X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
    return true;

  if (X86::optimizeShiftRotateWithImmediateOne(Inst))
    return true;

  switch (Inst.getOpcode()) {
  default: return false;
  case X86::JMP_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
      return true;
    }

    return false;
  case X86::JCC_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
      return true;
    }

    return false;
  case X86::INT: {
    // Transforms "int $3" into "int3" as a size optimization.
    // We can't write this as an InstAlias.
    if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
      return false;
    Inst.clear();
    Inst.setOpcode(X86::INT3);
    return true;
  }
  }
}

// Diagnose instruction forms that are encodable but suspicious or illegal:
// destination/source or index/destination register reuse in complex-FP and
// gather families, AH/BH/CH/DH combined with a REX-requiring register, and
// non-RIP-relative prefetchit0/1. Returns false when the instruction is
// acceptable as written; otherwise returns the result of the emitted
// Error/Warning diagnostic.
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  using namespace X86;
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
  unsigned Opcode = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opcode).TSFlags;
  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
      isVFMADDCSH(Opcode)) {
    unsigned Dest = Inst.getOperand(0).getReg();
    for (unsigned i = 2; i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
             isVFMULCSH(Opcode)) {
    unsigned Dest = Inst.getOperand(0).getReg();
    // The mask variants have different operand list. Scan from the third
    // operand to avoid emitting incorrect warning.
    //     VFMULCPHZrr   Dest, Src1, Src2
    //     VFMULCPHZrrk  Dest, Dest, Mask, Src1, Src2
    //     VFMULCPHZrrkz Dest, Mask, Src1, Src2
    for (unsigned i = TSFlags & X86II::EVEX_K ? 2 : 1;
         i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
             isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
             isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
    unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
                                    X86::AddrNumOperands - 1).getReg();
    unsigned Src2Enc = MRI->getEncodingValue(Src2);
    // These 4-register-block instructions implicitly use the aligned group
    // of four registers starting at (enc / 4) * 4; warn when the written
    // register is not the group leader.
    if (Src2Enc % 4 != 0) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
      unsigned GroupStart = (Src2Enc / 4) * 4;
      unsigned GroupEnd = GroupStart + 3;
      return Warning(Ops[0]->getStartLoc(),
                     "source register '" + RegName + "' implicitly denotes '" +
                     RegName.take_front(3) + Twine(GroupStart) + "' to '" +
                     RegName.take_front(3) + Twine(GroupEnd) +
                     "' source group");
    }
  } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
             isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
             isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
             isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
    // EVEX and VEX gathers have different operand layouts (EVEX uses a mask
    // register operand, VEX uses a vector mask), hence different indices.
    bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
    if (HasEVEX) {
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(4 + X86::AddrIndexReg).getReg());
      if (Dest == Index)
        return Warning(Ops[0]->getStartLoc(), "index and destination registers "
                                              "should be distinct");
    } else {
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(3 + X86::AddrIndexReg).getReg());
      if (Dest == Mask || Dest == Index || Mask == Index)
        return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
                                              "registers should be distinct");
    }
  }

  // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
  // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
  if ((TSFlags & X86II::EncodingMask) == 0) {
    MCPhysReg HReg = X86::NoRegister;
    bool UsesRex = TSFlags & X86II::REX_W;
    unsigned NumOps = Inst.getNumOperands();
    for (unsigned i = 0; i != NumOps; ++i) {
      const MCOperand &MO = Inst.getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
        HReg = Reg;
      if (X86II::isX86_64NonExtLowByteReg(Reg) ||
          X86II::isX86_64ExtendedReg(Reg))
        UsesRex = true;
    }

    if (UsesRex && HReg != X86::NoRegister) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
      return Error(Ops[0]->getStartLoc(),
                   "can't encode '" + RegName + "' in an instruction requiring "
                   "REX prefix");
    }
  }

  if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
    const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
    if (!MO.isReg() || MO.getReg() != X86::RIP)
      return Warning(
          Ops[0]->getStartLoc(),
          Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
                                                      : "'prefetchit1'")) +
              " only supports RIP-relative address");
  }
  return false;
}

/// Emit a warning, plus a note pointing at Intel's LVI deep-dive document,
/// for an instruction that cannot be automatically mitigated against Load
/// Value Injection and requires manual mitigation.
void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
  Warning(Loc, "Instruction may be vulnerable to LVI and "
               "requires manual mitigation");
  Note(SMLoc(), "See https://software.intel.com/"
                "security-software-guidance/insights/"
                "deep-dive-load-value-injection#specialinstructions"
                " for more information");
}

/// RET instructions and also instructions that indirect calls/jumps from memory
/// combine a load and a branch within a single instruction. To mitigate these
/// instructions against LVI, they must be decomposed into separate load and
/// branch instructions, with an LFENCE in between. For more details, see:
/// - X86LoadValueInjectionRetHardening.cpp
/// - X86LoadValueInjectionIndirectThunks.cpp
/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// For RET, a `shl $0, (%rsp)` + `lfence` sequence is emitted directly to
/// \p Out before the return; for memory-indirect JMP/CALL only a warning is
/// emitted, since those require manual mitigation.
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
  // Information on control-flow instructions that require manual mitigation can
  // be found here:
  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
  switch (Inst.getOpcode()) {
  case X86::RET16:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI16:
  case X86::RETI32:
  case X86::RETI64: {
    MCInst ShlInst, FenceInst;
    bool Parse32 = is32BitMode() || Code16GCC;
    // Pick the stack pointer register matching the current mode.
    unsigned Basereg =
        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
    // "shl $0, (%rsp)" is a no-op that forces the return address to be
    // reloaded after the fence.
    ShlInst.setOpcode(X86::SHL64mi);
    ShlMemOp->addMemOperands(ShlInst, 5);
    ShlInst.addOperand(MCOperand::createImm(0));
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(ShlInst, getSTI());
    Out.emitInstruction(FenceInst, getSTI());
    return;
  }
  case X86::JMP16m:
  case X86::JMP32m:
  case X86::JMP64m:
  case X86::CALL16m:
  case X86::CALL32m:
  case X86::CALL64m:
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }
}

/// To mitigate LVI, every instruction that performs a load can be followed by
/// an LFENCE instruction to squash any potential mis-speculation. There are
/// some instructions that require additional considerations, and may require
/// manual mitigation. For more details, see:
/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// Returns `true` if a mitigation was applied or warning was emitted.
3878 void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst, 3879 MCStreamer &Out) { 3880 auto Opcode = Inst.getOpcode(); 3881 auto Flags = Inst.getFlags(); 3882 if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) { 3883 // Information on REP string instructions that require manual mitigation can 3884 // be found here: 3885 // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions 3886 switch (Opcode) { 3887 case X86::CMPSB: 3888 case X86::CMPSW: 3889 case X86::CMPSL: 3890 case X86::CMPSQ: 3891 case X86::SCASB: 3892 case X86::SCASW: 3893 case X86::SCASL: 3894 case X86::SCASQ: 3895 emitWarningForSpecialLVIInstruction(Inst.getLoc()); 3896 return; 3897 } 3898 } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) { 3899 // If a REP instruction is found on its own line, it may or may not be 3900 // followed by a vulnerable instruction. Emit a warning just in case. 3901 emitWarningForSpecialLVIInstruction(Inst.getLoc()); 3902 return; 3903 } 3904 3905 const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); 3906 3907 // Can't mitigate after terminators or calls. A control flow change may have 3908 // already occurred. 3909 if (MCID.isTerminator() || MCID.isCall()) 3910 return; 3911 3912 // LFENCE has the mayLoad property, don't double fence. 
3913 if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) { 3914 MCInst FenceInst; 3915 FenceInst.setOpcode(X86::LFENCE); 3916 Out.emitInstruction(FenceInst, getSTI()); 3917 } 3918 } 3919 3920 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands, 3921 MCStreamer &Out) { 3922 if (LVIInlineAsmHardening && 3923 getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity)) 3924 applyLVICFIMitigation(Inst, Out); 3925 3926 Out.emitInstruction(Inst, getSTI()); 3927 3928 if (LVIInlineAsmHardening && 3929 getSTI().hasFeature(X86::FeatureLVILoadHardening)) 3930 applyLVILoadHardeningMitigation(Inst, Out); 3931 } 3932 3933 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3934 OperandVector &Operands, 3935 MCStreamer &Out, uint64_t &ErrorInfo, 3936 bool MatchingInlineAsm) { 3937 if (isParsingIntelSyntax()) 3938 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, 3939 MatchingInlineAsm); 3940 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, 3941 MatchingInlineAsm); 3942 } 3943 3944 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, 3945 OperandVector &Operands, MCStreamer &Out, 3946 bool MatchingInlineAsm) { 3947 // FIXME: This should be replaced with a real .td file alias mechanism. 3948 // Also, MatchInstructionImpl should actually *do* the EmitInstruction 3949 // call. 
3950 const char *Repl = StringSwitch<const char *>(Op.getToken()) 3951 .Case("finit", "fninit") 3952 .Case("fsave", "fnsave") 3953 .Case("fstcw", "fnstcw") 3954 .Case("fstcww", "fnstcw") 3955 .Case("fstenv", "fnstenv") 3956 .Case("fstsw", "fnstsw") 3957 .Case("fstsww", "fnstsw") 3958 .Case("fclex", "fnclex") 3959 .Default(nullptr); 3960 if (Repl) { 3961 MCInst Inst; 3962 Inst.setOpcode(X86::WAIT); 3963 Inst.setLoc(IDLoc); 3964 if (!MatchingInlineAsm) 3965 emitInstruction(Inst, Operands, Out); 3966 Operands[0] = X86Operand::CreateToken(Repl, IDLoc); 3967 } 3968 } 3969 3970 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, 3971 const FeatureBitset &MissingFeatures, 3972 bool MatchingInlineAsm) { 3973 assert(MissingFeatures.any() && "Unknown missing feature!"); 3974 SmallString<126> Msg; 3975 raw_svector_ostream OS(Msg); 3976 OS << "instruction requires:"; 3977 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) { 3978 if (MissingFeatures[i]) 3979 OS << ' ' << getSubtargetFeatureName(i); 3980 } 3981 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm); 3982 } 3983 3984 static unsigned getPrefixes(OperandVector &Operands) { 3985 unsigned Result = 0; 3986 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back()); 3987 if (Prefix.isPrefix()) { 3988 Result = Prefix.getPrefix(); 3989 Operands.pop_back(); 3990 } 3991 return Result; 3992 } 3993 3994 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3995 unsigned Opc = Inst.getOpcode(); 3996 const MCInstrDesc &MCID = MII.get(Opc); 3997 3998 if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID)) 3999 return Match_Unsupported; 4000 4001 if (ForcedVEXEncoding == VEXEncoding_EVEX && 4002 (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX) 4003 return Match_Unsupported; 4004 4005 if ((ForcedVEXEncoding == VEXEncoding_VEX || 4006 ForcedVEXEncoding == VEXEncoding_VEX2 || 4007 ForcedVEXEncoding == VEXEncoding_VEX3) && 4008 (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX) 4009 return 
/// Match and emit an instruction under AT&T syntax. Tries a direct match
/// first; on failure, retries with each size suffix (b/w/l/q, or s/l/t for
/// x87) appended to the mnemonic, then synthesizes the best diagnostic from
/// the per-suffix results.
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  SMRange EmptyRange = std::nullopt;

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  // Re-fetch the mnemonic operand: MatchFPUWaitAlias may have replaced it.
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  unsigned Prefixes = getPrefixes(Operands);

  MCInst Inst;

  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
  // encoder and printer.
  if (ForcedVEXEncoding == VEXEncoding_VEX)
    Prefixes |= X86::IP_USE_VEX;
  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
    Prefixes |= X86::IP_USE_VEX2;
  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;
  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
    Prefixes |= X86::IP_USE_EVEX;

  // Set encoded flags for {disp8} and {disp32}.
  if (ForcedDispEncoding == DispEncoding_Disp8)
    Prefixes |= X86::IP_USE_DISP8;
  else if (ForcedDispEncoding == DispEncoding_Disp32)
    Prefixes |= X86::IP_USE_DISP32;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
  // when matching the instruction.
  if (ForcedDataPrefix == X86::Is32Bit)
    SwitchMode(X86::Is32Bit);
  // First, try a direct match.
  FeatureBitset MissingFeatures;
  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
                                            MissingFeatures, MatchingInlineAsm,
                                            isParsingIntelSyntax());
  if (ForcedDataPrefix == X86::Is32Bit) {
    SwitchMode(X86::Is16Bit);
    ForcedDataPrefix = 0;
  }
  switch (OriginalError) {
  default: llvm_unreachable("Unexpected match result!");
  case Match_Success:
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  case Match_InvalidImmUnsignedi4: {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
  case Match_InvalidOperand:
  case Match_MnemonicFail:
  case Match_Unsupported:
    // Fall through to the suffix-retry heuristic below.
    break;
  }
  if (Op.getToken().empty()) {
    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
          MatchingInlineAsm);
    return true;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.

  // Change the operand to point to a temporary token.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' ';
  Op.setTokenValue(Tmp);

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction. These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  // NOTE: the fourth 'f' entry is an explicit '\0' so iteration I==3 retries
  // the bare mnemonic (x87 has only three size suffixes).
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
  // MemSize corresponding to Suffixes. { 8, 16, 32, 64 } { 32, 64, 80, 0 }
  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";

  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
  unsigned Match[4]; // One match result per entry of Suffixes/MemSize.

  // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
  // So we should make sure the suffix matcher only works for memory variant
  // that has the same size with the suffix.
  // FIXME: This flag is a workaround for legacy instructions that didn't
  // declare non suffix variant assembly.
  bool HasVectorReg = false;
  X86Operand *MemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isVectorReg())
      HasVectorReg = true;
    else if (X86Op->isMem()) {
      MemOp = X86Op;
      assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    if (MemOp && HasVectorReg)
      MemOp->Mem.Size = MemSize[I];
    Match[I] = Match_MnemonicFail;
    if (MemOp || !HasVectorReg) {
      Match[I] =
          MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
                           MatchingInlineAsm, isParsingIntelSyntax());
      // If this returned as a missing feature failure, remember that.
      if (Match[I] == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }
  }

  // Restore the old token.
  Op.setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise, the match failed, try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    char MatchChars[4];
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = std::size(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];

    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (llvm::count(Match, Match_MnemonicFail) == 4) {
    if (OriginalError == Match_MnemonicFail)
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Op.getLocRange(), MatchingInlineAsm);

    if (OriginalError == Match_Unsupported)
      return Error(IDLoc, "unsupported instruction", EmptyRange,
                   MatchingInlineAsm);

    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
    // Recover location info for the operand if we know which was the problem.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction", EmptyRange,
                     MatchingInlineAsm);

      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRange, MatchingInlineAsm);
  return true;
}
/// Match and emit an instruction under Intel syntax, where memory operand
/// sizes are not encoded in the mnemonic: an unsized memory operand is retried
/// with each plausible size until exactly one candidate matches.
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
  SMRange EmptyRange = std::nullopt;
  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
  unsigned Prefixes = getPrefixes(Operands);

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
  // Re-fetch the mnemonic operand: MatchFPUWaitAlias may have replaced it.
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);

  MCInst Inst;

  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
  // encoder and printer.
  if (ForcedVEXEncoding == VEXEncoding_VEX)
    Prefixes |= X86::IP_USE_VEX;
  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
    Prefixes |= X86::IP_USE_VEX2;
  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;
  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
    Prefixes |= X86::IP_USE_EVEX;

  // Set encoded flags for {disp8} and {disp32}.
  if (ForcedDispEncoding == DispEncoding_Disp8)
    Prefixes |= X86::IP_USE_DISP8;
  else if (ForcedDispEncoding == DispEncoding_Disp32)
    Prefixes |= X86::IP_USE_DISP32;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized()) {
      UnsizedMemOp = X86Op;
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Allow some instructions to have implicitly pointer-sized operands. This is
  // compatible with gas.
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        UnsizedMemOp->Mem.Size = getPointerWidth();
        break;
      }
    }
  }

  SmallVector<unsigned, 8> Match;
  FeatureBitset ErrorInfoMissingFeatures;
  FeatureBitset MissingFeatures;

  // If unsized push has immediate operand we should default the default pointer
  // size for the size.
  if (Mnemonic == "push" && Operands.size() == 2) {
    auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
    if (X86Op->isImm()) {
      // If it's not a constant fall through and let remainder take care of it.
      const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
      unsigned Size = getPointerWidth();
      if (CE &&
          (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
        SmallString<16> Tmp;
        Tmp += Base;
        // Append the mode-appropriate AT&T size suffix to the mnemonic.
        Tmp += (is64BitMode())
                   ? "q"
                   : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
        Op.setTokenValue(Tmp);
        // Do match in ATT mode to allow explicit suffix usage.
        Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
                                         MissingFeatures, MatchingInlineAsm,
                                         false /*isParsingIntelSyntax()*/));
        Op.setTokenValue(Base);
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size. In Intel assembly, the size is not part of the instruction
  // mnemonic.
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      unsigned LastOpcode = Inst.getOpcode();
      unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                    MissingFeatures, MatchingInlineAsm,
                                    isParsingIntelSyntax());
      // Only record a result when it selected a different opcode, so duplicate
      // matches of the same instruction at several sizes don't look ambiguous.
      if (Match.empty() || LastOpcode != Inst.getOpcode())
        Match.push_back(M);

      // If this returned as a missing feature failure, remember that.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }

    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  }

  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeatures = MissingFeatures;
  }

  // Restore the size of the unsized memory operand if we modified it.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.
  if (Match.back() == Match_MnemonicFail) {
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Op.getLocRange(), MatchingInlineAsm);
  }

  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);

  // If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movxz eax, m8/m16".
  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
      UnsizedMemOp->getMemFrontendSize()) {
    UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
    unsigned M = MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax());
    if (M == Match_Success)
      NumSuccessfulMatches = 1;

    // Add a rewrite that encodes the size information we used from the
    // frontend.
    InstInfo->AsmRewrites->emplace_back(
        AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
        /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
  }

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  } else if (NumSuccessfulMatches > 1) {
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 UnsizedMemOp->getLocRange());
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
               MatchingInlineAsm);
}

/// Segment registers are never included in inline-asm clobber lists.
bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
  return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
}
/// Dispatch X86-specific assembler directives (.code*, syntax switches,
/// .nops/.even, CodeView FPO and Win64 SEH unwind directives). Returns true
/// when the directive is not recognized here so generic handling can run.
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  if (IDVal.starts_with(".arch"))
    return parseDirectiveArch();
  if (IDVal.starts_with(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.starts_with(".att_syntax")) {
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.starts_with(".intel_syntax")) {
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".nops")
    return parseDirectiveNops(DirectiveID.getLoc());
  else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());
  // MASM accepts the SEH directives without the ".seh_" prefix.
  else if (IDVal == ".seh_pushreg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
    return parseDirectiveSEHPushReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_setframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
    return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".seh_savereg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
    return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_savexmm" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
    return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
  else if (IDVal == ".seh_pushframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
    return parseDirectiveSEHPushFrame(DirectiveID.getLoc());

  return true;
}

bool X86AsmParser::parseDirectiveArch() {
  // Ignore .arch for now.
  getParser().parseStringToEndOfStatement();
  return false;
}

/// parseDirectiveNops
/// ::= .nops size[, control]
bool X86AsmParser::parseDirectiveNops(SMLoc L) {
  int64_t NumBytes = 0, Control = 0;
  SMLoc NumBytesLoc, ControlLoc;
  const MCSubtargetInfo& STI = getSTI();
  NumBytesLoc = getTok().getLoc();
  if (getParser().checkForValidSection() ||
      getParser().parseAbsoluteExpression(NumBytes))
    return true;

  if (parseOptionalToken(AsmToken::Comma)) {
    ControlLoc = getTok().getLoc();
    if (getParser().parseAbsoluteExpression(Control))
      return true;
  }
  if (getParser().parseEOL())
    return true;

  // Value errors are diagnosed but return false: the directive was fully
  // consumed, so parsing can continue.
  if (NumBytes <= 0) {
    Error(NumBytesLoc, "'.nops' directive with non-positive size");
    return false;
  }

  if (Control < 0) {
    Error(ControlLoc, "'.nops' directive with negative NOP size");
    return false;
  }

  /// Emit nops
  getParser().getStreamer().emitNops(NumBytes, Control, L, STI);

  return false;
}

/// parseDirectiveEven
/// ::= .even
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
  if (parseEOL())
    return false;

  const MCSection *Section = getStreamer().getCurrentSectionOnly();
  if (!Section) {
    // No section yet: set up the default sections first.
    getStreamer().initSections(false, getSTI());
    Section = getStreamer().getCurrentSectionOnly();
  }
  // Code sections align with NOPs, data sections with zero fill.
  if (Section->useCodeAlign())
    getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
  else
    getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
  return false;
}
/// ParseDirectiveCode
/// ::= .code16 | .code32 | .code64
/// Switches the parser/streamer between 16/32/64-bit modes; .code16gcc parses
/// as 32-bit but emits 16-bit code.
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
  MCAsmParser &Parser = getParser();
  Code16GCC = false;
  if (IDVal == ".code16") {
    Parser.Lex();
    if (!is16BitMode()) {
      SwitchMode(X86::Is16Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code16gcc") {
    // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
    Parser.Lex();
    Code16GCC = true;
    if (!is16BitMode()) {
      SwitchMode(X86::Is16Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
    }
  } else if (IDVal == ".code32") {
    Parser.Lex();
    if (!is32BitMode()) {
      SwitchMode(X86::Is32Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
    }
  } else if (IDVal == ".code64") {
    Parser.Lex();
    if (!is64BitMode()) {
      SwitchMode(X86::Is64Bit);
      getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
    }
  } else {
    // Diagnose but return false: the directive was recognized as a .code*
    // variant, so generic directive handling should not retry it.
    Error(L, "unknown directive " + IDVal);
    return false;
  }

  return false;
}

// .cv_fpo_proc foo
bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  StringRef ProcName;
  int64_t ParamsSize;
  if (Parser.parseIdentifier(ProcName))
    return Parser.TokError("expected symbol name");
  if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
    return true;
  // The FPO record stores the parameter size in a 32-bit field.
  if (!isUIntN(32, ParamsSize))
    return Parser.TokError("parameters size out of range");
  if (parseEOL())
    return true;
  MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
  return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
}

// .cv_fpo_setframe ebp
bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
  MCRegister Reg;
  SMLoc DummyLoc;
  if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
    return true;
  return getTargetStreamer().emitFPOSetFrame(Reg, L);
}

// .cv_fpo_pushreg ebx
bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
  MCRegister Reg;
  SMLoc DummyLoc;
  if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
    return true;
  return getTargetStreamer().emitFPOPushReg(Reg, L);
}

// .cv_fpo_stackalloc 20
bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  int64_t Offset;
  if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
    return true;
  return getTargetStreamer().emitFPOStackAlloc(Offset, L);
}

// .cv_fpo_stackalign 8
bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
  MCAsmParser &Parser = getParser();
  int64_t Offset;
  if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
    return true;
  return getTargetStreamer().emitFPOStackAlign(Offset, L);
}

// .cv_fpo_endprologue
bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (Parser.parseEOL())
    return true;
  return getTargetStreamer().emitFPOEndPrologue(L);
}

// .cv_fpo_endproc
bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
  MCAsmParser &Parser = getParser();
  if (Parser.parseEOL())
    return true;
  return getTargetStreamer().emitFPOEndProc(L);
}
Map 4755 // from the encoding back to the LLVM register number. 4756 RegNo = 0; 4757 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) { 4758 if (MRI->getEncodingValue(Reg) == EncodedReg) { 4759 RegNo = Reg; 4760 break; 4761 } 4762 } 4763 if (RegNo == 0) { 4764 return Error(startLoc, 4765 "incorrect register number for use with this directive"); 4766 } 4767 } 4768 4769 return false; 4770 } 4771 4772 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) { 4773 MCRegister Reg; 4774 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4775 return true; 4776 4777 if (getLexer().isNot(AsmToken::EndOfStatement)) 4778 return TokError("expected end of directive"); 4779 4780 getParser().Lex(); 4781 getStreamer().emitWinCFIPushReg(Reg, Loc); 4782 return false; 4783 } 4784 4785 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) { 4786 MCRegister Reg; 4787 int64_t Off; 4788 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4789 return true; 4790 if (getLexer().isNot(AsmToken::Comma)) 4791 return TokError("you must specify a stack pointer offset"); 4792 4793 getParser().Lex(); 4794 if (getParser().parseAbsoluteExpression(Off)) 4795 return true; 4796 4797 if (getLexer().isNot(AsmToken::EndOfStatement)) 4798 return TokError("expected end of directive"); 4799 4800 getParser().Lex(); 4801 getStreamer().emitWinCFISetFrame(Reg, Off, Loc); 4802 return false; 4803 } 4804 4805 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) { 4806 MCRegister Reg; 4807 int64_t Off; 4808 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg)) 4809 return true; 4810 if (getLexer().isNot(AsmToken::Comma)) 4811 return TokError("you must specify an offset on the stack"); 4812 4813 getParser().Lex(); 4814 if (getParser().parseAbsoluteExpression(Off)) 4815 return true; 4816 4817 if (getLexer().isNot(AsmToken::EndOfStatement)) 4818 return TokError("expected end of directive"); 4819 4820 getParser().Lex(); 4821 getStreamer().emitWinCFISaveReg(Reg, Off, Loc); 4822 return false; 4823 } 4824 
4825 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) { 4826 MCRegister Reg; 4827 int64_t Off; 4828 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg)) 4829 return true; 4830 if (getLexer().isNot(AsmToken::Comma)) 4831 return TokError("you must specify an offset on the stack"); 4832 4833 getParser().Lex(); 4834 if (getParser().parseAbsoluteExpression(Off)) 4835 return true; 4836 4837 if (getLexer().isNot(AsmToken::EndOfStatement)) 4838 return TokError("expected end of directive"); 4839 4840 getParser().Lex(); 4841 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc); 4842 return false; 4843 } 4844 4845 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) { 4846 bool Code = false; 4847 StringRef CodeID; 4848 if (getLexer().is(AsmToken::At)) { 4849 SMLoc startLoc = getLexer().getLoc(); 4850 getParser().Lex(); 4851 if (!getParser().parseIdentifier(CodeID)) { 4852 if (CodeID != "code") 4853 return Error(startLoc, "expected @code"); 4854 Code = true; 4855 } 4856 } 4857 4858 if (getLexer().isNot(AsmToken::EndOfStatement)) 4859 return TokError("expected end of directive"); 4860 4861 getParser().Lex(); 4862 getStreamer().emitWinCFIPushFrame(Code, Loc); 4863 return false; 4864 } 4865 4866 // Force static initialization. 4867 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() { 4868 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target()); 4869 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target()); 4870 } 4871 4872 #define GET_MATCHER_IMPLEMENTATION 4873 #include "X86GenAsmMatcher.inc" 4874