1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "InstPrinter/X86IntelInstPrinter.h"
11 #include "MCTargetDesc/X86BaseInfo.h"
12 #include "MCTargetDesc/X86MCExpr.h"
13 #include "MCTargetDesc/X86TargetStreamer.h"
14 #include "X86AsmInstrumentation.h"
15 #include "X86AsmParserCommon.h"
16 #include "X86Operand.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmLexer.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSection.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/SourceMgr.h"
36 #include "llvm/Support/TargetRegistry.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include <algorithm>
39 #include <memory>
40
41 using namespace llvm;
42
checkScale(unsigned Scale,StringRef & ErrMsg)43 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
44 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
45 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
46 return true;
47 }
48 return false;
49 }
50
51 namespace {
52
53 static const char OpPrecedence[] = {
54 0, // IC_OR
55 1, // IC_XOR
56 2, // IC_AND
57 3, // IC_LSHIFT
58 3, // IC_RSHIFT
59 4, // IC_PLUS
60 4, // IC_MINUS
61 5, // IC_MULTIPLY
62 5, // IC_DIVIDE
63 5, // IC_MOD
64 6, // IC_NOT
65 7, // IC_NEG
66 8, // IC_RPAREN
67 9, // IC_LPAREN
68 0, // IC_IMM
69 0 // IC_REGISTER
70 };
71
72 class X86AsmParser : public MCTargetAsmParser {
73 ParseInstructionInfo *InstInfo;
74 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
75 bool Code16GCC;
76
77 private:
consumeToken()78 SMLoc consumeToken() {
79 MCAsmParser &Parser = getParser();
80 SMLoc Result = Parser.getTok().getLoc();
81 Parser.Lex();
82 return Result;
83 }
84
getTargetStreamer()85 X86TargetStreamer &getTargetStreamer() {
86 assert(getParser().getStreamer().getTargetStreamer() &&
87 "do not have a target streamer");
88 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
89 return static_cast<X86TargetStreamer &>(TS);
90 }
91
MatchInstruction(const OperandVector & Operands,MCInst & Inst,uint64_t & ErrorInfo,bool matchingInlineAsm,unsigned VariantID=0)92 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
93 uint64_t &ErrorInfo, bool matchingInlineAsm,
94 unsigned VariantID = 0) {
95 // In Code16GCC mode, match as 32-bit.
96 if (Code16GCC)
97 SwitchMode(X86::Mode32Bit);
98 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
99 matchingInlineAsm, VariantID);
100 if (Code16GCC)
101 SwitchMode(X86::Mode16Bit);
102 return rv;
103 }
104
105 enum InfixCalculatorTok {
106 IC_OR = 0,
107 IC_XOR,
108 IC_AND,
109 IC_LSHIFT,
110 IC_RSHIFT,
111 IC_PLUS,
112 IC_MINUS,
113 IC_MULTIPLY,
114 IC_DIVIDE,
115 IC_MOD,
116 IC_NOT,
117 IC_NEG,
118 IC_RPAREN,
119 IC_LPAREN,
120 IC_IMM,
121 IC_REGISTER
122 };
123
124 enum IntelOperatorKind {
125 IOK_INVALID = 0,
126 IOK_LENGTH,
127 IOK_SIZE,
128 IOK_TYPE,
129 IOK_OFFSET
130 };
131
132 class InfixCalculator {
133 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
134 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
135 SmallVector<ICToken, 4> PostfixStack;
136
isUnaryOperator(const InfixCalculatorTok Op)137 bool isUnaryOperator(const InfixCalculatorTok Op) {
138 return Op == IC_NEG || Op == IC_NOT;
139 }
140
141 public:
popOperand()142 int64_t popOperand() {
143 assert (!PostfixStack.empty() && "Poped an empty stack!");
144 ICToken Op = PostfixStack.pop_back_val();
145 if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
146 return -1; // The invalid Scale value will be caught later by checkScale
147 return Op.second;
148 }
pushOperand(InfixCalculatorTok Op,int64_t Val=0)149 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
150 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
151 "Unexpected operand!");
152 PostfixStack.push_back(std::make_pair(Op, Val));
153 }
154
popOperator()155 void popOperator() { InfixOperatorStack.pop_back(); }
pushOperator(InfixCalculatorTok Op)156 void pushOperator(InfixCalculatorTok Op) {
157 // Push the new operator if the stack is empty.
158 if (InfixOperatorStack.empty()) {
159 InfixOperatorStack.push_back(Op);
160 return;
161 }
162
163 // Push the new operator if it has a higher precedence than the operator
164 // on the top of the stack or the operator on the top of the stack is a
165 // left parentheses.
166 unsigned Idx = InfixOperatorStack.size() - 1;
167 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
168 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
169 InfixOperatorStack.push_back(Op);
170 return;
171 }
172
173 // The operator on the top of the stack has higher precedence than the
174 // new operator.
175 unsigned ParenCount = 0;
176 while (1) {
177 // Nothing to process.
178 if (InfixOperatorStack.empty())
179 break;
180
181 Idx = InfixOperatorStack.size() - 1;
182 StackOp = InfixOperatorStack[Idx];
183 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
184 break;
185
186 // If we have an even parentheses count and we see a left parentheses,
187 // then stop processing.
188 if (!ParenCount && StackOp == IC_LPAREN)
189 break;
190
191 if (StackOp == IC_RPAREN) {
192 ++ParenCount;
193 InfixOperatorStack.pop_back();
194 } else if (StackOp == IC_LPAREN) {
195 --ParenCount;
196 InfixOperatorStack.pop_back();
197 } else {
198 InfixOperatorStack.pop_back();
199 PostfixStack.push_back(std::make_pair(StackOp, 0));
200 }
201 }
202 // Push the new operator.
203 InfixOperatorStack.push_back(Op);
204 }
205
execute()206 int64_t execute() {
207 // Push any remaining operators onto the postfix stack.
208 while (!InfixOperatorStack.empty()) {
209 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
210 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
211 PostfixStack.push_back(std::make_pair(StackOp, 0));
212 }
213
214 if (PostfixStack.empty())
215 return 0;
216
217 SmallVector<ICToken, 16> OperandStack;
218 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
219 ICToken Op = PostfixStack[i];
220 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
221 OperandStack.push_back(Op);
222 } else if (isUnaryOperator(Op.first)) {
223 assert (OperandStack.size() > 0 && "Too few operands.");
224 ICToken Operand = OperandStack.pop_back_val();
225 assert (Operand.first == IC_IMM &&
226 "Unary operation with a register!");
227 switch (Op.first) {
228 default:
229 report_fatal_error("Unexpected operator!");
230 break;
231 case IC_NEG:
232 OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
233 break;
234 case IC_NOT:
235 OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
236 break;
237 }
238 } else {
239 assert (OperandStack.size() > 1 && "Too few operands.");
240 int64_t Val;
241 ICToken Op2 = OperandStack.pop_back_val();
242 ICToken Op1 = OperandStack.pop_back_val();
243 switch (Op.first) {
244 default:
245 report_fatal_error("Unexpected operator!");
246 break;
247 case IC_PLUS:
248 Val = Op1.second + Op2.second;
249 OperandStack.push_back(std::make_pair(IC_IMM, Val));
250 break;
251 case IC_MINUS:
252 Val = Op1.second - Op2.second;
253 OperandStack.push_back(std::make_pair(IC_IMM, Val));
254 break;
255 case IC_MULTIPLY:
256 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
257 "Multiply operation with an immediate and a register!");
258 Val = Op1.second * Op2.second;
259 OperandStack.push_back(std::make_pair(IC_IMM, Val));
260 break;
261 case IC_DIVIDE:
262 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
263 "Divide operation with an immediate and a register!");
264 assert (Op2.second != 0 && "Division by zero!");
265 Val = Op1.second / Op2.second;
266 OperandStack.push_back(std::make_pair(IC_IMM, Val));
267 break;
268 case IC_MOD:
269 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
270 "Modulo operation with an immediate and a register!");
271 Val = Op1.second % Op2.second;
272 OperandStack.push_back(std::make_pair(IC_IMM, Val));
273 break;
274 case IC_OR:
275 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
276 "Or operation with an immediate and a register!");
277 Val = Op1.second | Op2.second;
278 OperandStack.push_back(std::make_pair(IC_IMM, Val));
279 break;
280 case IC_XOR:
281 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
282 "Xor operation with an immediate and a register!");
283 Val = Op1.second ^ Op2.second;
284 OperandStack.push_back(std::make_pair(IC_IMM, Val));
285 break;
286 case IC_AND:
287 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
288 "And operation with an immediate and a register!");
289 Val = Op1.second & Op2.second;
290 OperandStack.push_back(std::make_pair(IC_IMM, Val));
291 break;
292 case IC_LSHIFT:
293 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
294 "Left shift operation with an immediate and a register!");
295 Val = Op1.second << Op2.second;
296 OperandStack.push_back(std::make_pair(IC_IMM, Val));
297 break;
298 case IC_RSHIFT:
299 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
300 "Right shift operation with an immediate and a register!");
301 Val = Op1.second >> Op2.second;
302 OperandStack.push_back(std::make_pair(IC_IMM, Val));
303 break;
304 }
305 }
306 }
307 assert (OperandStack.size() == 1 && "Expected a single result.");
308 return OperandStack.pop_back_val().second;
309 }
310 };
311
312 enum IntelExprState {
313 IES_INIT,
314 IES_OR,
315 IES_XOR,
316 IES_AND,
317 IES_LSHIFT,
318 IES_RSHIFT,
319 IES_PLUS,
320 IES_MINUS,
321 IES_NOT,
322 IES_MULTIPLY,
323 IES_DIVIDE,
324 IES_MOD,
325 IES_LBRAC,
326 IES_RBRAC,
327 IES_LPAREN,
328 IES_RPAREN,
329 IES_REGISTER,
330 IES_INTEGER,
331 IES_IDENTIFIER,
332 IES_ERROR
333 };
334
335 class IntelExprStateMachine {
336 IntelExprState State, PrevState;
337 unsigned BaseReg, IndexReg, TmpReg, Scale;
338 int64_t Imm;
339 const MCExpr *Sym;
340 StringRef SymName;
341 InfixCalculator IC;
342 InlineAsmIdentifierInfo Info;
343 short BracCount;
344 bool MemExpr;
345
346 public:
IntelExprStateMachine()347 IntelExprStateMachine()
348 : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
349 TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
350 MemExpr(false) {}
351
addImm(int64_t imm)352 void addImm(int64_t imm) { Imm += imm; }
getBracCount()353 short getBracCount() { return BracCount; }
isMemExpr()354 bool isMemExpr() { return MemExpr; }
getBaseReg()355 unsigned getBaseReg() { return BaseReg; }
getIndexReg()356 unsigned getIndexReg() { return IndexReg; }
getScale()357 unsigned getScale() { return Scale; }
getSym()358 const MCExpr *getSym() { return Sym; }
getSymName()359 StringRef getSymName() { return SymName; }
getImm()360 int64_t getImm() { return Imm + IC.execute(); }
isValidEndState()361 bool isValidEndState() {
362 return State == IES_RBRAC || State == IES_INTEGER;
363 }
hadError()364 bool hadError() { return State == IES_ERROR; }
getIdentifierInfo()365 InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }
366
onOr()367 void onOr() {
368 IntelExprState CurrState = State;
369 switch (State) {
370 default:
371 State = IES_ERROR;
372 break;
373 case IES_INTEGER:
374 case IES_RPAREN:
375 case IES_REGISTER:
376 State = IES_OR;
377 IC.pushOperator(IC_OR);
378 break;
379 }
380 PrevState = CurrState;
381 }
onXor()382 void onXor() {
383 IntelExprState CurrState = State;
384 switch (State) {
385 default:
386 State = IES_ERROR;
387 break;
388 case IES_INTEGER:
389 case IES_RPAREN:
390 case IES_REGISTER:
391 State = IES_XOR;
392 IC.pushOperator(IC_XOR);
393 break;
394 }
395 PrevState = CurrState;
396 }
onAnd()397 void onAnd() {
398 IntelExprState CurrState = State;
399 switch (State) {
400 default:
401 State = IES_ERROR;
402 break;
403 case IES_INTEGER:
404 case IES_RPAREN:
405 case IES_REGISTER:
406 State = IES_AND;
407 IC.pushOperator(IC_AND);
408 break;
409 }
410 PrevState = CurrState;
411 }
onLShift()412 void onLShift() {
413 IntelExprState CurrState = State;
414 switch (State) {
415 default:
416 State = IES_ERROR;
417 break;
418 case IES_INTEGER:
419 case IES_RPAREN:
420 case IES_REGISTER:
421 State = IES_LSHIFT;
422 IC.pushOperator(IC_LSHIFT);
423 break;
424 }
425 PrevState = CurrState;
426 }
onRShift()427 void onRShift() {
428 IntelExprState CurrState = State;
429 switch (State) {
430 default:
431 State = IES_ERROR;
432 break;
433 case IES_INTEGER:
434 case IES_RPAREN:
435 case IES_REGISTER:
436 State = IES_RSHIFT;
437 IC.pushOperator(IC_RSHIFT);
438 break;
439 }
440 PrevState = CurrState;
441 }
onPlus(StringRef & ErrMsg)442 bool onPlus(StringRef &ErrMsg) {
443 IntelExprState CurrState = State;
444 switch (State) {
445 default:
446 State = IES_ERROR;
447 break;
448 case IES_INTEGER:
449 case IES_RPAREN:
450 case IES_REGISTER:
451 State = IES_PLUS;
452 IC.pushOperator(IC_PLUS);
453 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
454 // If we already have a BaseReg, then assume this is the IndexReg with
455 // no explicit scale.
456 if (!BaseReg) {
457 BaseReg = TmpReg;
458 } else {
459 if (IndexReg) {
460 ErrMsg = "BaseReg/IndexReg already set!";
461 return true;
462 }
463 IndexReg = TmpReg;
464 Scale = 0;
465 }
466 }
467 break;
468 }
469 PrevState = CurrState;
470 return false;
471 }
onMinus(StringRef & ErrMsg)472 bool onMinus(StringRef &ErrMsg) {
473 IntelExprState CurrState = State;
474 switch (State) {
475 default:
476 State = IES_ERROR;
477 break;
478 case IES_OR:
479 case IES_XOR:
480 case IES_AND:
481 case IES_LSHIFT:
482 case IES_RSHIFT:
483 case IES_PLUS:
484 case IES_NOT:
485 case IES_MULTIPLY:
486 case IES_DIVIDE:
487 case IES_MOD:
488 case IES_LPAREN:
489 case IES_RPAREN:
490 case IES_LBRAC:
491 case IES_RBRAC:
492 case IES_INTEGER:
493 case IES_REGISTER:
494 case IES_INIT:
495 State = IES_MINUS;
496 // push minus operator if it is not a negate operator
497 if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
498 CurrState == IES_INTEGER || CurrState == IES_RBRAC)
499 IC.pushOperator(IC_MINUS);
500 else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
501 // We have negate operator for Scale: it's illegal
502 ErrMsg = "Scale can't be negative";
503 return true;
504 } else
505 IC.pushOperator(IC_NEG);
506 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
507 // If we already have a BaseReg, then assume this is the IndexReg with
508 // no explicit scale.
509 if (!BaseReg) {
510 BaseReg = TmpReg;
511 } else {
512 if (IndexReg) {
513 ErrMsg = "BaseReg/IndexReg already set!";
514 return true;
515 }
516 IndexReg = TmpReg;
517 Scale = 0;
518 }
519 }
520 break;
521 }
522 PrevState = CurrState;
523 return false;
524 }
onNot()525 void onNot() {
526 IntelExprState CurrState = State;
527 switch (State) {
528 default:
529 State = IES_ERROR;
530 break;
531 case IES_OR:
532 case IES_XOR:
533 case IES_AND:
534 case IES_LSHIFT:
535 case IES_RSHIFT:
536 case IES_PLUS:
537 case IES_MINUS:
538 case IES_NOT:
539 case IES_MULTIPLY:
540 case IES_DIVIDE:
541 case IES_MOD:
542 case IES_LPAREN:
543 case IES_LBRAC:
544 case IES_INIT:
545 State = IES_NOT;
546 IC.pushOperator(IC_NOT);
547 break;
548 }
549 PrevState = CurrState;
550 }
551
onRegister(unsigned Reg,StringRef & ErrMsg)552 bool onRegister(unsigned Reg, StringRef &ErrMsg) {
553 IntelExprState CurrState = State;
554 switch (State) {
555 default:
556 State = IES_ERROR;
557 break;
558 case IES_PLUS:
559 case IES_LPAREN:
560 case IES_LBRAC:
561 State = IES_REGISTER;
562 TmpReg = Reg;
563 IC.pushOperand(IC_REGISTER);
564 break;
565 case IES_MULTIPLY:
566 // Index Register - Scale * Register
567 if (PrevState == IES_INTEGER) {
568 if (IndexReg) {
569 ErrMsg = "BaseReg/IndexReg already set!";
570 return true;
571 }
572 State = IES_REGISTER;
573 IndexReg = Reg;
574 // Get the scale and replace the 'Scale * Register' with '0'.
575 Scale = IC.popOperand();
576 if (checkScale(Scale, ErrMsg))
577 return true;
578 IC.pushOperand(IC_IMM);
579 IC.popOperator();
580 } else {
581 State = IES_ERROR;
582 }
583 break;
584 }
585 PrevState = CurrState;
586 return false;
587 }
onIdentifierExpr(const MCExpr * SymRef,StringRef SymRefName,const InlineAsmIdentifierInfo & IDInfo,bool ParsingInlineAsm,StringRef & ErrMsg)588 bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
589 const InlineAsmIdentifierInfo &IDInfo,
590 bool ParsingInlineAsm, StringRef &ErrMsg) {
591 // InlineAsm: Treat an enum value as an integer
592 if (ParsingInlineAsm)
593 if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
594 return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
595 // Treat a symbolic constant like an integer
596 if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
597 return onInteger(CE->getValue(), ErrMsg);
598 PrevState = State;
599 bool HasSymbol = Sym != nullptr;
600 switch (State) {
601 default:
602 State = IES_ERROR;
603 break;
604 case IES_PLUS:
605 case IES_MINUS:
606 case IES_NOT:
607 case IES_INIT:
608 case IES_LBRAC:
609 MemExpr = true;
610 State = IES_INTEGER;
611 Sym = SymRef;
612 SymName = SymRefName;
613 IC.pushOperand(IC_IMM);
614 if (ParsingInlineAsm)
615 Info = IDInfo;
616 break;
617 }
618 if (HasSymbol)
619 ErrMsg = "cannot use more than one symbol in memory operand";
620 return HasSymbol;
621 }
onInteger(int64_t TmpInt,StringRef & ErrMsg)622 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
623 IntelExprState CurrState = State;
624 switch (State) {
625 default:
626 State = IES_ERROR;
627 break;
628 case IES_PLUS:
629 case IES_MINUS:
630 case IES_NOT:
631 case IES_OR:
632 case IES_XOR:
633 case IES_AND:
634 case IES_LSHIFT:
635 case IES_RSHIFT:
636 case IES_DIVIDE:
637 case IES_MOD:
638 case IES_MULTIPLY:
639 case IES_LPAREN:
640 case IES_INIT:
641 case IES_LBRAC:
642 State = IES_INTEGER;
643 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
644 // Index Register - Register * Scale
645 if (IndexReg) {
646 ErrMsg = "BaseReg/IndexReg already set!";
647 return true;
648 }
649 IndexReg = TmpReg;
650 Scale = TmpInt;
651 if (checkScale(Scale, ErrMsg))
652 return true;
653 // Get the scale and replace the 'Register * Scale' with '0'.
654 IC.popOperator();
655 } else {
656 IC.pushOperand(IC_IMM, TmpInt);
657 }
658 break;
659 }
660 PrevState = CurrState;
661 return false;
662 }
onStar()663 void onStar() {
664 PrevState = State;
665 switch (State) {
666 default:
667 State = IES_ERROR;
668 break;
669 case IES_INTEGER:
670 case IES_REGISTER:
671 case IES_RPAREN:
672 State = IES_MULTIPLY;
673 IC.pushOperator(IC_MULTIPLY);
674 break;
675 }
676 }
onDivide()677 void onDivide() {
678 PrevState = State;
679 switch (State) {
680 default:
681 State = IES_ERROR;
682 break;
683 case IES_INTEGER:
684 case IES_RPAREN:
685 State = IES_DIVIDE;
686 IC.pushOperator(IC_DIVIDE);
687 break;
688 }
689 }
onMod()690 void onMod() {
691 PrevState = State;
692 switch (State) {
693 default:
694 State = IES_ERROR;
695 break;
696 case IES_INTEGER:
697 case IES_RPAREN:
698 State = IES_MOD;
699 IC.pushOperator(IC_MOD);
700 break;
701 }
702 }
onLBrac()703 bool onLBrac() {
704 if (BracCount)
705 return true;
706 PrevState = State;
707 switch (State) {
708 default:
709 State = IES_ERROR;
710 break;
711 case IES_RBRAC:
712 case IES_INTEGER:
713 case IES_RPAREN:
714 State = IES_PLUS;
715 IC.pushOperator(IC_PLUS);
716 break;
717 case IES_INIT:
718 assert(!BracCount && "BracCount should be zero on parsing's start");
719 State = IES_LBRAC;
720 break;
721 }
722 MemExpr = true;
723 BracCount++;
724 return false;
725 }
onRBrac()726 bool onRBrac() {
727 IntelExprState CurrState = State;
728 switch (State) {
729 default:
730 State = IES_ERROR;
731 break;
732 case IES_INTEGER:
733 case IES_REGISTER:
734 case IES_RPAREN:
735 if (BracCount-- != 1)
736 return true;
737 State = IES_RBRAC;
738 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
739 // If we already have a BaseReg, then assume this is the IndexReg with
740 // no explicit scale.
741 if (!BaseReg) {
742 BaseReg = TmpReg;
743 } else {
744 assert (!IndexReg && "BaseReg/IndexReg already set!");
745 IndexReg = TmpReg;
746 Scale = 0;
747 }
748 }
749 break;
750 }
751 PrevState = CurrState;
752 return false;
753 }
onLParen()754 void onLParen() {
755 IntelExprState CurrState = State;
756 switch (State) {
757 default:
758 State = IES_ERROR;
759 break;
760 case IES_PLUS:
761 case IES_MINUS:
762 case IES_NOT:
763 case IES_OR:
764 case IES_XOR:
765 case IES_AND:
766 case IES_LSHIFT:
767 case IES_RSHIFT:
768 case IES_MULTIPLY:
769 case IES_DIVIDE:
770 case IES_MOD:
771 case IES_LPAREN:
772 case IES_INIT:
773 case IES_LBRAC:
774 State = IES_LPAREN;
775 IC.pushOperator(IC_LPAREN);
776 break;
777 }
778 PrevState = CurrState;
779 }
onRParen()780 void onRParen() {
781 PrevState = State;
782 switch (State) {
783 default:
784 State = IES_ERROR;
785 break;
786 case IES_INTEGER:
787 case IES_REGISTER:
788 case IES_RPAREN:
789 State = IES_RPAREN;
790 IC.pushOperator(IC_RPAREN);
791 break;
792 }
793 }
794 };
795
Error(SMLoc L,const Twine & Msg,SMRange Range=None,bool MatchingInlineAsm=false)796 bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
797 bool MatchingInlineAsm = false) {
798 MCAsmParser &Parser = getParser();
799 if (MatchingInlineAsm) {
800 if (!getLexer().isAtStartOfStatement())
801 Parser.eatToEndOfStatement();
802 return false;
803 }
804 return Parser.Error(L, Msg, Range);
805 }
806
ErrorOperand(SMLoc Loc,StringRef Msg,SMRange R=SMRange ())807 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
808 Error(Loc, Msg, R);
809 return nullptr;
810 }
811
812 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
813 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
814 bool IsSIReg(unsigned Reg);
815 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
816 void
817 AddDefaultSrcDestOperands(OperandVector &Operands,
818 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
819 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
820 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
821 OperandVector &FinalOperands);
822 std::unique_ptr<X86Operand> ParseOperand();
823 std::unique_ptr<X86Operand> ParseATTOperand();
824 std::unique_ptr<X86Operand> ParseIntelOperand();
825 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
826 bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
827 unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
828 unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
829 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
830 bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
831 void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
832 SMLoc End);
833 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
834 bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
835 InlineAsmIdentifierInfo &Info,
836 bool IsUnevaluatedOperand, SMLoc &End);
837
838 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
839 const MCExpr *&Disp,
840 const SMLoc &StartLoc,
841 SMLoc &EndLoc);
842
843 bool ParseIntelMemoryOperandSize(unsigned &Size);
844 std::unique_ptr<X86Operand>
845 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
846 unsigned IndexReg, unsigned Scale, SMLoc Start,
847 SMLoc End, unsigned Size, StringRef Identifier,
848 const InlineAsmIdentifierInfo &Info);
849
850 bool parseDirectiveEven(SMLoc L);
851 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
852
853 /// CodeView FPO data directives.
854 bool parseDirectiveFPOProc(SMLoc L);
855 bool parseDirectiveFPOSetFrame(SMLoc L);
856 bool parseDirectiveFPOPushReg(SMLoc L);
857 bool parseDirectiveFPOStackAlloc(SMLoc L);
858 bool parseDirectiveFPOStackAlign(SMLoc L);
859 bool parseDirectiveFPOEndPrologue(SMLoc L);
860 bool parseDirectiveFPOEndProc(SMLoc L);
861 bool parseDirectiveFPOData(SMLoc L);
862
863 bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
864 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
865
866 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
867 /// instrumentation around Inst.
868 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
869
870 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
871 OperandVector &Operands, MCStreamer &Out,
872 uint64_t &ErrorInfo,
873 bool MatchingInlineAsm) override;
874
875 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
876 MCStreamer &Out, bool MatchingInlineAsm);
877
878 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
879 bool MatchingInlineAsm);
880
881 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
882 OperandVector &Operands, MCStreamer &Out,
883 uint64_t &ErrorInfo,
884 bool MatchingInlineAsm);
885
886 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
887 OperandVector &Operands, MCStreamer &Out,
888 uint64_t &ErrorInfo,
889 bool MatchingInlineAsm);
890
891 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
892
893 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
894 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
895 /// return false if no parsing errors occurred, true otherwise.
896 bool HandleAVX512Operand(OperandVector &Operands,
897 const MCParsedAsmOperand &Op);
898
899 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
900
is64BitMode() const901 bool is64BitMode() const {
902 // FIXME: Can tablegen auto-generate this?
903 return getSTI().getFeatureBits()[X86::Mode64Bit];
904 }
is32BitMode() const905 bool is32BitMode() const {
906 // FIXME: Can tablegen auto-generate this?
907 return getSTI().getFeatureBits()[X86::Mode32Bit];
908 }
is16BitMode() const909 bool is16BitMode() const {
910 // FIXME: Can tablegen auto-generate this?
911 return getSTI().getFeatureBits()[X86::Mode16Bit];
912 }
SwitchMode(unsigned mode)913 void SwitchMode(unsigned mode) {
914 MCSubtargetInfo &STI = copySTI();
915 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
916 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
917 uint64_t FB = ComputeAvailableFeatures(
918 STI.ToggleFeature(OldMode.flip(mode)));
919 setAvailableFeatures(FB);
920
921 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
922 }
923
getPointerWidth()924 unsigned getPointerWidth() {
925 if (is16BitMode()) return 16;
926 if (is32BitMode()) return 32;
927 if (is64BitMode()) return 64;
928 llvm_unreachable("invalid mode");
929 }
930
isParsingIntelSyntax()931 bool isParsingIntelSyntax() {
932 return getParser().getAssemblerDialect();
933 }
934
935 /// @name Auto-generated Matcher Functions
936 /// {
937
938 #define GET_ASSEMBLER_HEADER
939 #include "X86GenAsmMatcher.inc"
940
941 /// }
942
943 public:
944
X86AsmParser(const MCSubtargetInfo & sti,MCAsmParser & Parser,const MCInstrInfo & mii,const MCTargetOptions & Options)945 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
946 const MCInstrInfo &mii, const MCTargetOptions &Options)
947 : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
948 Code16GCC(false) {
949
950 Parser.addAliasForDirective(".word", ".2byte");
951
952 // Initialize the set of available features.
953 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
954 Instrumentation.reset(
955 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
956 }
957
958 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
959
960 void SetFrameRegister(unsigned RegNo) override;
961
962 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
963
964 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
965 SMLoc NameLoc, OperandVector &Operands) override;
966
967 bool ParseDirective(AsmToken DirectiveID) override;
968 };
969 } // end anonymous namespace
970
971 /// @name Auto-generated Match Functions
972 /// {
973
974 static unsigned MatchRegisterName(StringRef Name);
975
976 /// }
977
CheckBaseRegAndIndexRegAndScale(unsigned BaseReg,unsigned IndexReg,unsigned Scale,bool Is64BitMode,StringRef & ErrMsg)978 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
979 unsigned Scale, bool Is64BitMode,
980 StringRef &ErrMsg) {
981 // If we have both a base register and an index register make sure they are
982 // both 64-bit or 32-bit registers.
983 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
984
985 if (BaseReg != 0 &&
986 !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
987 X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
988 X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
989 X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
990 ErrMsg = "invalid base+index expression";
991 return true;
992 }
993
994 if (IndexReg != 0 &&
995 !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
996 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
997 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
998 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
999 X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1000 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1001 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1002 ErrMsg = "invalid base+index expression";
1003 return true;
1004 }
1005
1006 if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1007 IndexReg == X86::EIP || IndexReg == X86::RIP ||
1008 IndexReg == X86::ESP || IndexReg == X86::RSP) {
1009 ErrMsg = "invalid base+index expression";
1010 return true;
1011 }
1012
1013 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1014 // and then only in non-64-bit modes.
1015 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1016 (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1017 BaseReg != X86::SI && BaseReg != X86::DI))) {
1018 ErrMsg = "invalid 16-bit base register";
1019 return true;
1020 }
1021
1022 if (BaseReg == 0 &&
1023 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1024 ErrMsg = "16-bit memory operand may not include only index register";
1025 return true;
1026 }
1027
1028 if (BaseReg != 0 && IndexReg != 0) {
1029 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1030 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1031 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1032 IndexReg == X86::EIZ)) {
1033 ErrMsg = "base register is 64-bit, but index register is not";
1034 return true;
1035 }
1036 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1037 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1038 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1039 IndexReg == X86::RIZ)) {
1040 ErrMsg = "base register is 32-bit, but index register is not";
1041 return true;
1042 }
1043 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1044 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1045 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1046 ErrMsg = "base register is 16-bit, but index register is not";
1047 return true;
1048 }
1049 if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1050 (IndexReg != X86::SI && IndexReg != X86::DI)) {
1051 ErrMsg = "invalid 16-bit base/index register combination";
1052 return true;
1053 }
1054 }
1055 }
1056
1057 // RIP/EIP-relative addressing is only supported in 64-bit mode.
1058 if (!Is64BitMode && BaseReg != 0 &&
1059 (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1060 ErrMsg = "IP-relative addressing requires 64-bit mode";
1061 return true;
1062 }
1063
1064 return checkScale(Scale, ErrMsg);
1065 }
1066
ParseRegister(unsigned & RegNo,SMLoc & StartLoc,SMLoc & EndLoc)1067 bool X86AsmParser::ParseRegister(unsigned &RegNo,
1068 SMLoc &StartLoc, SMLoc &EndLoc) {
1069 MCAsmParser &Parser = getParser();
1070 RegNo = 0;
1071 const AsmToken &PercentTok = Parser.getTok();
1072 StartLoc = PercentTok.getLoc();
1073
1074 // If we encounter a %, ignore it. This code handles registers with and
1075 // without the prefix, unprefixed registers can occur in cfi directives.
1076 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
1077 Parser.Lex(); // Eat percent token.
1078
1079 const AsmToken &Tok = Parser.getTok();
1080 EndLoc = Tok.getEndLoc();
1081
1082 if (Tok.isNot(AsmToken::Identifier)) {
1083 if (isParsingIntelSyntax()) return true;
1084 return Error(StartLoc, "invalid register name",
1085 SMRange(StartLoc, EndLoc));
1086 }
1087
1088 RegNo = MatchRegisterName(Tok.getString());
1089
1090 // If the match failed, try the register name as lowercase.
1091 if (RegNo == 0)
1092 RegNo = MatchRegisterName(Tok.getString().lower());
1093
1094 // The "flags" register cannot be referenced directly.
1095 // Treat it as an identifier instead.
1096 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
1097 RegNo = 0;
1098
1099 if (!is64BitMode()) {
1100 // FIXME: This should be done using Requires<Not64BitMode> and
1101 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1102 // checked.
1103 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
1104 // REX prefix.
1105 if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1106 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1107 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1108 X86II::isX86_64ExtendedReg(RegNo)) {
1109 StringRef RegName = Tok.getString();
1110 Parser.Lex(); // Eat register name.
1111 return Error(StartLoc,
1112 "register %" + RegName + " is only available in 64-bit mode",
1113 SMRange(StartLoc, EndLoc));
1114 }
1115 }
1116
1117 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
1118 if (RegNo == X86::ST0) {
1119 Parser.Lex(); // Eat 'st'
1120
1121 // Check to see if we have '(4)' after %st.
1122 if (getLexer().isNot(AsmToken::LParen))
1123 return false;
1124 // Lex the paren.
1125 getParser().Lex();
1126
1127 const AsmToken &IntTok = Parser.getTok();
1128 if (IntTok.isNot(AsmToken::Integer))
1129 return Error(IntTok.getLoc(), "expected stack index");
1130 switch (IntTok.getIntVal()) {
1131 case 0: RegNo = X86::ST0; break;
1132 case 1: RegNo = X86::ST1; break;
1133 case 2: RegNo = X86::ST2; break;
1134 case 3: RegNo = X86::ST3; break;
1135 case 4: RegNo = X86::ST4; break;
1136 case 5: RegNo = X86::ST5; break;
1137 case 6: RegNo = X86::ST6; break;
1138 case 7: RegNo = X86::ST7; break;
1139 default: return Error(IntTok.getLoc(), "invalid stack index");
1140 }
1141
1142 if (getParser().Lex().isNot(AsmToken::RParen))
1143 return Error(Parser.getTok().getLoc(), "expected ')'");
1144
1145 EndLoc = Parser.getTok().getEndLoc();
1146 Parser.Lex(); // Eat ')'
1147 return false;
1148 }
1149
1150 EndLoc = Parser.getTok().getEndLoc();
1151
1152 // If this is "db[0-15]", match it as an alias
1153 // for dr[0-15].
1154 if (RegNo == 0 && Tok.getString().startswith("db")) {
1155 if (Tok.getString().size() == 3) {
1156 switch (Tok.getString()[2]) {
1157 case '0': RegNo = X86::DR0; break;
1158 case '1': RegNo = X86::DR1; break;
1159 case '2': RegNo = X86::DR2; break;
1160 case '3': RegNo = X86::DR3; break;
1161 case '4': RegNo = X86::DR4; break;
1162 case '5': RegNo = X86::DR5; break;
1163 case '6': RegNo = X86::DR6; break;
1164 case '7': RegNo = X86::DR7; break;
1165 case '8': RegNo = X86::DR8; break;
1166 case '9': RegNo = X86::DR9; break;
1167 }
1168 } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
1169 switch (Tok.getString()[3]) {
1170 case '0': RegNo = X86::DR10; break;
1171 case '1': RegNo = X86::DR11; break;
1172 case '2': RegNo = X86::DR12; break;
1173 case '3': RegNo = X86::DR13; break;
1174 case '4': RegNo = X86::DR14; break;
1175 case '5': RegNo = X86::DR15; break;
1176 }
1177 }
1178
1179 if (RegNo != 0) {
1180 EndLoc = Parser.getTok().getEndLoc();
1181 Parser.Lex(); // Eat it.
1182 return false;
1183 }
1184 }
1185
1186 if (RegNo == 0) {
1187 if (isParsingIntelSyntax()) return true;
1188 return Error(StartLoc, "invalid register name",
1189 SMRange(StartLoc, EndLoc));
1190 }
1191
1192 Parser.Lex(); // Eat identifier token.
1193 return false;
1194 }
1195
SetFrameRegister(unsigned RegNo)1196 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
1197 Instrumentation->SetInitialFrameRegister(RegNo);
1198 }
1199
DefaultMemSIOperand(SMLoc Loc)1200 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1201 bool Parse32 = is32BitMode() || Code16GCC;
1202 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1203 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1204 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1205 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1206 Loc, Loc, 0);
1207 }
1208
DefaultMemDIOperand(SMLoc Loc)1209 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1210 bool Parse32 = is32BitMode() || Code16GCC;
1211 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1212 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1213 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1214 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1215 Loc, Loc, 0);
1216 }
1217
IsSIReg(unsigned Reg)1218 bool X86AsmParser::IsSIReg(unsigned Reg) {
1219 switch (Reg) {
1220 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1221 case X86::RSI:
1222 case X86::ESI:
1223 case X86::SI:
1224 return true;
1225 case X86::RDI:
1226 case X86::EDI:
1227 case X86::DI:
1228 return false;
1229 }
1230 }
1231
GetSIDIForRegClass(unsigned RegClassID,unsigned Reg,bool IsSIReg)1232 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1233 bool IsSIReg) {
1234 switch (RegClassID) {
1235 default: llvm_unreachable("Unexpected register class");
1236 case X86::GR64RegClassID:
1237 return IsSIReg ? X86::RSI : X86::RDI;
1238 case X86::GR32RegClassID:
1239 return IsSIReg ? X86::ESI : X86::EDI;
1240 case X86::GR16RegClassID:
1241 return IsSIReg ? X86::SI : X86::DI;
1242 }
1243 }
1244
AddDefaultSrcDestOperands(OperandVector & Operands,std::unique_ptr<llvm::MCParsedAsmOperand> && Src,std::unique_ptr<llvm::MCParsedAsmOperand> && Dst)1245 void X86AsmParser::AddDefaultSrcDestOperands(
1246 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1247 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1248 if (isParsingIntelSyntax()) {
1249 Operands.push_back(std::move(Dst));
1250 Operands.push_back(std::move(Src));
1251 }
1252 else {
1253 Operands.push_back(std::move(Src));
1254 Operands.push_back(std::move(Dst));
1255 }
1256 }
1257
VerifyAndAdjustOperands(OperandVector & OrigOperands,OperandVector & FinalOperands)1258 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1259 OperandVector &FinalOperands) {
1260
1261 if (OrigOperands.size() > 1) {
1262 // Check if sizes match, OrigOperands also contains the instruction name
1263 assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1264 "Operand size mismatch");
1265
1266 SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1267 // Verify types match
1268 int RegClassID = -1;
1269 for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1270 X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1271 X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1272
1273 if (FinalOp.isReg() &&
1274 (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1275 // Return false and let a normal complaint about bogus operands happen
1276 return false;
1277
1278 if (FinalOp.isMem()) {
1279
1280 if (!OrigOp.isMem())
1281 // Return false and let a normal complaint about bogus operands happen
1282 return false;
1283
1284 unsigned OrigReg = OrigOp.Mem.BaseReg;
1285 unsigned FinalReg = FinalOp.Mem.BaseReg;
1286
1287 // If we've already encounterd a register class, make sure all register
1288 // bases are of the same register class
1289 if (RegClassID != -1 &&
1290 !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1291 return Error(OrigOp.getStartLoc(),
1292 "mismatching source and destination index registers");
1293 }
1294
1295 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1296 RegClassID = X86::GR64RegClassID;
1297 else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1298 RegClassID = X86::GR32RegClassID;
1299 else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1300 RegClassID = X86::GR16RegClassID;
1301 else
1302 // Unexpected register class type
1303 // Return false and let a normal complaint about bogus operands happen
1304 return false;
1305
1306 bool IsSI = IsSIReg(FinalReg);
1307 FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1308
1309 if (FinalReg != OrigReg) {
1310 std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1311 Warnings.push_back(std::make_pair(
1312 OrigOp.getStartLoc(),
1313 "memory operand is only for determining the size, " + RegName +
1314 " will be used for the location"));
1315 }
1316
1317 FinalOp.Mem.Size = OrigOp.Mem.Size;
1318 FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1319 FinalOp.Mem.BaseReg = FinalReg;
1320 }
1321 }
1322
1323 // Produce warnings only if all the operands passed the adjustment - prevent
1324 // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1325 for (auto &WarningMsg : Warnings) {
1326 Warning(WarningMsg.first, WarningMsg.second);
1327 }
1328
1329 // Remove old operands
1330 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1331 OrigOperands.pop_back();
1332 }
1333 // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1334 for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1335 OrigOperands.push_back(std::move(FinalOperands[i]));
1336
1337 return false;
1338 }
1339
ParseOperand()1340 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1341 if (isParsingIntelSyntax())
1342 return ParseIntelOperand();
1343 return ParseATTOperand();
1344 }
1345
CreateMemForInlineAsm(unsigned SegReg,const MCExpr * Disp,unsigned BaseReg,unsigned IndexReg,unsigned Scale,SMLoc Start,SMLoc End,unsigned Size,StringRef Identifier,const InlineAsmIdentifierInfo & Info)1346 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1347 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1348 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1349 const InlineAsmIdentifierInfo &Info) {
1350 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1351 // some other label reference.
1352 if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
1353 // Insert an explicit size if the user didn't have one.
1354 if (!Size) {
1355 Size = getPointerWidth();
1356 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1357 /*Len=*/0, Size);
1358 }
1359 // Create an absolute memory reference in order to match against
1360 // instructions taking a PC relative operand.
1361 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1362 Identifier, Info.Label.Decl);
1363 }
1364 // We either have a direct symbol reference, or an offset from a symbol. The
1365 // parser always puts the symbol on the LHS, so look there for size
1366 // calculation purposes.
1367 unsigned FrontendSize = 0;
1368 void *Decl = nullptr;
1369 bool IsGlobalLV = false;
1370 if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1371 // Size is in terms of bits in this context.
1372 FrontendSize = Info.Var.Type * 8;
1373 Decl = Info.Var.Decl;
1374 IsGlobalLV = Info.Var.IsGlobalLV;
1375 }
1376 // It is widely common for MS InlineAsm to use a global variable and one/two
1377 // registers in a mmory expression, and though unaccessible via rip/eip.
1378 if (IsGlobalLV && (BaseReg || IndexReg)) {
1379 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
1380 // Otherwise, we set the base register to a non-zero value
1381 // if we don't know the actual value at this time. This is necessary to
1382 // get the matching correct in some cases.
1383 } else {
1384 BaseReg = BaseReg ? BaseReg : 1;
1385 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1386 IndexReg, Scale, Start, End, Size, Identifier,
1387 Decl, FrontendSize);
1388 }
1389 }
1390
1391 // Some binary bitwise operators have a named synonymous
1392 // Query a candidate string for being such a named operator
1393 // and if so - invoke the appropriate handler
ParseIntelNamedOperator(StringRef Name,IntelExprStateMachine & SM)1394 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1395 // A named operator should be either lower or upper case, but not a mix
1396 if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
1397 return false;
1398 if (Name.equals_lower("not"))
1399 SM.onNot();
1400 else if (Name.equals_lower("or"))
1401 SM.onOr();
1402 else if (Name.equals_lower("shl"))
1403 SM.onLShift();
1404 else if (Name.equals_lower("shr"))
1405 SM.onRShift();
1406 else if (Name.equals_lower("xor"))
1407 SM.onXor();
1408 else if (Name.equals_lower("and"))
1409 SM.onAnd();
1410 else if (Name.equals_lower("mod"))
1411 SM.onMod();
1412 else
1413 return false;
1414 return true;
1415 }
1416
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1417 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1418 MCAsmParser &Parser = getParser();
1419 const AsmToken &Tok = Parser.getTok();
1420 StringRef ErrMsg;
1421
1422 AsmToken::TokenKind PrevTK = AsmToken::Error;
1423 bool Done = false;
1424 while (!Done) {
1425 bool UpdateLocLex = true;
1426 AsmToken::TokenKind TK = getLexer().getKind();
1427
1428 switch (TK) {
1429 default:
1430 if ((Done = SM.isValidEndState()))
1431 break;
1432 return Error(Tok.getLoc(), "unknown token in expression");
1433 case AsmToken::EndOfStatement:
1434 Done = true;
1435 break;
1436 case AsmToken::Real:
1437 // DotOperator: [ebx].0
1438 UpdateLocLex = false;
1439 if (ParseIntelDotOperator(SM, End))
1440 return true;
1441 break;
1442 case AsmToken::At:
1443 case AsmToken::String:
1444 case AsmToken::Identifier: {
1445 SMLoc IdentLoc = Tok.getLoc();
1446 StringRef Identifier = Tok.getString();
1447 UpdateLocLex = false;
1448 // Register
1449 unsigned Reg;
1450 if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
1451 if (SM.onRegister(Reg, ErrMsg))
1452 return Error(Tok.getLoc(), ErrMsg);
1453 break;
1454 }
1455 // Operator synonymous ("not", "or" etc.)
1456 if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
1457 break;
1458 // Symbol reference, when parsing assembly content
1459 InlineAsmIdentifierInfo Info;
1460 const MCExpr *Val;
1461 if (!isParsingInlineAsm()) {
1462 if (getParser().parsePrimaryExpr(Val, End)) {
1463 return Error(Tok.getLoc(), "Unexpected identifier!");
1464 } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
1465 return Error(IdentLoc, ErrMsg);
1466 } else
1467 break;
1468 }
1469 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
1470 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
1471 if (OpKind == IOK_OFFSET)
1472 return Error(IdentLoc, "Dealing OFFSET operator as part of"
1473 "a compound immediate expression is yet to be supported");
1474 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
1475 if (SM.onInteger(Val, ErrMsg))
1476 return Error(IdentLoc, ErrMsg);
1477 } else
1478 return true;
1479 break;
1480 }
1481 // MS Dot Operator expression
1482 if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
1483 if (ParseIntelDotOperator(SM, End))
1484 return true;
1485 break;
1486 }
1487 // MS InlineAsm identifier
1488 // Call parseIdentifier() to combine @ with the identifier behind it.
1489 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
1490 return Error(IdentLoc, "expected identifier");
1491 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
1492 return true;
1493 else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
1494 return Error(IdentLoc, ErrMsg);
1495 break;
1496 }
1497 case AsmToken::Integer: {
1498 // Look for 'b' or 'f' following an Integer as a directional label
1499 SMLoc Loc = getTok().getLoc();
1500 int64_t IntVal = getTok().getIntVal();
1501 End = consumeToken();
1502 UpdateLocLex = false;
1503 if (getLexer().getKind() == AsmToken::Identifier) {
1504 StringRef IDVal = getTok().getString();
1505 if (IDVal == "f" || IDVal == "b") {
1506 MCSymbol *Sym =
1507 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1508 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1509 const MCExpr *Val =
1510 MCSymbolRefExpr::create(Sym, Variant, getContext());
1511 if (IDVal == "b" && Sym->isUndefined())
1512 return Error(Loc, "invalid reference to undefined symbol");
1513 StringRef Identifier = Sym->getName();
1514 InlineAsmIdentifierInfo Info;
1515 if (SM.onIdentifierExpr(Val, Identifier, Info,
1516 isParsingInlineAsm(), ErrMsg))
1517 return Error(Loc, ErrMsg);
1518 End = consumeToken();
1519 } else {
1520 if (SM.onInteger(IntVal, ErrMsg))
1521 return Error(Loc, ErrMsg);
1522 }
1523 } else {
1524 if (SM.onInteger(IntVal, ErrMsg))
1525 return Error(Loc, ErrMsg);
1526 }
1527 break;
1528 }
1529 case AsmToken::Plus:
1530 if (SM.onPlus(ErrMsg))
1531 return Error(getTok().getLoc(), ErrMsg);
1532 break;
1533 case AsmToken::Minus:
1534 if (SM.onMinus(ErrMsg))
1535 return Error(getTok().getLoc(), ErrMsg);
1536 break;
1537 case AsmToken::Tilde: SM.onNot(); break;
1538 case AsmToken::Star: SM.onStar(); break;
1539 case AsmToken::Slash: SM.onDivide(); break;
1540 case AsmToken::Percent: SM.onMod(); break;
1541 case AsmToken::Pipe: SM.onOr(); break;
1542 case AsmToken::Caret: SM.onXor(); break;
1543 case AsmToken::Amp: SM.onAnd(); break;
1544 case AsmToken::LessLess:
1545 SM.onLShift(); break;
1546 case AsmToken::GreaterGreater:
1547 SM.onRShift(); break;
1548 case AsmToken::LBrac:
1549 if (SM.onLBrac())
1550 return Error(Tok.getLoc(), "unexpected bracket encountered");
1551 break;
1552 case AsmToken::RBrac:
1553 if (SM.onRBrac())
1554 return Error(Tok.getLoc(), "unexpected bracket encountered");
1555 break;
1556 case AsmToken::LParen: SM.onLParen(); break;
1557 case AsmToken::RParen: SM.onRParen(); break;
1558 }
1559 if (SM.hadError())
1560 return Error(Tok.getLoc(), "unknown token in expression");
1561
1562 if (!Done && UpdateLocLex)
1563 End = consumeToken();
1564
1565 PrevTK = TK;
1566 }
1567 return false;
1568 }
1569
RewriteIntelExpression(IntelExprStateMachine & SM,SMLoc Start,SMLoc End)1570 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
1571 SMLoc Start, SMLoc End) {
1572 SMLoc Loc = Start;
1573 unsigned ExprLen = End.getPointer() - Start.getPointer();
1574 // Skip everything before a symbol displacement (if we have one)
1575 if (SM.getSym()) {
1576 StringRef SymName = SM.getSymName();
1577 if (unsigned Len = SymName.data() - Start.getPointer())
1578 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
1579 Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
1580 ExprLen = End.getPointer() - (SymName.data() + SymName.size());
1581 // If we have only a symbol than there's no need for complex rewrite,
1582 // simply skip everything after it
1583 if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
1584 if (ExprLen)
1585 InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
1586 return;
1587 }
1588 }
1589 // Build an Intel Expression rewrite
1590 StringRef BaseRegStr;
1591 StringRef IndexRegStr;
1592 if (SM.getBaseReg())
1593 BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
1594 if (SM.getIndexReg())
1595 IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
1596 // Emit it
1597 IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr());
1598 InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
1599 }
1600
1601 // Inline assembly may use variable names with namespace alias qualifiers.
ParseIntelInlineAsmIdentifier(const MCExpr * & Val,StringRef & Identifier,InlineAsmIdentifierInfo & Info,bool IsUnevaluatedOperand,SMLoc & End)1602 bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
1603 StringRef &Identifier,
1604 InlineAsmIdentifierInfo &Info,
1605 bool IsUnevaluatedOperand,
1606 SMLoc &End) {
1607 MCAsmParser &Parser = getParser();
1608 assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1609 Val = nullptr;
1610
1611 StringRef LineBuf(Identifier.data());
1612 SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1613
1614 const AsmToken &Tok = Parser.getTok();
1615 SMLoc Loc = Tok.getLoc();
1616
1617 // Advance the token stream until the end of the current token is
1618 // after the end of what the frontend claimed.
1619 const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1620 do {
1621 End = Tok.getEndLoc();
1622 getLexer().Lex();
1623 } while (End.getPointer() < EndPtr);
1624 Identifier = LineBuf;
1625
1626 // The frontend should end parsing on an assembler token boundary, unless it
1627 // failed parsing.
1628 assert((End.getPointer() == EndPtr ||
1629 Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
1630 "frontend claimed part of a token?");
1631
1632 // If the identifier lookup was unsuccessful, assume that we are dealing with
1633 // a label.
1634 if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
1635 StringRef InternalName =
1636 SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1637 Loc, false);
1638 assert(InternalName.size() && "We should have an internal name here.");
1639 // Push a rewrite for replacing the identifier name with the internal name.
1640 InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1641 InternalName);
1642 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1643 return false;
1644 // Create the symbol reference.
1645 MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1646 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1647 Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1648 return false;
1649 }
1650
1651 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1652 std::unique_ptr<X86Operand>
ParseRoundingModeOp(SMLoc Start)1653 X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
1654 MCAsmParser &Parser = getParser();
1655 const AsmToken &Tok = Parser.getTok();
1656 // Eat "{" and mark the current place.
1657 const SMLoc consumedToken = consumeToken();
1658 if (Tok.getIdentifier().startswith("r")){
1659 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1660 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1661 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1662 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1663 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1664 .Default(-1);
1665 if (-1 == rndMode)
1666 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1667 Parser.Lex(); // Eat "r*" of r*-sae
1668 if (!getLexer().is(AsmToken::Minus))
1669 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1670 Parser.Lex(); // Eat "-"
1671 Parser.Lex(); // Eat the sae
1672 if (!getLexer().is(AsmToken::RCurly))
1673 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1674 SMLoc End = Tok.getEndLoc();
1675 Parser.Lex(); // Eat "}"
1676 const MCExpr *RndModeOp =
1677 MCConstantExpr::create(rndMode, Parser.getContext());
1678 return X86Operand::CreateImm(RndModeOp, Start, End);
1679 }
1680 if(Tok.getIdentifier().equals("sae")){
1681 Parser.Lex(); // Eat the sae
1682 if (!getLexer().is(AsmToken::RCurly))
1683 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1684 Parser.Lex(); // Eat "}"
1685 return X86Operand::CreateToken("{sae}", consumedToken);
1686 }
1687 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1688 }
1689
1690 /// Parse the '.' operator.
ParseIntelDotOperator(IntelExprStateMachine & SM,SMLoc & End)1691 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
1692 const AsmToken &Tok = getTok();
1693 unsigned Offset;
1694
1695 // Drop the optional '.'.
1696 StringRef DotDispStr = Tok.getString();
1697 if (DotDispStr.startswith("."))
1698 DotDispStr = DotDispStr.drop_front(1);
1699
1700 // .Imm gets lexed as a real.
1701 if (Tok.is(AsmToken::Real)) {
1702 APInt DotDisp;
1703 DotDispStr.getAsInteger(10, DotDisp);
1704 Offset = DotDisp.getZExtValue();
1705 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1706 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1707 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1708 Offset))
1709 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1710 } else
1711 return Error(Tok.getLoc(), "Unexpected token type!");
1712
1713 // Eat the DotExpression and update End
1714 End = SMLoc::getFromPointer(DotDispStr.data());
1715 const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
1716 while (Tok.getLoc().getPointer() < DotExprEndLoc)
1717 Lex();
1718 SM.addImm(Offset);
1719 return false;
1720 }
1721
1722 /// Parse the 'offset' operator. This operator is used to specify the
1723 /// location rather than the content of a variable.
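/// e.g. "mov eax, offset myVar" loads the address of myVar rather than its
/// value (illustrative; myVar is a hypothetical variable).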
1724 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1725 MCAsmParser &Parser = getParser();
1726 const AsmToken &Tok = Parser.getTok();
1727 SMLoc OffsetOfLoc = Tok.getLoc();
1728 Parser.Lex(); // Eat offset.
1729
1730 const MCExpr *Val;
1731 InlineAsmIdentifierInfo Info;
1732 SMLoc Start = Tok.getLoc(), End;
1733 StringRef Identifier = Tok.getString();
1734 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1735 /*Unevaluated=*/false, End))
1736 return nullptr;
1737
1738 void *Decl = nullptr;
1739   // FIXME: MS evaluates "offset <Constant>" to the underlying integral value.
1740 if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1741 return ErrorOperand(Start, "offset operator cannot yet handle constants");
1742 else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
1743 Decl = Info.Var.Decl;
1744 // Don't emit the offset operator.
1745 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1746
1747   // The offset operator will have an 'r' constraint, thus we need to create
1748   // a register operand to ensure proper matching. Just pick a GPR based on
1749 // the size of a pointer.
1750 bool Parse32 = is32BitMode() || Code16GCC;
1751 unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1752
1753 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1754 OffsetOfLoc, Identifier, Decl);
1755 }
1756
1757 // Query a candidate string for being an Intel assembly operator.
1758 // Report back its kind, or IOK_INVALID if it does not evaluate to a known one.
1759 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
1760 return StringSwitch<unsigned>(Name)
1761 .Cases("TYPE","type",IOK_TYPE)
1762 .Cases("SIZE","size",IOK_SIZE)
1763 .Cases("LENGTH","length",IOK_LENGTH)
1764 .Cases("OFFSET","offset",IOK_OFFSET)
1765 .Default(IOK_INVALID);
1766 }
1767
1768 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1769 /// returns the number of elements in an array. It returns the value 1 for
1770 /// non-array variables. The SIZE operator returns the size of a C or C++
1771 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1772 /// TYPE operator returns the size of a C or C++ type or variable. If the
1773 /// variable is an array, TYPE returns the size of a single element.
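/// e.g. for a hypothetical "int arr[10]": LENGTH arr == 10, TYPE arr == 4,
/// SIZE arr == 40 (illustrative).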
1774 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
1775 MCAsmParser &Parser = getParser();
1776 const AsmToken &Tok = Parser.getTok();
1777 Parser.Lex(); // Eat operator.
1778
1779 const MCExpr *Val = nullptr;
1780 InlineAsmIdentifierInfo Info;
1781 SMLoc Start = Tok.getLoc(), End;
1782 StringRef Identifier = Tok.getString();
1783 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1784 /*Unevaluated=*/true, End))
1785 return 0;
1786
1787 if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1788 Error(Start, "unable to lookup expression");
1789 return 0;
1790 }
1791
1792 unsigned CVal = 0;
1793 switch(OpKind) {
1794 default: llvm_unreachable("Unexpected operand kind!");
1795 case IOK_LENGTH: CVal = Info.Var.Length; break;
1796 case IOK_SIZE: CVal = Info.Var.Size; break;
1797 case IOK_TYPE: CVal = Info.Var.Type; break;
1798 }
1799
1800 return CVal;
1801 }
1802
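// Parse an optional Intel operand-size qualifier, e.g. the "dword ptr" in
// "mov eax, dword ptr [rbx]", which sets Size to 32 (illustrative example).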
1803 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
1804 Size = StringSwitch<unsigned>(getTok().getString())
1805 .Cases("BYTE", "byte", 8)
1806 .Cases("WORD", "word", 16)
1807 .Cases("DWORD", "dword", 32)
1808 .Cases("FLOAT", "float", 32)
1809 .Cases("LONG", "long", 32)
1810 .Cases("FWORD", "fword", 48)
1811 .Cases("DOUBLE", "double", 64)
1812 .Cases("QWORD", "qword", 64)
1813 .Cases("MMWORD","mmword", 64)
1814 .Cases("XWORD", "xword", 80)
1815 .Cases("TBYTE", "tbyte", 80)
1816 .Cases("XMMWORD", "xmmword", 128)
1817 .Cases("YMMWORD", "ymmword", 256)
1818 .Cases("ZMMWORD", "zmmword", 512)
1819 .Default(0);
1820 if (Size) {
1821 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
1822 if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
1823 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1824 Lex(); // Eat ptr.
1825 }
1826 return false;
1827 }
1828
1829 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1830 MCAsmParser &Parser = getParser();
1831 const AsmToken &Tok = Parser.getTok();
1832 SMLoc Start, End;
1833
1834 // FIXME: Offset operator
1835   // Should be handled as part of an immediate expression, like other operators.
1836   // Currently, it is only supported as a stand-alone operand.
1837 if (isParsingInlineAsm())
1838 if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
1839 return ParseIntelOffsetOfOperator();
1840
1841 // Parse optional Size directive.
1842 unsigned Size;
1843 if (ParseIntelMemoryOperandSize(Size))
1844 return nullptr;
1845 bool PtrInOperand = bool(Size);
1846
1847 Start = Tok.getLoc();
1848
1849 // Rounding mode operand.
1850 if (getLexer().is(AsmToken::LCurly))
1851 return ParseRoundingModeOp(Start);
1852
1853 // Register operand.
1854 unsigned RegNo = 0;
1855 if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
1856 if (RegNo == X86::RIP)
1857 return ErrorOperand(Start, "rip can only be used as a base register");
1858 // A Register followed by ':' is considered a segment override
1859 if (Tok.isNot(AsmToken::Colon))
1860 return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
1861 ErrorOperand(Start, "expected memory operand after 'ptr', "
1862 "found register operand instead");
1863     // An alleged segment override; check if we have a valid segment register.
1864 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1865 return ErrorOperand(Start, "invalid segment register");
1866 // Eat ':' and update Start location
1867 Start = Lex().getLoc();
1868 }
1869
1870 // Immediates and Memory
1871 IntelExprStateMachine SM;
1872 if (ParseIntelExpression(SM, End))
1873 return nullptr;
1874
1875 if (isParsingInlineAsm())
1876 RewriteIntelExpression(SM, Start, Tok.getLoc());
1877
1878 int64_t Imm = SM.getImm();
1879 const MCExpr *Disp = SM.getSym();
1880 const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
1881 if (Disp && Imm)
1882 Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
1883 if (!Disp)
1884 Disp = ImmDisp;
1885
1886 // RegNo != 0 specifies a valid segment register,
1887 // and we are parsing a segment override
1888 if (!SM.isMemExpr() && !RegNo)
1889 return X86Operand::CreateImm(Disp, Start, End);
1890
1891 StringRef ErrMsg;
1892 unsigned BaseReg = SM.getBaseReg();
1893 unsigned IndexReg = SM.getIndexReg();
1894 unsigned Scale = SM.getScale();
1895
1896 if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
1897 (IndexReg == X86::ESP || IndexReg == X86::RSP))
1898 std::swap(BaseReg, IndexReg);
1899
1900 // If BaseReg is a vector register and IndexReg is not, swap them unless
1901 // Scale was specified in which case it would be an error.
1902 if (Scale == 0 &&
1903 !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1904 X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1905 X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
1906 (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
1907 X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
1908 X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
1909 std::swap(BaseReg, IndexReg);
1910
1911 if (Scale != 0 &&
1912 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
1913 return ErrorOperand(Start, "16-bit addresses cannot have a scale");
1914
1915 // If there was no explicit scale specified, change it to 1.
1916 if (Scale == 0)
1917 Scale = 1;
1918
1919 // If this is a 16-bit addressing mode with the base and index in the wrong
1920 // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
1921 // shared with att syntax where order matters.
1922 if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
1923 (IndexReg == X86::BX || IndexReg == X86::BP))
1924 std::swap(BaseReg, IndexReg);
1925
1926 if ((BaseReg || IndexReg) &&
1927 CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
1928 ErrMsg))
1929 return ErrorOperand(Start, ErrMsg);
1930 if (isParsingInlineAsm())
1931 return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
1932 Scale, Start, End, Size, SM.getSymName(),
1933 SM.getIdentifierInfo());
1934 if (!(BaseReg || IndexReg || RegNo))
1935 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1936 return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
1937 BaseReg, IndexReg, Scale, Start, End, Size);
1938 }
1939
1940 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1941 MCAsmParser &Parser = getParser();
1942 switch (getLexer().getKind()) {
1943 case AsmToken::Dollar: {
1944 // $42 or $ID -> immediate.
1945 SMLoc Start = Parser.getTok().getLoc(), End;
1946 Parser.Lex();
1947 const MCExpr *Val;
1948 // This is an immediate, so we should not parse a register. Do a precheck
1949     // for '%' to supersede intra-register parse errors.
1950 SMLoc L = Parser.getTok().getLoc();
1951 if (check(getLexer().is(AsmToken::Percent), L,
1952 "expected immediate expression") ||
1953 getParser().parseExpression(Val, End) ||
1954 check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
1955 return nullptr;
1956 return X86Operand::CreateImm(Val, Start, End);
1957 }
1958 case AsmToken::LCurly: {
1959 SMLoc Start = Parser.getTok().getLoc();
1960 return ParseRoundingModeOp(Start);
1961 }
1962 default: {
1963     // This is a memory operand or a register. We have some parsing complications
1964 // as a '(' may be part of an immediate expression or the addressing mode
1965 // block. This is complicated by the fact that an assembler-level variable
1966 // may refer either to a register or an immediate expression.
1967
1968 SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
1969 const MCExpr *Expr = nullptr;
1970 unsigned Reg = 0;
1971 if (getLexer().isNot(AsmToken::LParen)) {
1972 // No '(' so this is either a displacement expression or a register.
1973 if (Parser.parseExpression(Expr, EndLoc))
1974 return nullptr;
1975 if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
1976 // Segment Register. Reset Expr and copy value to register.
1977 Expr = nullptr;
1978 Reg = RE->getRegNo();
1979
1980 // Sanity check register.
1981 if (Reg == X86::EIZ || Reg == X86::RIZ)
1982 return ErrorOperand(
1983 Loc, "%eiz and %riz can only be used as index registers",
1984 SMRange(Loc, EndLoc));
1985 if (Reg == X86::RIP)
1986 return ErrorOperand(Loc, "%rip can only be used as a base register",
1987 SMRange(Loc, EndLoc));
1988         // Return registers that are not segment prefixes immediately.
1989 if (!Parser.parseOptionalToken(AsmToken::Colon))
1990 return X86Operand::CreateReg(Reg, Loc, EndLoc);
1991 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
1992 return ErrorOperand(Loc, "invalid segment register");
1993 }
1994 }
1995 // This is a Memory operand.
1996 return ParseMemOperand(Reg, Expr, Loc, EndLoc);
1997 }
1998 }
1999 }
2000
2001 // Returns true on failure, false otherwise.
2002 // If no {z} mark was found, the parser doesn't advance.
2003 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2004 const SMLoc &StartLoc) {
2005 MCAsmParser &Parser = getParser();
2006   // Assuming we are just past the '{' mark, query the next token.
2007   // If no {z} mark is found, return false, as no parsing error was
2008   // encountered.
2009 if (!(getLexer().is(AsmToken::Identifier) &&
2010 (getLexer().getTok().getIdentifier() == "z")))
2011 return false;
2012 Parser.Lex(); // Eat z
2013 // Query and eat the '}' mark
2014 if (!getLexer().is(AsmToken::RCurly))
2015 return Error(getLexer().getLoc(), "Expected } at this point");
2016 Parser.Lex(); // Eat '}'
2017   // Assign Z the {z} mark operand.
2018 Z = X86Operand::CreateToken("{z}", StartLoc);
2019 return false;
2020 }
2021
2022 // true on failure, false otherwise
2023 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
2024 const MCParsedAsmOperand &Op) {
2025 MCAsmParser &Parser = getParser();
2026 if (getLexer().is(AsmToken::LCurly)) {
2027 // Eat "{" and mark the current place.
2028 const SMLoc consumedToken = consumeToken();
2029 // Distinguish {1to<NUM>} from {%k<NUM>}.
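    // e.g. the broadcast in "vaddpd (%rax){1to8}, %zmm1, %zmm2" vs. the
    // writemask in "vaddpd %zmm1, %zmm2, %zmm3 {%k1}" (illustrative).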
2030     if (getLexer().is(AsmToken::Integer)) {
2031 // Parse memory broadcasting ({1to<NUM>}).
2032 if (getLexer().getTok().getIntVal() != 1)
2033 return TokError("Expected 1to<NUM> at this point");
2034 Parser.Lex(); // Eat "1" of 1to8
2035 if (!getLexer().is(AsmToken::Identifier) ||
2036 !getLexer().getTok().getIdentifier().startswith("to"))
2037 return TokError("Expected 1to<NUM> at this point");
2038 // Recognize only reasonable suffixes.
2039 const char *BroadcastPrimitive =
2040 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2041 .Case("to2", "{1to2}")
2042 .Case("to4", "{1to4}")
2043 .Case("to8", "{1to8}")
2044 .Case("to16", "{1to16}")
2045 .Default(nullptr);
2046 if (!BroadcastPrimitive)
2047 return TokError("Invalid memory broadcast primitive.");
2048 Parser.Lex(); // Eat "toN" of 1toN
2049 if (!getLexer().is(AsmToken::RCurly))
2050 return TokError("Expected } at this point");
2051 Parser.Lex(); // Eat "}"
2052 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2053 consumedToken));
2054       // No AVX512-specific primitives can follow memory broadcasting,
2055       // so return.
2056 return false;
2057 } else {
2058       // Parse either {k}{z}, {z}{k}, {k} or {z}.
2059       // The last one has no meaning, but GCC accepts it.
2060       // At this point we are just past a '{' mark.
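      // e.g. the trailing marks in "vaddps %zmm1, %zmm2, %zmm3 {%k1} {z}"
      // (illustrative).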
2061 std::unique_ptr<X86Operand> Z;
2062 if (ParseZ(Z, consumedToken))
2063 return true;
2064       // Reaching here means that parsing of the alleged '{z}' mark yielded
2065       // no errors.
2066       // Query whether further parsing for a {%k<NUM>} mark is needed.
2067 if (!Z || getLexer().is(AsmToken::LCurly)) {
2068 SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2069 // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2070 // expected
2071 unsigned RegNo;
2072 SMLoc RegLoc;
2073 if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
2074 X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2075 if (RegNo == X86::K0)
2076 return Error(RegLoc, "Register k0 can't be used as write mask");
2077 if (!getLexer().is(AsmToken::RCurly))
2078 return Error(getLexer().getLoc(), "Expected } at this point");
2079 Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2080 Operands.push_back(
2081 X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2082 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2083 } else
2084 return Error(getLexer().getLoc(),
2085 "Expected an op-mask register at this point");
2086         // A {%k<NUM>} mark was found; check for {z}.
2087 if (getLexer().is(AsmToken::LCurly) && !Z) {
2088           // If we hit a parsing error, or found no (expected) {z} mark,
2089           // report an error.
2090 if (ParseZ(Z, consumeToken()) || !Z)
2091 return Error(getLexer().getLoc(),
2092 "Expected a {z} mark at this point");
2093
2094 }
2095         // '{z}' on its own is meaningless, hence should be ignored.
2096         // When accompanied by a K register, however, allow it.
2098 if (Z)
2099 Operands.push_back(std::move(Z));
2100 }
2101 }
2102 }
2103 return false;
2104 }
2105
2106 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
2107 /// has already been parsed if present. disp may be provided as well.
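/// e.g. "%gs:8(%ebx,%esi,4)" or "-4(%rbp)" (illustrative AT&T operands).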
2108 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2109 const MCExpr *&Disp,
2110 const SMLoc &StartLoc,
2111 SMLoc &EndLoc) {
2112 MCAsmParser &Parser = getParser();
2113 SMLoc Loc;
2114   // Based on the initially passed values, we are in one of the following cases
2115   // (with current position (*)):
2116
2117 // 1. seg : * disp (base-index-scale-expr)
2118 // 2. seg : *(disp) (base-index-scale-expr)
2119 // 3. seg : *(base-index-scale-expr)
2120 // 4. disp *(base-index-scale-expr)
2121 // 5. *(disp) (base-index-scale-expr)
2122 // 6. *(base-index-scale-expr)
2123 // 7. disp *
2124 // 8. *(disp)
2125
2126   // If we do not have a displacement yet, check if we're in cases 4 or 6 by
2127 // checking if the first object after the parenthesis is a register (or an
2128 // identifier referring to a register) and parse the displacement or default
2129 // to 0 as appropriate.
2130 auto isAtMemOperand = [this]() {
2131 if (this->getLexer().isNot(AsmToken::LParen))
2132 return false;
2133 AsmToken Buf[2];
2134 StringRef Id;
2135 auto TokCount = this->getLexer().peekTokens(Buf, true);
2136 if (TokCount == 0)
2137 return false;
2138 switch (Buf[0].getKind()) {
2139 case AsmToken::Percent:
2140 case AsmToken::Comma:
2141 return true;
2142     // The following cases effectively peek at an identifier.
2143 case AsmToken::At:
2144 case AsmToken::Dollar:
2145 if ((TokCount > 1) &&
2146 (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2147 (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2148 Id = StringRef(Buf[0].getLoc().getPointer(),
2149 Buf[1].getIdentifier().size() + 1);
2150 break;
2151 case AsmToken::Identifier:
2152 case AsmToken::String:
2153 Id = Buf[0].getIdentifier();
2154 break;
2155 default:
2156 return false;
2157 }
2158 // We have an ID. Check if it is bound to a register.
2159 if (!Id.empty()) {
2160 MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
2161 if (Sym->isVariable()) {
2162 auto V = Sym->getVariableValue(/*SetUsed*/ false);
2163 return isa<X86MCExpr>(V);
2164 }
2165 }
2166 return false;
2167 };
2168
2169 if (!Disp) {
2170 // Parse immediate if we're not at a mem operand yet.
2171 if (!isAtMemOperand()) {
2172 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
2173 return nullptr;
2174 assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
2175 } else {
2176 // Disp is implicitly zero if we haven't parsed it yet.
2177 Disp = MCConstantExpr::create(0, Parser.getContext());
2178 }
2179 }
2180
2181 // We are now either at the end of the operand or at the '(' at the start of a
2182 // base-index-scale-expr.
2183
2184 if (!parseOptionalToken(AsmToken::LParen)) {
2185 if (SegReg == 0)
2186 return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2187 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2188 StartLoc, EndLoc);
2189 }
2190
2191   // If we reached here, the '(' has been eaten. Process
2192   // the rest of the memory operand.
2193 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2194 SMLoc BaseLoc = getLexer().getLoc();
2195 const MCExpr *E;
2196 StringRef ErrMsg;
2197
2198 // Parse BaseReg if one is provided.
2199 if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
2200 if (Parser.parseExpression(E, EndLoc) ||
2201 check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
2202 return nullptr;
2203
2204 // Sanity check register.
2205 BaseReg = cast<X86MCExpr>(E)->getRegNo();
2206 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
2207 return ErrorOperand(BaseLoc,
2208 "eiz and riz can only be used as index registers",
2209 SMRange(BaseLoc, EndLoc));
2210 }
2211
2212 if (parseOptionalToken(AsmToken::Comma)) {
2213 // Following the comma we should have either an index register, or a scale
2214     // value. We don't support the latter form, but we want to parse it
2215 // correctly.
2216 //
2217 // Even though it would be completely consistent to support syntax like
2218 // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2219 if (getLexer().isNot(AsmToken::RParen)) {
2220 if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
2221 return nullptr;
2222
2223 if (!isa<X86MCExpr>(E)) {
2224 // We've parsed an unexpected Scale Value instead of an index
2225 // register. Interpret it as an absolute.
2226 int64_t ScaleVal;
2227 if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
2228 return ErrorOperand(Loc, "expected absolute expression");
2229 if (ScaleVal != 1)
2230 Warning(Loc, "scale factor without index register is ignored");
2231 Scale = 1;
2232 } else { // IndexReg Found.
2233 IndexReg = cast<X86MCExpr>(E)->getRegNo();
2234
2235 if (BaseReg == X86::RIP)
2236 return ErrorOperand(
2237 Loc, "%rip as base register can not have an index register");
2238 if (IndexReg == X86::RIP)
2239 return ErrorOperand(Loc, "%rip is not allowed as an index register");
2240
2241 if (parseOptionalToken(AsmToken::Comma)) {
2242 // Parse the scale amount:
2243 // ::= ',' [scale-expression]
2244
2245 // A scale amount without an index is ignored.
2246 if (getLexer().isNot(AsmToken::RParen)) {
2247 int64_t ScaleVal;
2248 if (Parser.parseTokenLoc(Loc) ||
2249 Parser.parseAbsoluteExpression(ScaleVal))
2250 return ErrorOperand(Loc, "expected scale expression");
2251 Scale = (unsigned)ScaleVal;
2252 // Validate the scale amount.
2253 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2254 Scale != 1)
2255 return ErrorOperand(Loc,
2256 "scale factor in 16-bit address must be 1");
2257 if (checkScale(Scale, ErrMsg))
2258 return ErrorOperand(Loc, ErrMsg);
2259 }
2260 }
2261 }
2262 }
2263 }
2264
2265 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2266 if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
2267 return nullptr;
2268
2269   // This is to support the otherwise illegal operand (%dx) found in various
2270   // unofficial manual examples (e.g. "out[s]?[bwl]? %al, (%dx)"), which must now
2271   // be supported. Mark such DX variants separately to fix them only in special cases.
2272 if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
2273 isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
2274 return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
2275
2276 if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2277 ErrMsg))
2278 return ErrorOperand(BaseLoc, ErrMsg);
2279
2280 if (SegReg || BaseReg || IndexReg)
2281 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2282 IndexReg, Scale, StartLoc, EndLoc);
2283 return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2284 }
2285
2286 // Parse either a standard primary expression or a register.
2287 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
2288 MCAsmParser &Parser = getParser();
2289 // See if this is a register first.
2290 if (getTok().is(AsmToken::Percent) ||
2291 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
2292 MatchRegisterName(Parser.getTok().getString()))) {
2293 SMLoc StartLoc = Parser.getTok().getLoc();
2294 unsigned RegNo;
2295 if (ParseRegister(RegNo, StartLoc, EndLoc))
2296 return true;
2297 Res = X86MCExpr::create(RegNo, Parser.getContext());
2298 return false;
2299 }
2300 return Parser.parsePrimaryExpr(Res, EndLoc);
2301 }
2302
2303 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2304 SMLoc NameLoc, OperandVector &Operands) {
2305 MCAsmParser &Parser = getParser();
2306 InstInfo = &Info;
2307 StringRef PatchedName = Name;
2308
2309 if ((Name.equals("jmp") || Name.equals("jc") || Name.equals("jz")) &&
2310 isParsingIntelSyntax() && isParsingInlineAsm()) {
2311 StringRef NextTok = Parser.getTok().getString();
2312 if (NextTok == "short") {
2313 SMLoc NameEndLoc =
2314 NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2315 // Eat the short keyword
2316 Parser.Lex();
2317 // MS ignores the short keyword, it determines the jmp type based
2318 // on the distance of the label
2319 InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2320 NextTok.size() + 1);
2321 }
2322 }
2323
2324 // FIXME: Hack to recognize setneb as setne.
2325 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2326 PatchedName != "setb" && PatchedName != "setnb")
2327 PatchedName = PatchedName.substr(0, Name.size()-1);
2328
2329 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
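  // e.g. "cmpeqps %xmm1, %xmm0" is handled as "cmpps" with an explicit
  // immediate comparison code of 0x00 (illustrative).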
2330 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2331 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2332 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2333 bool IsVCMP = PatchedName[0] == 'v';
2334 unsigned CCIdx = IsVCMP ? 4 : 3;
2335 unsigned ComparisonCode = StringSwitch<unsigned>(
2336 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2337 .Case("eq", 0x00)
2338 .Case("eq_oq", 0x00)
2339 .Case("lt", 0x01)
2340 .Case("lt_os", 0x01)
2341 .Case("le", 0x02)
2342 .Case("le_os", 0x02)
2343 .Case("unord", 0x03)
2344 .Case("unord_q", 0x03)
2345 .Case("neq", 0x04)
2346 .Case("neq_uq", 0x04)
2347 .Case("nlt", 0x05)
2348 .Case("nlt_us", 0x05)
2349 .Case("nle", 0x06)
2350 .Case("nle_us", 0x06)
2351 .Case("ord", 0x07)
2352 .Case("ord_q", 0x07)
2353 /* AVX only from here */
2354 .Case("eq_uq", 0x08)
2355 .Case("nge", 0x09)
2356 .Case("nge_us", 0x09)
2357 .Case("ngt", 0x0A)
2358 .Case("ngt_us", 0x0A)
2359 .Case("false", 0x0B)
2360 .Case("false_oq", 0x0B)
2361 .Case("neq_oq", 0x0C)
2362 .Case("ge", 0x0D)
2363 .Case("ge_os", 0x0D)
2364 .Case("gt", 0x0E)
2365 .Case("gt_os", 0x0E)
2366 .Case("true", 0x0F)
2367 .Case("true_uq", 0x0F)
2368 .Case("eq_os", 0x10)
2369 .Case("lt_oq", 0x11)
2370 .Case("le_oq", 0x12)
2371 .Case("unord_s", 0x13)
2372 .Case("neq_us", 0x14)
2373 .Case("nlt_uq", 0x15)
2374 .Case("nle_uq", 0x16)
2375 .Case("ord_s", 0x17)
2376 .Case("eq_us", 0x18)
2377 .Case("nge_uq", 0x19)
2378 .Case("ngt_uq", 0x1A)
2379 .Case("false_os", 0x1B)
2380 .Case("neq_os", 0x1C)
2381 .Case("ge_oq", 0x1D)
2382 .Case("gt_oq", 0x1E)
2383 .Case("true_us", 0x1F)
2384 .Default(~0U);
2385 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2386
2387 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2388 NameLoc));
2389
2390 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2391 getParser().getContext());
2392 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2393
2394 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2395 }
2396 }
2397
2398 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2399 if (PatchedName.startswith("vpcmp") &&
2400 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2401 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2402 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2403 unsigned ComparisonCode = StringSwitch<unsigned>(
2404 PatchedName.slice(5, PatchedName.size() - CCIdx))
2405 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2406 .Case("lt", 0x1)
2407 .Case("le", 0x2)
2408 //.Case("false", 0x3) // Not a documented alias.
2409 .Case("neq", 0x4)
2410 .Case("nlt", 0x5)
2411 .Case("nle", 0x6)
2412 //.Case("true", 0x7) // Not a documented alias.
2413 .Default(~0U);
2414 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2415 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2416
2417 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2418 getParser().getContext());
2419 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2420
2421 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2422 }
2423 }
2424
2425 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2426 if (PatchedName.startswith("vpcom") &&
2427 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2428 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2429 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2430 unsigned ComparisonCode = StringSwitch<unsigned>(
2431 PatchedName.slice(5, PatchedName.size() - CCIdx))
2432 .Case("lt", 0x0)
2433 .Case("le", 0x1)
2434 .Case("gt", 0x2)
2435 .Case("ge", 0x3)
2436 .Case("eq", 0x4)
2437 .Case("neq", 0x5)
2438 .Case("false", 0x6)
2439 .Case("true", 0x7)
2440 .Default(~0U);
2441 if (ComparisonCode != ~0U) {
2442 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2443
2444 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2445 getParser().getContext());
2446 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2447
2448 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2449 }
2450 }
2451
2452
2453 // Determine whether this is an instruction prefix.
2454 // FIXME:
2455   // Enhance the robustness of prefix handling. For example, the following
2456   // forms are currently tolerated:
2457 // repz repnz <insn> ; GAS errors for the use of two similar prefixes
2458 // lock addq %rax, %rbx ; Destination operand must be of memory type
2459 // xacquire <insn> ; xacquire must be accompanied by 'lock'
2460 bool isPrefix = StringSwitch<bool>(Name)
2461 .Cases("rex64", "data32", "data16", true)
2462 .Cases("xacquire", "xrelease", true)
2463 .Cases("acquire", "release", isParsingIntelSyntax())
2464 .Default(false);
2465
2466 auto isLockRepeatNtPrefix = [](StringRef N) {
2467 return StringSwitch<bool>(N)
2468 .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
2469 .Default(false);
2470 };
2471
2472 bool CurlyAsEndOfStatement = false;
2473
2474 unsigned Flags = X86::IP_NO_PREFIX;
2475 while (isLockRepeatNtPrefix(Name.lower())) {
2476 unsigned Prefix =
2477 StringSwitch<unsigned>(Name)
2478 .Cases("lock", "lock", X86::IP_HAS_LOCK)
2479 .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
2480 .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
2481 .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
2482 .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
2483 Flags |= Prefix;
2484 if (getLexer().is(AsmToken::EndOfStatement)) {
2485       // We don't have a real instruction with the given prefix;
2486       // use the prefix as the instruction.
2487       // TODO: there could be several prefixes one after another.
2488 Flags = X86::IP_NO_PREFIX;
2489 break;
2490 }
2491 Name = Parser.getTok().getString();
2492 Parser.Lex(); // eat the prefix
2493 // Hack: we could have something like "rep # some comment" or
2494 // "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
2495 while (Name.startswith(";") || Name.startswith("\n") ||
2496 Name.startswith("#") || Name.startswith("\t") ||
2497 Name.startswith("/")) {
2498 Name = Parser.getTok().getString();
2499 Parser.Lex(); // go to next prefix or instr
2500 }
2501 }
2502
2503 if (Flags)
2504 PatchedName = Name;
2505
2506 // Hacks to handle 'data16' and 'data32'
2507 if (PatchedName == "data16" && is16BitMode()) {
2508 return Error(NameLoc, "redundant data16 prefix");
2509 }
2510 if (PatchedName == "data32") {
2511 if (is32BitMode())
2512 return Error(NameLoc, "redundant data32 prefix");
2513 if (is64BitMode())
2514 return Error(NameLoc, "'data32' is not supported in 64-bit mode");
2515     // Hack: use 'data16' for the table lookup.
2516 PatchedName = "data16";
2517 }
2518
2519 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2520
2521 // This does the actual operand parsing. Don't parse any more if we have a
2522 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2523 // just want to parse the "lock" as the first instruction and the "incl" as
2524 // the next one.
2525 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2526 // Parse '*' modifier.
2527 if (getLexer().is(AsmToken::Star))
2528 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2529
2530 // Read the operands.
2531 while(1) {
2532 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2533 Operands.push_back(std::move(Op));
2534 if (HandleAVX512Operand(Operands, *Operands.back()))
2535 return true;
2536 } else {
2537 return true;
2538 }
2539 // check for comma and eat it
2540 if (getLexer().is(AsmToken::Comma))
2541 Parser.Lex();
2542 else
2543 break;
2544 }
2545
2546     // In MS inline asm, curly braces mark the beginning/end of a block;
2547     // therefore they should be interpreted as end of statement.
2548 CurlyAsEndOfStatement =
2549 isParsingIntelSyntax() && isParsingInlineAsm() &&
2550 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2551 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2552 return TokError("unexpected token in argument list");
2553 }
2554
2555 // Consume the EndOfStatement or the prefix separator Slash
2556 if (getLexer().is(AsmToken::EndOfStatement) ||
2557 (isPrefix && getLexer().is(AsmToken::Slash)))
2558 Parser.Lex();
2559 else if (CurlyAsEndOfStatement)
2560 // Add an actual EndOfStatement before the curly brace
2561 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2562 getLexer().getTok().getLoc(), 0);
2563
2564 // This is for gas compatibility and cannot be done in td.
2565   // Add a "p" suffix to some floating point instructions with no arguments.
2566 // For example: fsub --> fsubp
2567 bool IsFp =
2568 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2569 if (IsFp && Operands.size() == 1) {
2570 const char *Repl = StringSwitch<const char *>(Name)
2571 .Case("fsub", "fsubp")
2572 .Case("fdiv", "fdivp")
2573 .Case("fsubr", "fsubrp")
2574 .Case("fdivr", "fdivrp");
2575 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2576 }
2577
2578   // Moving a 32- or 16-bit value into a segment register has the same
2579   // behavior. Modify such instructions to always take the shorter form.
2580 if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2581 (Operands.size() == 3)) {
2582 X86Operand &Op1 = (X86Operand &)*Operands[1];
2583 X86Operand &Op2 = (X86Operand &)*Operands[2];
2584 SMLoc Loc = Op1.getEndLoc();
2585 if (Op1.isReg() && Op2.isReg() &&
2586 X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2587 Op2.getReg()) &&
2588 (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2589 X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2590 // Change instruction name to match new instruction.
2591 if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2592 Name = is16BitMode() ? "movw" : "movl";
2593 Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2594 }
2595 // Select the correct equivalent 16-/32-bit source register.
2596 unsigned Reg =
2597 getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2598 Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2599 }
2600 }
2601
2602 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2603 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2604 // documented form in various unofficial manuals, so a lot of code uses it.
2605 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2606 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2607 Operands.size() == 3) {
2608 X86Operand &Op = (X86Operand &)*Operands.back();
2609 if (Op.isDXReg())
2610 Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2611 Op.getEndLoc());
2612 }
2613 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2614 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2615 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2616 Operands.size() == 3) {
2617 X86Operand &Op = (X86Operand &)*Operands[1];
2618 if (Op.isDXReg())
2619 Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2620 Op.getEndLoc());
2621 }
2622
2623 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2624 bool HadVerifyError = false;
2625
2626 // Append default arguments to "ins[bwld]"
2627 if (Name.startswith("ins") &&
2628 (Operands.size() == 1 || Operands.size() == 3) &&
2629 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2630 Name == "ins")) {
2631
2632 AddDefaultSrcDestOperands(TmpOperands,
2633 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2634 DefaultMemDIOperand(NameLoc));
2635 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2636 }
2637
2638 // Append default arguments to "outs[bwld]"
2639 if (Name.startswith("outs") &&
2640 (Operands.size() == 1 || Operands.size() == 3) &&
2641 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2642 Name == "outsd" || Name == "outs")) {
2643 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2644 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2645 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2646 }
2647
2648 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2649 // values of $SIREG according to the mode. It would be nice if this
2650 // could be achieved with InstAlias in the tables.
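  // e.g. "lodsb" becomes "lodsb (%rsi)" in 64-bit mode (illustrative).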
2651 if (Name.startswith("lods") &&
2652 (Operands.size() == 1 || Operands.size() == 2) &&
2653 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2654 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2655 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2656 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2657 }
2658
2659 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2660 // values of $DIREG according to the mode. It would be nice if this
2661 // could be achieved with InstAlias in the tables.
2662 if (Name.startswith("stos") &&
2663 (Operands.size() == 1 || Operands.size() == 2) &&
2664 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2665 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2666 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2667 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2668 }
2669
2670 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2671 // values of $DIREG according to the mode. It would be nice if this
2672 // could be achieved with InstAlias in the tables.
2673 if (Name.startswith("scas") &&
2674 (Operands.size() == 1 || Operands.size() == 2) &&
2675 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2676 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2677 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2678 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2679 }
2680
2681 // Add default SI and DI operands to "cmps[bwlq]".
2682 if (Name.startswith("cmps") &&
2683 (Operands.size() == 1 || Operands.size() == 3) &&
2684 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2685 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2686 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2687 DefaultMemSIOperand(NameLoc));
2688 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2689 }
2690
2691 // Add default SI and DI operands to "movs[bwlq]".
2692 if (((Name.startswith("movs") &&
2693 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2694 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2695 (Name.startswith("smov") &&
2696 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2697 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2698 (Operands.size() == 1 || Operands.size() == 3)) {
2699 if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2700 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2701 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2702 DefaultMemDIOperand(NameLoc));
2703 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2704 }
2705
2706   // Check if we encountered an error for one of the string instructions.
2707 if (HadVerifyError) {
2708 return HadVerifyError;
2709 }
2710
2711   // FIXME: Hack to recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2712 // "shift <op>".
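  // e.g. "shrl $1, %eax" is canonicalized to "shrl %eax" (illustrative).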
2713 if ((Name.startswith("shr") || Name.startswith("sar") ||
2714 Name.startswith("shl") || Name.startswith("sal") ||
2715 Name.startswith("rcl") || Name.startswith("rcr") ||
2716 Name.startswith("rol") || Name.startswith("ror")) &&
2717 Operands.size() == 3) {
2718 if (isParsingIntelSyntax()) {
2719 // Intel syntax
2720 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2721 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2722 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2723 Operands.pop_back();
2724 } else {
2725 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2726 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2727 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2728 Operands.erase(Operands.begin() + 1);
2729 }
2730 }
2731
2732 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2733 // instalias with an immediate operand yet.
2734 if (Name == "int" && Operands.size() == 2) {
2735 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2736 if (Op1.isImm())
2737 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2738 if (CE->getValue() == 3) {
2739 Operands.erase(Operands.begin() + 1);
2740 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2741 }
2742 }
2743
2744 // Transforms "xlat mem8" into "xlatb"
2745 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2746 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2747 if (Op1.isMem8()) {
2748 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2749 "size, (R|E)BX will be used for the location");
2750 Operands.pop_back();
2751 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
2752 }
2753 }
2754
2755 if (Flags)
2756 Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
2757 return false;
2758 }
2759
2760 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2761 return false;
2762 }
2763
2764 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2765 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
2766
2767 switch (Inst.getOpcode()) {
2768 case X86::VGATHERDPDYrm:
2769 case X86::VGATHERDPDrm:
2770 case X86::VGATHERDPSYrm:
2771 case X86::VGATHERDPSrm:
2772 case X86::VGATHERQPDYrm:
2773 case X86::VGATHERQPDrm:
2774 case X86::VGATHERQPSYrm:
2775 case X86::VGATHERQPSrm:
2776 case X86::VPGATHERDDYrm:
2777 case X86::VPGATHERDDrm:
2778 case X86::VPGATHERDQYrm:
2779 case X86::VPGATHERDQrm:
2780 case X86::VPGATHERQDYrm:
2781 case X86::VPGATHERQDrm:
2782 case X86::VPGATHERQQYrm:
2783 case X86::VPGATHERQQrm: {
2784 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2785 unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
2786 unsigned Index =
2787 MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
2788 if (Dest == Mask || Dest == Index || Mask == Index)
2789 return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
2790 "registers should be distinct");
2791 break;
2792 }
2793 case X86::VGATHERDPDZ128rm:
2794 case X86::VGATHERDPDZ256rm:
2795 case X86::VGATHERDPDZrm:
2796 case X86::VGATHERDPSZ128rm:
2797 case X86::VGATHERDPSZ256rm:
2798 case X86::VGATHERDPSZrm:
2799 case X86::VGATHERQPDZ128rm:
2800 case X86::VGATHERQPDZ256rm:
2801 case X86::VGATHERQPDZrm:
2802 case X86::VGATHERQPSZ128rm:
2803 case X86::VGATHERQPSZ256rm:
2804 case X86::VGATHERQPSZrm:
2805 case X86::VPGATHERDDZ128rm:
2806 case X86::VPGATHERDDZ256rm:
2807 case X86::VPGATHERDDZrm:
2808 case X86::VPGATHERDQZ128rm:
2809 case X86::VPGATHERDQZ256rm:
2810 case X86::VPGATHERDQZrm:
2811 case X86::VPGATHERQDZ128rm:
2812 case X86::VPGATHERQDZ256rm:
2813 case X86::VPGATHERQDZrm:
2814 case X86::VPGATHERQQZ128rm:
2815 case X86::VPGATHERQQZ256rm:
2816 case X86::VPGATHERQQZrm: {
2817 unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2818 unsigned Index =
2819 MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
2820 if (Dest == Index)
2821 return Warning(Ops[0]->getStartLoc(), "index and destination registers "
2822 "should be distinct");
2823 break;
2824 }
2825 case X86::V4FMADDPSrm:
2826 case X86::V4FMADDPSrmk:
2827 case X86::V4FMADDPSrmkz:
2828 case X86::V4FMADDSSrm:
2829 case X86::V4FMADDSSrmk:
2830 case X86::V4FMADDSSrmkz:
2831 case X86::V4FNMADDPSrm:
2832 case X86::V4FNMADDPSrmk:
2833 case X86::V4FNMADDPSrmkz:
2834 case X86::V4FNMADDSSrm:
2835 case X86::V4FNMADDSSrmk:
2836 case X86::V4FNMADDSSrmkz:
2837 case X86::VP4DPWSSDSrm:
2838 case X86::VP4DPWSSDSrmk:
2839 case X86::VP4DPWSSDSrmkz:
2840 case X86::VP4DPWSSDrm:
2841 case X86::VP4DPWSSDrmk:
2842 case X86::VP4DPWSSDrmkz: {
2843 unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
2844 X86::AddrNumOperands - 1).getReg();
2845 unsigned Src2Enc = MRI->getEncodingValue(Src2);
2846 if (Src2Enc % 4 != 0) {
2847 StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
2848 unsigned GroupStart = (Src2Enc / 4) * 4;
2849 unsigned GroupEnd = GroupStart + 3;
2850 return Warning(Ops[0]->getStartLoc(),
2851 "source register '" + RegName + "' implicitly denotes '" +
2852 RegName.take_front(3) + Twine(GroupStart) + "' to '" +
2853 RegName.take_front(3) + Twine(GroupEnd) +
2854 "' source group");
2855 }
2856 break;
2857 }
2858 }
2859
2860 return false;
2861 }
2862
2863 static const char *getSubtargetFeatureName(uint64_t Val);
2864
2865 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2866 MCStreamer &Out) {
2867 Instrumentation->InstrumentAndEmitInstruction(
2868 Inst, Operands, getContext(), MII, Out,
2869 getParser().shouldPrintSchedInfo());
2870 }
2871
2872 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2873 OperandVector &Operands,
2874 MCStreamer &Out, uint64_t &ErrorInfo,
2875 bool MatchingInlineAsm) {
2876 if (isParsingIntelSyntax())
2877 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2878 MatchingInlineAsm);
2879 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2880 MatchingInlineAsm);
2881 }
2882
2883 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2884 OperandVector &Operands, MCStreamer &Out,
2885 bool MatchingInlineAsm) {
2886 // FIXME: This should be replaced with a real .td file alias mechanism.
2887 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2888 // call.
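  // e.g. "fstcw (%rax)" is emitted as a wait instruction followed by
  // "fnstcw (%rax)" (illustrative).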
2889 const char *Repl = StringSwitch<const char *>(Op.getToken())
2890 .Case("finit", "fninit")
2891 .Case("fsave", "fnsave")
2892 .Case("fstcw", "fnstcw")
2893 .Case("fstcww", "fnstcw")
2894 .Case("fstenv", "fnstenv")
2895 .Case("fstsw", "fnstsw")
2896 .Case("fstsww", "fnstsw")
2897 .Case("fclex", "fnclex")
2898 .Default(nullptr);
2899 if (Repl) {
2900 MCInst Inst;
2901 Inst.setOpcode(X86::WAIT);
2902 Inst.setLoc(IDLoc);
2903 if (!MatchingInlineAsm)
2904 EmitInstruction(Inst, Operands, Out);
2905 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2906 }
2907 }
2908
2909 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2910 bool MatchingInlineAsm) {
2911 assert(ErrorInfo && "Unknown missing feature!");
2912 SmallString<126> Msg;
2913 raw_svector_ostream OS(Msg);
2914 OS << "instruction requires:";
2915 uint64_t Mask = 1;
2916 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2917 if (ErrorInfo & Mask)
2918 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2919 Mask <<= 1;
2920 }
2921 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
2922 }
2923
2924 static unsigned getPrefixes(OperandVector &Operands) {
2925 unsigned Result = 0;
2926 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
2927 if (Prefix.isPrefix()) {
2928 Result = Prefix.getPrefix();
2929 Operands.pop_back();
2930 }
2931 return Result;
2932 }
2933
2934 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2935 OperandVector &Operands,
2936 MCStreamer &Out,
2937 uint64_t &ErrorInfo,
2938 bool MatchingInlineAsm) {
2939   assert(!Operands.empty() && "Unexpected empty operand list!");
2940 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2941 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2942 SMRange EmptyRange = None;
2943
2944 // First, handle aliases that expand to multiple instructions.
2945 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2946
2947 bool WasOriginallyInvalidOperand = false;
2948 unsigned Prefixes = getPrefixes(Operands);
2949
2950 MCInst Inst;
2951
2952 if (Prefixes)
2953 Inst.setFlags(Prefixes);
2954
2955 // First, try a direct match.
2956 switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2957 isParsingIntelSyntax())) {
2958 default: llvm_unreachable("Unexpected match result!");
2959 case Match_Success:
2960 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
2961 return true;
2962 // Some instructions need post-processing to, for example, tweak which
2963 // encoding is selected. Loop on it while changes happen so the
2964 // individual transformations can chain off each other.
2965 if (!MatchingInlineAsm)
2966 while (processInstruction(Inst, Operands))
2967 ;
2968
2969 Inst.setLoc(IDLoc);
2970 if (!MatchingInlineAsm)
2971 EmitInstruction(Inst, Operands, Out);
2972 Opcode = Inst.getOpcode();
2973 return false;
2974 case Match_MissingFeature:
2975 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2976 case Match_InvalidOperand:
2977 WasOriginallyInvalidOperand = true;
2978 break;
2979 case Match_MnemonicFail:
2980 break;
2981 }
2982
2983 // FIXME: Ideally, we would only attempt suffix matches for things which are
2984 // valid prefixes, and we could just infer the right unambiguous
2985 // type. However, that requires substantially more matcher support than the
2986 // following hack.
2987
2988 // Change the operand to point to a temporary token.
2989 StringRef Base = Op.getToken();
2990 SmallString<16> Tmp;
2991 Tmp += Base;
2992 Tmp += ' ';
2993 Op.setTokenValue(Tmp);
2994
2995 // If this instruction starts with an 'f', then it is a floating point stack
2996 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2997 // 80-bit floating point, which use the suffixes s,l,t respectively.
2998 //
2999 // Otherwise, we assume that this may be an integer instruction, which comes
3000 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
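  // e.g. for "mov" the matcher tries "movb", "movw", "movl" and "movq"
  // (illustrative).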
3001 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
3002
3003 // Check for the various suffix matches.
3004 uint64_t ErrorInfoIgnore;
3005 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
3006 unsigned Match[4];
3007
3008 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
3009 Tmp.back() = Suffixes[I];
3010 Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
3011 MatchingInlineAsm, isParsingIntelSyntax());
3012 // If this returned as a missing feature failure, remember that.
3013 if (Match[I] == Match_MissingFeature)
3014 ErrorInfoMissingFeature = ErrorInfoIgnore;
3015 }
3016
3017 // Restore the old token.
3018 Op.setTokenValue(Base);
3019
3020 // If exactly one matched, then we treat that as a successful match (and the
3021 // instruction will already have been filled in correctly, since the failing
3022 // matches won't have modified it).
3023 unsigned NumSuccessfulMatches =
3024 std::count(std::begin(Match), std::end(Match), Match_Success);
3025 if (NumSuccessfulMatches == 1) {
3026 Inst.setLoc(IDLoc);
3027 if (!MatchingInlineAsm)
3028 EmitInstruction(Inst, Operands, Out);
3029 Opcode = Inst.getOpcode();
3030 return false;
3031 }
3032
3033 // Otherwise, the match failed, try to produce a decent error message.
3034
3035 // If we had multiple suffix matches, then identify this as an ambiguous
3036 // match.
3037 if (NumSuccessfulMatches > 1) {
3038 char MatchChars[4];
3039 unsigned NumMatches = 0;
3040 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
3041 if (Match[I] == Match_Success)
3042 MatchChars[NumMatches++] = Suffixes[I];
3043
3044 SmallString<126> Msg;
3045 raw_svector_ostream OS(Msg);
3046 OS << "ambiguous instructions require an explicit suffix (could be ";
3047 for (unsigned i = 0; i != NumMatches; ++i) {
3048 if (i != 0)
3049 OS << ", ";
3050 if (i + 1 == NumMatches)
3051 OS << "or ";
3052 OS << "'" << Base << MatchChars[i] << "'";
3053 }
3054 OS << ")";
3055 Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
3056 return true;
3057 }
3058
3059 // Okay, we know that none of the variants matched successfully.
3060
3061 // If all of the instructions reported an invalid mnemonic, then the original
3062 // mnemonic was invalid.
3063 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
3064 if (!WasOriginallyInvalidOperand) {
3065 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
3066 Op.getLocRange(), MatchingInlineAsm);
3067 }
3068
3069 // Recover location info for the operand if we know which was the problem.
3070 if (ErrorInfo != ~0ULL) {
3071 if (ErrorInfo >= Operands.size())
3072 return Error(IDLoc, "too few operands for instruction", EmptyRange,
3073 MatchingInlineAsm);
3074
3075 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
3076 if (Operand.getStartLoc().isValid()) {
3077 SMRange OperandRange = Operand.getLocRange();
3078 return Error(Operand.getStartLoc(), "invalid operand for instruction",
3079 OperandRange, MatchingInlineAsm);
3080 }
3081 }
3082
3083 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3084 MatchingInlineAsm);
3085 }
3086
3087 // If one instruction matched with a missing feature, report this as a
3088 // missing feature.
3089 if (std::count(std::begin(Match), std::end(Match),
3090 Match_MissingFeature) == 1) {
3091 ErrorInfo = ErrorInfoMissingFeature;
3092 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3093 MatchingInlineAsm);
3094 }
3095
3096 // If one instruction matched with an invalid operand, report this as an
3097 // operand failure.
3098 if (std::count(std::begin(Match), std::end(Match),
3099 Match_InvalidOperand) == 1) {
3100 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3101 MatchingInlineAsm);
3102 }
3103
3104 // If all of these were an outright failure, report it in a useless way.
3105 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
3106 EmptyRange, MatchingInlineAsm);
3107 return true;
3108 }
3109
3110 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
3111 OperandVector &Operands,
3112 MCStreamer &Out,
3113 uint64_t &ErrorInfo,
3114 bool MatchingInlineAsm) {
3115   assert(!Operands.empty() && "Unexpected empty operand list!");
3116 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
3117 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
3118 StringRef Mnemonic = Op.getToken();
3119 SMRange EmptyRange = None;
3120 StringRef Base = Op.getToken();
3121 unsigned Prefixes = getPrefixes(Operands);
3122
3123 // First, handle aliases that expand to multiple instructions.
3124 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
3125
3126 MCInst Inst;
3127
3128 if (Prefixes)
3129 Inst.setFlags(Prefixes);
3130
3131 // Find one unsized memory operand, if present.
3132 X86Operand *UnsizedMemOp = nullptr;
3133 for (const auto &Op : Operands) {
3134 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
3135 if (X86Op->isMemUnsized()) {
3136 UnsizedMemOp = X86Op;
3137       // We found an unqualified memory operand; stop at the first one, since
3138       // IA allows only one memory operand.
3139 break;
3140 }
3141 }
3142
3143 // Allow some instructions to have implicitly pointer-sized operands. This is
3144 // compatible with gas.
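  // For example (illustrative): in 64-bit mode, Intel-syntax "call [rax]" is
  // accepted without a size qualifier and is matched as if it were written
  // "call qword ptr [rax]".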
3145 if (UnsizedMemOp) {
3146 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
3147 for (const char *Instr : PtrSizedInstrs) {
3148 if (Mnemonic == Instr) {
3149 UnsizedMemOp->Mem.Size = getPointerWidth();
3150 break;
3151 }
3152 }
3153 }
3154
3155 SmallVector<unsigned, 8> Match;
3156 uint64_t ErrorInfoMissingFeature = 0;
3157
3158   // If an unsized push has an immediate operand, default its size to the
3159   // pointer size.
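  // For example (illustrative): in 32-bit mode, Intel-syntax "push 42" is
  // matched as if the explicit AT&T suffix had been given, i.e. as "pushl".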
3160 if (Mnemonic == "push" && Operands.size() == 2) {
3161 auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
3162 if (X86Op->isImm()) {
3163       // If it's not a constant, fall through and let the rest handle it.
3164 const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
3165 unsigned Size = getPointerWidth();
3166 if (CE &&
3167 (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
3168 SmallString<16> Tmp;
3169 Tmp += Base;
3170 Tmp += (is64BitMode())
3171 ? "q"
3172 : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
3173 Op.setTokenValue(Tmp);
3174 // Do match in ATT mode to allow explicit suffix usage.
3175 Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
3176 MatchingInlineAsm,
3177 false /*isParsingIntelSyntax()*/));
3178 Op.setTokenValue(Base);
3179 }
3180 }
3181 }
3182
3183 // If an unsized memory operand is present, try to match with each memory
3184 // operand size. In Intel assembly, the size is not part of the instruction
3185 // mnemonic.
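  // For example (illustrative): given "inc [esi]", the matcher is retried with
  // the operand sized as 8, 16, 32, ... bits; if several sizes match, the
  // ambiguity error further below is reported instead.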
3186 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
3187 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
3188 for (unsigned Size : MopSizes) {
3189 UnsizedMemOp->Mem.Size = Size;
3190 uint64_t ErrorInfoIgnore;
3191 unsigned LastOpcode = Inst.getOpcode();
3192 unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
3193 MatchingInlineAsm, isParsingIntelSyntax());
3194 if (Match.empty() || LastOpcode != Inst.getOpcode())
3195 Match.push_back(M);
3196
3197 // If this returned as a missing feature failure, remember that.
3198 if (Match.back() == Match_MissingFeature)
3199 ErrorInfoMissingFeature = ErrorInfoIgnore;
3200 }
3201
3202 // Restore the size of the unsized memory operand if we modified it.
3203 UnsizedMemOp->Mem.Size = 0;
3204 }
3205
3206 // If we haven't matched anything yet, this is not a basic integer or FPU
3207 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
3208 // matching with the unsized operand.
3209 if (Match.empty()) {
3210 Match.push_back(MatchInstruction(
3211 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
3212 // If this returned as a missing feature failure, remember that.
3213 if (Match.back() == Match_MissingFeature)
3214 ErrorInfoMissingFeature = ErrorInfo;
3215 }
3216
3217 // Restore the size of the unsized memory operand if we modified it.
3218 if (UnsizedMemOp)
3219 UnsizedMemOp->Mem.Size = 0;
3220
3221 // If it's a bad mnemonic, all results will be the same.
3222 if (Match.back() == Match_MnemonicFail) {
3223 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
3224 Op.getLocRange(), MatchingInlineAsm);
3225 }
3226
3227 unsigned NumSuccessfulMatches =
3228 std::count(std::begin(Match), std::end(Match), Match_Success);
3229
3230 // If matching was ambiguous and we had size information from the frontend,
3231   // try again with that. This handles cases like "movzx eax, m8/m16".
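  // For example (illustrative): in MS-style inline asm, if the source operand
  // of "movzx" is a 16-bit variable, the frontend-reported size selects the
  // 16-bit form and resolves the ambiguity.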
3232 if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
3233 UnsizedMemOp->getMemFrontendSize()) {
3234 UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
3235 unsigned M = MatchInstruction(
3236 Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
3237 if (M == Match_Success)
3238 NumSuccessfulMatches = 1;
3239
3240 // Add a rewrite that encodes the size information we used from the
3241 // frontend.
3242 InstInfo->AsmRewrites->emplace_back(
3243 AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
3244 /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
3245 }
3246
3247 // If exactly one matched, then we treat that as a successful match (and the
3248 // instruction will already have been filled in correctly, since the failing
3249 // matches won't have modified it).
3250 if (NumSuccessfulMatches == 1) {
3251 if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
3252 return true;
3253 // Some instructions need post-processing to, for example, tweak which
3254 // encoding is selected. Loop on it while changes happen so the individual
3255 // transformations can chain off each other.
3256 if (!MatchingInlineAsm)
3257 while (processInstruction(Inst, Operands))
3258 ;
3259 Inst.setLoc(IDLoc);
3260 if (!MatchingInlineAsm)
3261 EmitInstruction(Inst, Operands, Out);
3262 Opcode = Inst.getOpcode();
3263 return false;
3264 } else if (NumSuccessfulMatches > 1) {
3265 assert(UnsizedMemOp &&
3266 "multiple matches only possible with unsized memory operands");
3267 return Error(UnsizedMemOp->getStartLoc(),
3268 "ambiguous operand size for instruction '" + Mnemonic + "\'",
3269 UnsizedMemOp->getLocRange());
3270 }
3271
3272 // If one instruction matched with a missing feature, report this as a
3273 // missing feature.
3274 if (std::count(std::begin(Match), std::end(Match),
3275 Match_MissingFeature) == 1) {
3276 ErrorInfo = ErrorInfoMissingFeature;
3277 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3278 MatchingInlineAsm);
3279 }
3280
3281 // If one instruction matched with an invalid operand, report this as an
3282 // operand failure.
3283 if (std::count(std::begin(Match), std::end(Match),
3284 Match_InvalidOperand) == 1) {
3285 return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3286 MatchingInlineAsm);
3287 }
3288
3289 // If all of these were an outright failure, report it in a useless way.
3290 return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
3291 MatchingInlineAsm);
3292 }
3293
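// Returning true here asks the caller to drop the register from inline-asm
// clobber lists; only segment registers are excluded (a brief summary of the
// check below; the inline-asm context is an assumption, not from the source).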
3294 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3295 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
3296 }
3297
3298 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3299 MCAsmParser &Parser = getParser();
3300 StringRef IDVal = DirectiveID.getIdentifier();
3301 if (IDVal.startswith(".code"))
3302 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3303 else if (IDVal.startswith(".att_syntax")) {
3304 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3305 if (Parser.getTok().getString() == "prefix")
3306 Parser.Lex();
3307 else if (Parser.getTok().getString() == "noprefix")
3308 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3309 "supported: registers must have a "
3310 "'%' prefix in .att_syntax");
3311 }
3312 getParser().setAssemblerDialect(0);
3313 return false;
3314 } else if (IDVal.startswith(".intel_syntax")) {
3315 getParser().setAssemblerDialect(1);
3316 if (getLexer().isNot(AsmToken::EndOfStatement)) {
3317 if (Parser.getTok().getString() == "noprefix")
3318 Parser.Lex();
3319 else if (Parser.getTok().getString() == "prefix")
3320 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3321 "supported: registers must not have "
3322 "a '%' prefix in .intel_syntax");
3323 }
3324 return false;
3325 } else if (IDVal == ".even")
3326 return parseDirectiveEven(DirectiveID.getLoc());
3327 else if (IDVal == ".cv_fpo_proc")
3328 return parseDirectiveFPOProc(DirectiveID.getLoc());
3329 else if (IDVal == ".cv_fpo_setframe")
3330 return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
3331 else if (IDVal == ".cv_fpo_pushreg")
3332 return parseDirectiveFPOPushReg(DirectiveID.getLoc());
3333 else if (IDVal == ".cv_fpo_stackalloc")
3334 return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
3335 else if (IDVal == ".cv_fpo_stackalign")
3336 return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
3337 else if (IDVal == ".cv_fpo_endprologue")
3338 return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
3339 else if (IDVal == ".cv_fpo_endproc")
3340 return parseDirectiveFPOEndProc(DirectiveID.getLoc());
3341
3342 return true;
3343 }
3344
3345 /// parseDirectiveEven
3346 /// ::= .even
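///
/// Aligns the current position to a 2-byte boundary, using the section's code
/// alignment when available and zero fill otherwise (a summary of the
/// implementation below).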
3347 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3348 if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
3349 return false;
3350
3351 const MCSection *Section = getStreamer().getCurrentSectionOnly();
3352 if (!Section) {
3353 getStreamer().InitSections(false);
3354 Section = getStreamer().getCurrentSectionOnly();
3355 }
3356 if (Section->UseCodeAlign())
3357 getStreamer().EmitCodeAlignment(2, 0);
3358 else
3359 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3360 return false;
3361 }
3362
3363 /// ParseDirectiveCode
3364 /// ::= .code16 | .code16gcc | .code32 | .code64
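///
/// For example (illustrative):
///   .code16      # assemble subsequent instructions for 16-bit mode
///   .code16gcc   # gcc style: parse as 32-bit, but emit 16-bit code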
3365 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3366 MCAsmParser &Parser = getParser();
3367 Code16GCC = false;
3368 if (IDVal == ".code16") {
3369 Parser.Lex();
3370 if (!is16BitMode()) {
3371 SwitchMode(X86::Mode16Bit);
3372 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3373 }
3374 } else if (IDVal == ".code16gcc") {
3375 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3376 Parser.Lex();
3377 Code16GCC = true;
3378 if (!is16BitMode()) {
3379 SwitchMode(X86::Mode16Bit);
3380 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3381 }
3382 } else if (IDVal == ".code32") {
3383 Parser.Lex();
3384 if (!is32BitMode()) {
3385 SwitchMode(X86::Mode32Bit);
3386 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3387 }
3388 } else if (IDVal == ".code64") {
3389 Parser.Lex();
3390 if (!is64BitMode()) {
3391 SwitchMode(X86::Mode64Bit);
3392 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3393 }
3394 } else {
3395 Error(L, "unknown directive " + IDVal);
3396 return false;
3397 }
3398
3399 return false;
3400 }
3401
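// The .cv_fpo_* handlers below parse CodeView frame-pointer-omission (FPO)
// unwind directives and forward them to the X86 target streamer (illustrative
// summary; see the emitFPO* calls for the authoritative behavior).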
3402 // .cv_fpo_proc foo 4
3403 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
3404 MCAsmParser &Parser = getParser();
3405 StringRef ProcName;
3406 int64_t ParamsSize;
3407 if (Parser.parseIdentifier(ProcName))
3408 return Parser.TokError("expected symbol name");
3409 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
3410 return true;
3411 if (!isUIntN(32, ParamsSize))
3412 return Parser.TokError("parameters size out of range");
3413 if (Parser.parseEOL("unexpected tokens"))
3414 return addErrorSuffix(" in '.cv_fpo_proc' directive");
3415 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
3416 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
3417 }
3418
3419 // .cv_fpo_setframe ebp
3420 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
3421 MCAsmParser &Parser = getParser();
3422 unsigned Reg;
3423 SMLoc DummyLoc;
3424 if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3425 Parser.parseEOL("unexpected tokens"))
3426 return addErrorSuffix(" in '.cv_fpo_setframe' directive");
3427 return getTargetStreamer().emitFPOSetFrame(Reg, L);
3428 }
3429
3430 // .cv_fpo_pushreg ebx
3431 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
3432 MCAsmParser &Parser = getParser();
3433 unsigned Reg;
3434 SMLoc DummyLoc;
3435 if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3436 Parser.parseEOL("unexpected tokens"))
3437 return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
3438 return getTargetStreamer().emitFPOPushReg(Reg, L);
3439 }
3440
3441 // .cv_fpo_stackalloc 20
3442 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
3443 MCAsmParser &Parser = getParser();
3444 int64_t Offset;
3445 if (Parser.parseIntToken(Offset, "expected offset") ||
3446 Parser.parseEOL("unexpected tokens"))
3447 return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
3448 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
3449 }
3450
3451 // .cv_fpo_stackalign 8
3452 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
3453 MCAsmParser &Parser = getParser();
3454 int64_t Offset;
3455 if (Parser.parseIntToken(Offset, "expected offset") ||
3456 Parser.parseEOL("unexpected tokens"))
3457 return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
3458 return getTargetStreamer().emitFPOStackAlign(Offset, L);
3459 }
3460
3461 // .cv_fpo_endprologue
3462 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
3463 MCAsmParser &Parser = getParser();
3464 if (Parser.parseEOL("unexpected tokens"))
3465 return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
3466 return getTargetStreamer().emitFPOEndPrologue(L);
3467 }
3468
3469 // .cv_fpo_endproc
3470 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
3471 MCAsmParser &Parser = getParser();
3472 if (Parser.parseEOL("unexpected tokens"))
3473 return addErrorSuffix(" in '.cv_fpo_endproc' directive");
3474 return getTargetStreamer().emitFPOEndProc(L);
3475 }
3476
3477 // Force static initialization.
3478 extern "C" void LLVMInitializeX86AsmParser() {
3479 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3480 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3481 }
3482
3483 #define GET_REGISTER_MATCHER
3484 #define GET_MATCHER_IMPLEMENTATION
3485 #define GET_SUBTARGET_FEATURE_NAME
3486 #include "X86GenAsmMatcher.inc"
3487