1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "InstPrinter/X86IntelInstPrinter.h"
11 #include "MCTargetDesc/X86BaseInfo.h"
12 #include "MCTargetDesc/X86MCExpr.h"
13 #include "MCTargetDesc/X86TargetStreamer.h"
14 #include "X86AsmInstrumentation.h"
15 #include "X86AsmParserCommon.h"
16 #include "X86Operand.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmLexer.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSection.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Support/SourceMgr.h"
36 #include "llvm/Support/TargetRegistry.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include <algorithm>
39 #include <memory>
40 
41 using namespace llvm;
42 
checkScale(unsigned Scale,StringRef & ErrMsg)43 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
44   if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
45     ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
46     return true;
47   }
48   return false;
49 }
50 
51 namespace {
52 
/// Precedence of every infix-calculator token, indexed by the token's
/// enumerator value.  When operators are pushed, an operator with a larger
/// value here is stacked on top of one with a smaller value instead of
/// flushing it, so larger means "binds tighter".
static const char OpPrecedence[] = {
    /* IC_OR       */ 0,
    /* IC_XOR      */ 1,
    /* IC_AND      */ 2,
    /* IC_LSHIFT   */ 3,
    /* IC_RSHIFT   */ 3,
    /* IC_PLUS     */ 4,
    /* IC_MINUS    */ 4,
    /* IC_MULTIPLY */ 5,
    /* IC_DIVIDE   */ 5,
    /* IC_MOD      */ 5,
    /* IC_NOT      */ 6,
    /* IC_NEG      */ 7,
    /* IC_RPAREN   */ 8,
    /* IC_LPAREN   */ 9,
    /* IC_IMM      */ 0,
    /* IC_REGISTER */ 0
};
71 
72 class X86AsmParser : public MCTargetAsmParser {
73   ParseInstructionInfo *InstInfo;
74   std::unique_ptr<X86AsmInstrumentation> Instrumentation;
75   bool Code16GCC;
76 
77 private:
consumeToken()78   SMLoc consumeToken() {
79     MCAsmParser &Parser = getParser();
80     SMLoc Result = Parser.getTok().getLoc();
81     Parser.Lex();
82     return Result;
83   }
84 
getTargetStreamer()85   X86TargetStreamer &getTargetStreamer() {
86     assert(getParser().getStreamer().getTargetStreamer() &&
87            "do not have a target streamer");
88     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
89     return static_cast<X86TargetStreamer &>(TS);
90   }
91 
MatchInstruction(const OperandVector & Operands,MCInst & Inst,uint64_t & ErrorInfo,bool matchingInlineAsm,unsigned VariantID=0)92   unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
93                             uint64_t &ErrorInfo, bool matchingInlineAsm,
94                             unsigned VariantID = 0) {
95     // In Code16GCC mode, match as 32-bit.
96     if (Code16GCC)
97       SwitchMode(X86::Mode32Bit);
98     unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
99                                        matchingInlineAsm, VariantID);
100     if (Code16GCC)
101       SwitchMode(X86::Mode16Bit);
102     return rv;
103   }
104 
105   enum InfixCalculatorTok {
106     IC_OR = 0,
107     IC_XOR,
108     IC_AND,
109     IC_LSHIFT,
110     IC_RSHIFT,
111     IC_PLUS,
112     IC_MINUS,
113     IC_MULTIPLY,
114     IC_DIVIDE,
115     IC_MOD,
116     IC_NOT,
117     IC_NEG,
118     IC_RPAREN,
119     IC_LPAREN,
120     IC_IMM,
121     IC_REGISTER
122   };
123 
124   enum IntelOperatorKind {
125     IOK_INVALID = 0,
126     IOK_LENGTH,
127     IOK_SIZE,
128     IOK_TYPE,
129     IOK_OFFSET
130   };
131 
132   class InfixCalculator {
133     typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
134     SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
135     SmallVector<ICToken, 4> PostfixStack;
136 
isUnaryOperator(const InfixCalculatorTok Op)137     bool isUnaryOperator(const InfixCalculatorTok Op) {
138       return Op == IC_NEG || Op == IC_NOT;
139     }
140 
141   public:
popOperand()142     int64_t popOperand() {
143       assert (!PostfixStack.empty() && "Poped an empty stack!");
144       ICToken Op = PostfixStack.pop_back_val();
145       if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
146         return -1; // The invalid Scale value will be caught later by checkScale
147       return Op.second;
148     }
pushOperand(InfixCalculatorTok Op,int64_t Val=0)149     void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
150       assert ((Op == IC_IMM || Op == IC_REGISTER) &&
151               "Unexpected operand!");
152       PostfixStack.push_back(std::make_pair(Op, Val));
153     }
154 
popOperator()155     void popOperator() { InfixOperatorStack.pop_back(); }
pushOperator(InfixCalculatorTok Op)156     void pushOperator(InfixCalculatorTok Op) {
157       // Push the new operator if the stack is empty.
158       if (InfixOperatorStack.empty()) {
159         InfixOperatorStack.push_back(Op);
160         return;
161       }
162 
163       // Push the new operator if it has a higher precedence than the operator
164       // on the top of the stack or the operator on the top of the stack is a
165       // left parentheses.
166       unsigned Idx = InfixOperatorStack.size() - 1;
167       InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
168       if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
169         InfixOperatorStack.push_back(Op);
170         return;
171       }
172 
173       // The operator on the top of the stack has higher precedence than the
174       // new operator.
175       unsigned ParenCount = 0;
176       while (1) {
177         // Nothing to process.
178         if (InfixOperatorStack.empty())
179           break;
180 
181         Idx = InfixOperatorStack.size() - 1;
182         StackOp = InfixOperatorStack[Idx];
183         if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
184           break;
185 
186         // If we have an even parentheses count and we see a left parentheses,
187         // then stop processing.
188         if (!ParenCount && StackOp == IC_LPAREN)
189           break;
190 
191         if (StackOp == IC_RPAREN) {
192           ++ParenCount;
193           InfixOperatorStack.pop_back();
194         } else if (StackOp == IC_LPAREN) {
195           --ParenCount;
196           InfixOperatorStack.pop_back();
197         } else {
198           InfixOperatorStack.pop_back();
199           PostfixStack.push_back(std::make_pair(StackOp, 0));
200         }
201       }
202       // Push the new operator.
203       InfixOperatorStack.push_back(Op);
204     }
205 
execute()206     int64_t execute() {
207       // Push any remaining operators onto the postfix stack.
208       while (!InfixOperatorStack.empty()) {
209         InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
210         if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
211           PostfixStack.push_back(std::make_pair(StackOp, 0));
212       }
213 
214       if (PostfixStack.empty())
215         return 0;
216 
217       SmallVector<ICToken, 16> OperandStack;
218       for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
219         ICToken Op = PostfixStack[i];
220         if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
221           OperandStack.push_back(Op);
222         } else if (isUnaryOperator(Op.first)) {
223           assert (OperandStack.size() > 0 && "Too few operands.");
224           ICToken Operand = OperandStack.pop_back_val();
225           assert (Operand.first == IC_IMM &&
226                   "Unary operation with a register!");
227           switch (Op.first) {
228           default:
229             report_fatal_error("Unexpected operator!");
230             break;
231           case IC_NEG:
232             OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
233             break;
234           case IC_NOT:
235             OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
236             break;
237           }
238         } else {
239           assert (OperandStack.size() > 1 && "Too few operands.");
240           int64_t Val;
241           ICToken Op2 = OperandStack.pop_back_val();
242           ICToken Op1 = OperandStack.pop_back_val();
243           switch (Op.first) {
244           default:
245             report_fatal_error("Unexpected operator!");
246             break;
247           case IC_PLUS:
248             Val = Op1.second + Op2.second;
249             OperandStack.push_back(std::make_pair(IC_IMM, Val));
250             break;
251           case IC_MINUS:
252             Val = Op1.second - Op2.second;
253             OperandStack.push_back(std::make_pair(IC_IMM, Val));
254             break;
255           case IC_MULTIPLY:
256             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
257                     "Multiply operation with an immediate and a register!");
258             Val = Op1.second * Op2.second;
259             OperandStack.push_back(std::make_pair(IC_IMM, Val));
260             break;
261           case IC_DIVIDE:
262             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
263                     "Divide operation with an immediate and a register!");
264             assert (Op2.second != 0 && "Division by zero!");
265             Val = Op1.second / Op2.second;
266             OperandStack.push_back(std::make_pair(IC_IMM, Val));
267             break;
268           case IC_MOD:
269             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
270                     "Modulo operation with an immediate and a register!");
271             Val = Op1.second % Op2.second;
272             OperandStack.push_back(std::make_pair(IC_IMM, Val));
273             break;
274           case IC_OR:
275             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
276                     "Or operation with an immediate and a register!");
277             Val = Op1.second | Op2.second;
278             OperandStack.push_back(std::make_pair(IC_IMM, Val));
279             break;
280           case IC_XOR:
281             assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
282               "Xor operation with an immediate and a register!");
283             Val = Op1.second ^ Op2.second;
284             OperandStack.push_back(std::make_pair(IC_IMM, Val));
285             break;
286           case IC_AND:
287             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
288                     "And operation with an immediate and a register!");
289             Val = Op1.second & Op2.second;
290             OperandStack.push_back(std::make_pair(IC_IMM, Val));
291             break;
292           case IC_LSHIFT:
293             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
294                     "Left shift operation with an immediate and a register!");
295             Val = Op1.second << Op2.second;
296             OperandStack.push_back(std::make_pair(IC_IMM, Val));
297             break;
298           case IC_RSHIFT:
299             assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
300                     "Right shift operation with an immediate and a register!");
301             Val = Op1.second >> Op2.second;
302             OperandStack.push_back(std::make_pair(IC_IMM, Val));
303             break;
304           }
305         }
306       }
307       assert (OperandStack.size() == 1 && "Expected a single result.");
308       return OperandStack.pop_back_val().second;
309     }
310   };
311 
312   enum IntelExprState {
313     IES_INIT,
314     IES_OR,
315     IES_XOR,
316     IES_AND,
317     IES_LSHIFT,
318     IES_RSHIFT,
319     IES_PLUS,
320     IES_MINUS,
321     IES_NOT,
322     IES_MULTIPLY,
323     IES_DIVIDE,
324     IES_MOD,
325     IES_LBRAC,
326     IES_RBRAC,
327     IES_LPAREN,
328     IES_RPAREN,
329     IES_REGISTER,
330     IES_INTEGER,
331     IES_IDENTIFIER,
332     IES_ERROR
333   };
334 
335   class IntelExprStateMachine {
336     IntelExprState State, PrevState;
337     unsigned BaseReg, IndexReg, TmpReg, Scale;
338     int64_t Imm;
339     const MCExpr *Sym;
340     StringRef SymName;
341     InfixCalculator IC;
342     InlineAsmIdentifierInfo Info;
343     short BracCount;
344     bool MemExpr;
345 
346   public:
IntelExprStateMachine()347     IntelExprStateMachine()
348         : State(IES_INIT), PrevState(IES_ERROR), BaseReg(0), IndexReg(0),
349           TmpReg(0), Scale(0), Imm(0), Sym(nullptr), BracCount(0),
350           MemExpr(false) {}
351 
addImm(int64_t imm)352     void addImm(int64_t imm) { Imm += imm; }
getBracCount()353     short getBracCount() { return BracCount; }
isMemExpr()354     bool isMemExpr() { return MemExpr; }
getBaseReg()355     unsigned getBaseReg() { return BaseReg; }
getIndexReg()356     unsigned getIndexReg() { return IndexReg; }
getScale()357     unsigned getScale() { return Scale; }
getSym()358     const MCExpr *getSym() { return Sym; }
getSymName()359     StringRef getSymName() { return SymName; }
getImm()360     int64_t getImm() { return Imm + IC.execute(); }
isValidEndState()361     bool isValidEndState() {
362       return State == IES_RBRAC || State == IES_INTEGER;
363     }
hadError()364     bool hadError() { return State == IES_ERROR; }
getIdentifierInfo()365     InlineAsmIdentifierInfo &getIdentifierInfo() { return Info; }
366 
onOr()367     void onOr() {
368       IntelExprState CurrState = State;
369       switch (State) {
370       default:
371         State = IES_ERROR;
372         break;
373       case IES_INTEGER:
374       case IES_RPAREN:
375       case IES_REGISTER:
376         State = IES_OR;
377         IC.pushOperator(IC_OR);
378         break;
379       }
380       PrevState = CurrState;
381     }
onXor()382     void onXor() {
383       IntelExprState CurrState = State;
384       switch (State) {
385       default:
386         State = IES_ERROR;
387         break;
388       case IES_INTEGER:
389       case IES_RPAREN:
390       case IES_REGISTER:
391         State = IES_XOR;
392         IC.pushOperator(IC_XOR);
393         break;
394       }
395       PrevState = CurrState;
396     }
onAnd()397     void onAnd() {
398       IntelExprState CurrState = State;
399       switch (State) {
400       default:
401         State = IES_ERROR;
402         break;
403       case IES_INTEGER:
404       case IES_RPAREN:
405       case IES_REGISTER:
406         State = IES_AND;
407         IC.pushOperator(IC_AND);
408         break;
409       }
410       PrevState = CurrState;
411     }
onLShift()412     void onLShift() {
413       IntelExprState CurrState = State;
414       switch (State) {
415       default:
416         State = IES_ERROR;
417         break;
418       case IES_INTEGER:
419       case IES_RPAREN:
420       case IES_REGISTER:
421         State = IES_LSHIFT;
422         IC.pushOperator(IC_LSHIFT);
423         break;
424       }
425       PrevState = CurrState;
426     }
onRShift()427     void onRShift() {
428       IntelExprState CurrState = State;
429       switch (State) {
430       default:
431         State = IES_ERROR;
432         break;
433       case IES_INTEGER:
434       case IES_RPAREN:
435       case IES_REGISTER:
436         State = IES_RSHIFT;
437         IC.pushOperator(IC_RSHIFT);
438         break;
439       }
440       PrevState = CurrState;
441     }
onPlus(StringRef & ErrMsg)442     bool onPlus(StringRef &ErrMsg) {
443       IntelExprState CurrState = State;
444       switch (State) {
445       default:
446         State = IES_ERROR;
447         break;
448       case IES_INTEGER:
449       case IES_RPAREN:
450       case IES_REGISTER:
451         State = IES_PLUS;
452         IC.pushOperator(IC_PLUS);
453         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
454           // If we already have a BaseReg, then assume this is the IndexReg with
455           // no explicit scale.
456           if (!BaseReg) {
457             BaseReg = TmpReg;
458           } else {
459             if (IndexReg) {
460               ErrMsg = "BaseReg/IndexReg already set!";
461               return true;
462             }
463             IndexReg = TmpReg;
464             Scale = 0;
465           }
466         }
467         break;
468       }
469       PrevState = CurrState;
470       return false;
471     }
onMinus(StringRef & ErrMsg)472     bool onMinus(StringRef &ErrMsg) {
473       IntelExprState CurrState = State;
474       switch (State) {
475       default:
476         State = IES_ERROR;
477         break;
478       case IES_OR:
479       case IES_XOR:
480       case IES_AND:
481       case IES_LSHIFT:
482       case IES_RSHIFT:
483       case IES_PLUS:
484       case IES_NOT:
485       case IES_MULTIPLY:
486       case IES_DIVIDE:
487       case IES_MOD:
488       case IES_LPAREN:
489       case IES_RPAREN:
490       case IES_LBRAC:
491       case IES_RBRAC:
492       case IES_INTEGER:
493       case IES_REGISTER:
494       case IES_INIT:
495         State = IES_MINUS;
496         // push minus operator if it is not a negate operator
497         if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
498             CurrState == IES_INTEGER  || CurrState == IES_RBRAC)
499           IC.pushOperator(IC_MINUS);
500         else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
501           // We have negate operator for Scale: it's illegal
502           ErrMsg = "Scale can't be negative";
503           return true;
504         } else
505           IC.pushOperator(IC_NEG);
506         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
507           // If we already have a BaseReg, then assume this is the IndexReg with
508           // no explicit scale.
509           if (!BaseReg) {
510             BaseReg = TmpReg;
511           } else {
512             if (IndexReg) {
513               ErrMsg = "BaseReg/IndexReg already set!";
514               return true;
515             }
516             IndexReg = TmpReg;
517             Scale = 0;
518           }
519         }
520         break;
521       }
522       PrevState = CurrState;
523       return false;
524     }
onNot()525     void onNot() {
526       IntelExprState CurrState = State;
527       switch (State) {
528       default:
529         State = IES_ERROR;
530         break;
531       case IES_OR:
532       case IES_XOR:
533       case IES_AND:
534       case IES_LSHIFT:
535       case IES_RSHIFT:
536       case IES_PLUS:
537       case IES_MINUS:
538       case IES_NOT:
539       case IES_MULTIPLY:
540       case IES_DIVIDE:
541       case IES_MOD:
542       case IES_LPAREN:
543       case IES_LBRAC:
544       case IES_INIT:
545         State = IES_NOT;
546         IC.pushOperator(IC_NOT);
547         break;
548       }
549       PrevState = CurrState;
550     }
551 
onRegister(unsigned Reg,StringRef & ErrMsg)552     bool onRegister(unsigned Reg, StringRef &ErrMsg) {
553       IntelExprState CurrState = State;
554       switch (State) {
555       default:
556         State = IES_ERROR;
557         break;
558       case IES_PLUS:
559       case IES_LPAREN:
560       case IES_LBRAC:
561         State = IES_REGISTER;
562         TmpReg = Reg;
563         IC.pushOperand(IC_REGISTER);
564         break;
565       case IES_MULTIPLY:
566         // Index Register - Scale * Register
567         if (PrevState == IES_INTEGER) {
568           if (IndexReg) {
569             ErrMsg = "BaseReg/IndexReg already set!";
570             return true;
571           }
572           State = IES_REGISTER;
573           IndexReg = Reg;
574           // Get the scale and replace the 'Scale * Register' with '0'.
575           Scale = IC.popOperand();
576           if (checkScale(Scale, ErrMsg))
577             return true;
578           IC.pushOperand(IC_IMM);
579           IC.popOperator();
580         } else {
581           State = IES_ERROR;
582         }
583         break;
584       }
585       PrevState = CurrState;
586       return false;
587     }
onIdentifierExpr(const MCExpr * SymRef,StringRef SymRefName,const InlineAsmIdentifierInfo & IDInfo,bool ParsingInlineAsm,StringRef & ErrMsg)588     bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
589                           const InlineAsmIdentifierInfo &IDInfo,
590                           bool ParsingInlineAsm, StringRef &ErrMsg) {
591       // InlineAsm: Treat an enum value as an integer
592       if (ParsingInlineAsm)
593         if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
594           return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
595       // Treat a symbolic constant like an integer
596       if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
597         return onInteger(CE->getValue(), ErrMsg);
598       PrevState = State;
599       bool HasSymbol = Sym != nullptr;
600       switch (State) {
601       default:
602         State = IES_ERROR;
603         break;
604       case IES_PLUS:
605       case IES_MINUS:
606       case IES_NOT:
607       case IES_INIT:
608       case IES_LBRAC:
609         MemExpr = true;
610         State = IES_INTEGER;
611         Sym = SymRef;
612         SymName = SymRefName;
613         IC.pushOperand(IC_IMM);
614         if (ParsingInlineAsm)
615           Info = IDInfo;
616         break;
617       }
618       if (HasSymbol)
619         ErrMsg = "cannot use more than one symbol in memory operand";
620       return HasSymbol;
621     }
onInteger(int64_t TmpInt,StringRef & ErrMsg)622     bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
623       IntelExprState CurrState = State;
624       switch (State) {
625       default:
626         State = IES_ERROR;
627         break;
628       case IES_PLUS:
629       case IES_MINUS:
630       case IES_NOT:
631       case IES_OR:
632       case IES_XOR:
633       case IES_AND:
634       case IES_LSHIFT:
635       case IES_RSHIFT:
636       case IES_DIVIDE:
637       case IES_MOD:
638       case IES_MULTIPLY:
639       case IES_LPAREN:
640       case IES_INIT:
641       case IES_LBRAC:
642         State = IES_INTEGER;
643         if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
644           // Index Register - Register * Scale
645           if (IndexReg) {
646             ErrMsg = "BaseReg/IndexReg already set!";
647             return true;
648           }
649           IndexReg = TmpReg;
650           Scale = TmpInt;
651           if (checkScale(Scale, ErrMsg))
652             return true;
653           // Get the scale and replace the 'Register * Scale' with '0'.
654           IC.popOperator();
655         } else {
656           IC.pushOperand(IC_IMM, TmpInt);
657         }
658         break;
659       }
660       PrevState = CurrState;
661       return false;
662     }
onStar()663     void onStar() {
664       PrevState = State;
665       switch (State) {
666       default:
667         State = IES_ERROR;
668         break;
669       case IES_INTEGER:
670       case IES_REGISTER:
671       case IES_RPAREN:
672         State = IES_MULTIPLY;
673         IC.pushOperator(IC_MULTIPLY);
674         break;
675       }
676     }
onDivide()677     void onDivide() {
678       PrevState = State;
679       switch (State) {
680       default:
681         State = IES_ERROR;
682         break;
683       case IES_INTEGER:
684       case IES_RPAREN:
685         State = IES_DIVIDE;
686         IC.pushOperator(IC_DIVIDE);
687         break;
688       }
689     }
onMod()690     void onMod() {
691       PrevState = State;
692       switch (State) {
693       default:
694         State = IES_ERROR;
695         break;
696       case IES_INTEGER:
697       case IES_RPAREN:
698         State = IES_MOD;
699         IC.pushOperator(IC_MOD);
700         break;
701       }
702     }
onLBrac()703     bool onLBrac() {
704       if (BracCount)
705         return true;
706       PrevState = State;
707       switch (State) {
708       default:
709         State = IES_ERROR;
710         break;
711       case IES_RBRAC:
712       case IES_INTEGER:
713       case IES_RPAREN:
714         State = IES_PLUS;
715         IC.pushOperator(IC_PLUS);
716         break;
717       case IES_INIT:
718         assert(!BracCount && "BracCount should be zero on parsing's start");
719         State = IES_LBRAC;
720         break;
721       }
722       MemExpr = true;
723       BracCount++;
724       return false;
725     }
onRBrac()726     bool onRBrac() {
727       IntelExprState CurrState = State;
728       switch (State) {
729       default:
730         State = IES_ERROR;
731         break;
732       case IES_INTEGER:
733       case IES_REGISTER:
734       case IES_RPAREN:
735         if (BracCount-- != 1)
736           return true;
737         State = IES_RBRAC;
738         if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
739           // If we already have a BaseReg, then assume this is the IndexReg with
740           // no explicit scale.
741           if (!BaseReg) {
742             BaseReg = TmpReg;
743           } else {
744             assert (!IndexReg && "BaseReg/IndexReg already set!");
745             IndexReg = TmpReg;
746             Scale = 0;
747           }
748         }
749         break;
750       }
751       PrevState = CurrState;
752       return false;
753     }
onLParen()754     void onLParen() {
755       IntelExprState CurrState = State;
756       switch (State) {
757       default:
758         State = IES_ERROR;
759         break;
760       case IES_PLUS:
761       case IES_MINUS:
762       case IES_NOT:
763       case IES_OR:
764       case IES_XOR:
765       case IES_AND:
766       case IES_LSHIFT:
767       case IES_RSHIFT:
768       case IES_MULTIPLY:
769       case IES_DIVIDE:
770       case IES_MOD:
771       case IES_LPAREN:
772       case IES_INIT:
773       case IES_LBRAC:
774         State = IES_LPAREN;
775         IC.pushOperator(IC_LPAREN);
776         break;
777       }
778       PrevState = CurrState;
779     }
onRParen()780     void onRParen() {
781       PrevState = State;
782       switch (State) {
783       default:
784         State = IES_ERROR;
785         break;
786       case IES_INTEGER:
787       case IES_REGISTER:
788       case IES_RPAREN:
789         State = IES_RPAREN;
790         IC.pushOperator(IC_RPAREN);
791         break;
792       }
793     }
794   };
795 
Error(SMLoc L,const Twine & Msg,SMRange Range=None,bool MatchingInlineAsm=false)796   bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
797              bool MatchingInlineAsm = false) {
798     MCAsmParser &Parser = getParser();
799     if (MatchingInlineAsm) {
800       if (!getLexer().isAtStartOfStatement())
801         Parser.eatToEndOfStatement();
802       return false;
803     }
804     return Parser.Error(L, Msg, Range);
805   }
806 
ErrorOperand(SMLoc Loc,StringRef Msg,SMRange R=SMRange ())807   std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg, SMRange R = SMRange()) {
808     Error(Loc, Msg, R);
809     return nullptr;
810   }
811 
812   std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
813   std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
814   bool IsSIReg(unsigned Reg);
815   unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
816   void
817   AddDefaultSrcDestOperands(OperandVector &Operands,
818                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
819                             std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
820   bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
821                                OperandVector &FinalOperands);
822   std::unique_ptr<X86Operand> ParseOperand();
823   std::unique_ptr<X86Operand> ParseATTOperand();
824   std::unique_ptr<X86Operand> ParseIntelOperand();
825   std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
826   bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
827   unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
828   unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
829   std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start);
830   bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM);
831   void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
832                               SMLoc End);
833   bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
834   bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
835                                      InlineAsmIdentifierInfo &Info,
836                                      bool IsUnevaluatedOperand, SMLoc &End);
837 
838   std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg,
839                                               const MCExpr *&Disp,
840                                               const SMLoc &StartLoc,
841                                               SMLoc &EndLoc);
842 
843   bool ParseIntelMemoryOperandSize(unsigned &Size);
844   std::unique_ptr<X86Operand>
845   CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
846                         unsigned IndexReg, unsigned Scale, SMLoc Start,
847                         SMLoc End, unsigned Size, StringRef Identifier,
848                         const InlineAsmIdentifierInfo &Info);
849 
850   bool parseDirectiveEven(SMLoc L);
851   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
852 
853   /// CodeView FPO data directives.
854   bool parseDirectiveFPOProc(SMLoc L);
855   bool parseDirectiveFPOSetFrame(SMLoc L);
856   bool parseDirectiveFPOPushReg(SMLoc L);
857   bool parseDirectiveFPOStackAlloc(SMLoc L);
858   bool parseDirectiveFPOStackAlign(SMLoc L);
859   bool parseDirectiveFPOEndPrologue(SMLoc L);
860   bool parseDirectiveFPOEndProc(SMLoc L);
861   bool parseDirectiveFPOData(SMLoc L);
862 
863   bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
864   bool processInstruction(MCInst &Inst, const OperandVector &Ops);
865 
866   /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
867   /// instrumentation around Inst.
868   void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
869 
870   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
871                                OperandVector &Operands, MCStreamer &Out,
872                                uint64_t &ErrorInfo,
873                                bool MatchingInlineAsm) override;
874 
875   void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
876                          MCStreamer &Out, bool MatchingInlineAsm);
877 
878   bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
879                            bool MatchingInlineAsm);
880 
881   bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
882                                   OperandVector &Operands, MCStreamer &Out,
883                                   uint64_t &ErrorInfo,
884                                   bool MatchingInlineAsm);
885 
886   bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
887                                     OperandVector &Operands, MCStreamer &Out,
888                                     uint64_t &ErrorInfo,
889                                     bool MatchingInlineAsm);
890 
891   bool OmitRegisterFromClobberLists(unsigned RegNo) override;
892 
893   /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
894   /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
895   /// return false if no parsing errors occurred, true otherwise.
896   bool HandleAVX512Operand(OperandVector &Operands,
897                            const MCParsedAsmOperand &Op);
898 
899   bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
900 
is64BitMode() const901   bool is64BitMode() const {
902     // FIXME: Can tablegen auto-generate this?
903     return getSTI().getFeatureBits()[X86::Mode64Bit];
904   }
is32BitMode() const905   bool is32BitMode() const {
906     // FIXME: Can tablegen auto-generate this?
907     return getSTI().getFeatureBits()[X86::Mode32Bit];
908   }
is16BitMode() const909   bool is16BitMode() const {
910     // FIXME: Can tablegen auto-generate this?
911     return getSTI().getFeatureBits()[X86::Mode16Bit];
912   }
SwitchMode(unsigned mode)913   void SwitchMode(unsigned mode) {
914     MCSubtargetInfo &STI = copySTI();
915     FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
916     FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
917     uint64_t FB = ComputeAvailableFeatures(
918       STI.ToggleFeature(OldMode.flip(mode)));
919     setAvailableFeatures(FB);
920 
921     assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
922   }
923 
getPointerWidth()924   unsigned getPointerWidth() {
925     if (is16BitMode()) return 16;
926     if (is32BitMode()) return 32;
927     if (is64BitMode()) return 64;
928     llvm_unreachable("invalid mode");
929   }
930 
isParsingIntelSyntax()931   bool isParsingIntelSyntax() {
932     return getParser().getAssemblerDialect();
933   }
934 
935   /// @name Auto-generated Matcher Functions
936   /// {
937 
938 #define GET_ASSEMBLER_HEADER
939 #include "X86GenAsmMatcher.inc"
940 
941   /// }
942 
943 public:
944 
  /// Constructs the X86 target assembly parser.
  ///
  /// Besides forwarding \p Options, \p sti and \p mii to MCTargetAsmParser,
  /// this registers a directive alias pairing ".word" with ".2byte" on
  /// \p Parser, seeds the available-feature set from the subtarget's feature
  /// bits, and creates the instrumentation helper used by EmitInstruction().
  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii),  InstInfo(nullptr),
        Code16GCC(false) {

    // NOTE(review): presumably makes ".word" behave like ".2byte" — confirm
    // the argument order against MCAsmParser::addAliasForDirective.
    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
    // Take ownership of a freshly created instrumentation object.
    Instrumentation.reset(
        CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
  }
957 
958   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
959 
960   void SetFrameRegister(unsigned RegNo) override;
961 
962   bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
963 
964   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
965                         SMLoc NameLoc, OperandVector &Operands) override;
966 
967   bool ParseDirective(AsmToken DirectiveID) override;
968 };
969 } // end anonymous namespace
970 
971 /// @name Auto-generated Match Functions
972 /// {
973 
974 static unsigned MatchRegisterName(StringRef Name);
975 
976 /// }
977 
CheckBaseRegAndIndexRegAndScale(unsigned BaseReg,unsigned IndexReg,unsigned Scale,bool Is64BitMode,StringRef & ErrMsg)978 static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
979                                             unsigned Scale, bool Is64BitMode,
980                                             StringRef &ErrMsg) {
981   // If we have both a base register and an index register make sure they are
982   // both 64-bit or 32-bit registers.
983   // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
984 
985   if (BaseReg != 0 &&
986       !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
987         X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
988         X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
989         X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
990     ErrMsg = "invalid base+index expression";
991     return true;
992   }
993 
994   if (IndexReg != 0 &&
995       !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
996         X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
997         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
998         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
999         X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1000         X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1001         X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
1002     ErrMsg = "invalid base+index expression";
1003     return true;
1004   }
1005 
1006   if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
1007       IndexReg == X86::EIP || IndexReg == X86::RIP ||
1008       IndexReg == X86::ESP || IndexReg == X86::RSP) {
1009     ErrMsg = "invalid base+index expression";
1010     return true;
1011   }
1012 
1013   // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
1014   // and then only in non-64-bit modes.
1015   if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
1016       (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
1017                        BaseReg != X86::SI && BaseReg != X86::DI))) {
1018     ErrMsg = "invalid 16-bit base register";
1019     return true;
1020   }
1021 
1022   if (BaseReg == 0 &&
1023       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
1024     ErrMsg = "16-bit memory operand may not include only index register";
1025     return true;
1026   }
1027 
1028   if (BaseReg != 0 && IndexReg != 0) {
1029     if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
1030         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1031          X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1032          IndexReg == X86::EIZ)) {
1033       ErrMsg = "base register is 64-bit, but index register is not";
1034       return true;
1035     }
1036     if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
1037         (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
1038          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
1039          IndexReg == X86::RIZ)) {
1040       ErrMsg = "base register is 32-bit, but index register is not";
1041       return true;
1042     }
1043     if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
1044       if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
1045           X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
1046         ErrMsg = "base register is 16-bit, but index register is not";
1047         return true;
1048       }
1049       if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
1050           (IndexReg != X86::SI && IndexReg != X86::DI)) {
1051         ErrMsg = "invalid 16-bit base/index register combination";
1052         return true;
1053       }
1054     }
1055   }
1056 
1057   // RIP/EIP-relative addressing is only supported in 64-bit mode.
1058   if (!Is64BitMode && BaseReg != 0 &&
1059       (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
1060     ErrMsg = "IP-relative addressing requires 64-bit mode";
1061     return true;
1062   }
1063 
1064   return checkScale(Scale, ErrMsg);
1065 }
1066 
/// Parses a register reference at the current token.
///
/// On entry \p RegNo is reset to 0 and \p StartLoc is set to the location of
/// the current token. In non-Intel syntax a leading '%' is consumed first.
/// The name is matched as written and then lower-cased; "st" may be followed
/// by a parenthesised stack index, and "db<N>" is accepted as an alias for
/// the debug register "dr<N>".
///
/// Returns false on success with \p RegNo and \p EndLoc filled in and the
/// register tokens consumed. On failure it returns true when parsing Intel
/// syntax and was not given a register (no diagnostic is emitted), or the
/// result of Error() otherwise.
bool X86AsmParser::ParseRegister(unsigned &RegNo,
                                 SMLoc &StartLoc, SMLoc &EndLoc) {
  MCAsmParser &Parser = getParser();
  RegNo = 0;
  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
    Parser.Lex(); // Eat percent token.

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  // In Intel syntax a non-identifier is reported silently so the caller can
  // try other interpretations; otherwise it is a hard error.
  if (Tok.isNot(AsmToken::Identifier)) {
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  RegNo = MatchRegisterName(Tok.getString());

  // If the match failed, try the register name as lowercase.
  if (RegNo == 0)
    RegNo = MatchRegisterName(Tok.getString().lower());

  // The "flags" register cannot be referenced directly.
  // Treat it as an identifier instead.
  if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
    RegNo = 0;

  if (!is64BitMode()) {
    // FIXME: This should be done using Requires<Not64BitMode> and
    // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
    // checked.
    // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
    // REX prefix.
    if (RegNo == X86::RIZ || RegNo == X86::RIP ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
        X86II::isX86_64NonExtLowByteReg(RegNo) ||
        X86II::isX86_64ExtendedReg(RegNo)) {
      // Capture the name before lexing past the token.
      StringRef RegName = Tok.getString();
      Parser.Lex(); // Eat register name.
      return Error(StartLoc,
                   "register %" + RegName + " is only available in 64-bit mode",
                   SMRange(StartLoc, EndLoc));
    }
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    // A bare "st" is accepted as ST0; EndLoc still refers to the "st" token.
    if (getLexer().isNot(AsmToken::LParen))
      return false;
    // Lex the paren.
    getParser().Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer))
      return Error(IntTok.getLoc(), "expected stack index");
    // Map the stack index 0-7 onto ST0-ST7; anything else is rejected.
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default: return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Eat the integer and require a closing parenthesis after it.
    if (getParser().Lex().isNot(AsmToken::RParen))
      return Error(Parser.getTok().getLoc(), "expected ')'");

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  // If this is "db[0-15]", match it as an alias
  // for dr[0-15].
  if (RegNo == 0 && Tok.getString().startswith("db")) {
    if (Tok.getString().size() == 3) {
      // Single-digit form: db0 .. db9.
      switch (Tok.getString()[2]) {
      case '0': RegNo = X86::DR0; break;
      case '1': RegNo = X86::DR1; break;
      case '2': RegNo = X86::DR2; break;
      case '3': RegNo = X86::DR3; break;
      case '4': RegNo = X86::DR4; break;
      case '5': RegNo = X86::DR5; break;
      case '6': RegNo = X86::DR6; break;
      case '7': RegNo = X86::DR7; break;
      case '8': RegNo = X86::DR8; break;
      case '9': RegNo = X86::DR9; break;
      }
    } else if (Tok.getString().size() == 4 && Tok.getString()[2] == '1') {
      // Two-digit form: db10 .. db15.
      switch (Tok.getString()[3]) {
      case '0': RegNo = X86::DR10; break;
      case '1': RegNo = X86::DR11; break;
      case '2': RegNo = X86::DR12; break;
      case '3': RegNo = X86::DR13; break;
      case '4': RegNo = X86::DR14; break;
      case '5': RegNo = X86::DR15; break;
      }
    }

    if (RegNo != 0) {
      EndLoc = Parser.getTok().getEndLoc();
      Parser.Lex(); // Eat it.
      return false;
    }
  }

  // Nothing matched: silent failure in Intel syntax, diagnostic otherwise.
  // The identifier token is left unconsumed in both cases.
  if (RegNo == 0) {
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}
1195 
/// Forwards \p RegNo to the instrumentation object as its initial frame
/// register.
void X86AsmParser::SetFrameRegister(unsigned RegNo) {
  Instrumentation->SetInitialFrameRegister(RegNo);
}
1199 
DefaultMemSIOperand(SMLoc Loc)1200 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1201   bool Parse32 = is32BitMode() || Code16GCC;
1202   unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1203   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1204   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1205                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1206                                Loc, Loc, 0);
1207 }
1208 
DefaultMemDIOperand(SMLoc Loc)1209 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1210   bool Parse32 = is32BitMode() || Code16GCC;
1211   unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1212   const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1213   return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1214                                /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1215                                Loc, Loc, 0);
1216 }
1217 
IsSIReg(unsigned Reg)1218 bool X86AsmParser::IsSIReg(unsigned Reg) {
1219   switch (Reg) {
1220   default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1221   case X86::RSI:
1222   case X86::ESI:
1223   case X86::SI:
1224     return true;
1225   case X86::RDI:
1226   case X86::EDI:
1227   case X86::DI:
1228     return false;
1229   }
1230 }
1231 
GetSIDIForRegClass(unsigned RegClassID,unsigned Reg,bool IsSIReg)1232 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1233                                           bool IsSIReg) {
1234   switch (RegClassID) {
1235   default: llvm_unreachable("Unexpected register class");
1236   case X86::GR64RegClassID:
1237     return IsSIReg ? X86::RSI : X86::RDI;
1238   case X86::GR32RegClassID:
1239     return IsSIReg ? X86::ESI : X86::EDI;
1240   case X86::GR16RegClassID:
1241     return IsSIReg ? X86::SI : X86::DI;
1242   }
1243 }
1244 
AddDefaultSrcDestOperands(OperandVector & Operands,std::unique_ptr<llvm::MCParsedAsmOperand> && Src,std::unique_ptr<llvm::MCParsedAsmOperand> && Dst)1245 void X86AsmParser::AddDefaultSrcDestOperands(
1246     OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1247     std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1248   if (isParsingIntelSyntax()) {
1249     Operands.push_back(std::move(Dst));
1250     Operands.push_back(std::move(Src));
1251   }
1252   else {
1253     Operands.push_back(std::move(Src));
1254     Operands.push_back(std::move(Dst));
1255   }
1256 }
1257 
VerifyAndAdjustOperands(OperandVector & OrigOperands,OperandVector & FinalOperands)1258 bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
1259                                            OperandVector &FinalOperands) {
1260 
1261   if (OrigOperands.size() > 1) {
1262     // Check if sizes match, OrigOperands also contains the instruction name
1263     assert(OrigOperands.size() == FinalOperands.size() + 1 &&
1264            "Operand size mismatch");
1265 
1266     SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
1267     // Verify types match
1268     int RegClassID = -1;
1269     for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
1270       X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
1271       X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);
1272 
1273       if (FinalOp.isReg() &&
1274           (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
1275         // Return false and let a normal complaint about bogus operands happen
1276         return false;
1277 
1278       if (FinalOp.isMem()) {
1279 
1280         if (!OrigOp.isMem())
1281           // Return false and let a normal complaint about bogus operands happen
1282           return false;
1283 
1284         unsigned OrigReg = OrigOp.Mem.BaseReg;
1285         unsigned FinalReg = FinalOp.Mem.BaseReg;
1286 
1287         // If we've already encounterd a register class, make sure all register
1288         // bases are of the same register class
1289         if (RegClassID != -1 &&
1290             !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
1291           return Error(OrigOp.getStartLoc(),
1292                        "mismatching source and destination index registers");
1293         }
1294 
1295         if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
1296           RegClassID = X86::GR64RegClassID;
1297         else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
1298           RegClassID = X86::GR32RegClassID;
1299         else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
1300           RegClassID = X86::GR16RegClassID;
1301         else
1302           // Unexpected register class type
1303           // Return false and let a normal complaint about bogus operands happen
1304           return false;
1305 
1306         bool IsSI = IsSIReg(FinalReg);
1307         FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);
1308 
1309         if (FinalReg != OrigReg) {
1310           std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
1311           Warnings.push_back(std::make_pair(
1312               OrigOp.getStartLoc(),
1313               "memory operand is only for determining the size, " + RegName +
1314                   " will be used for the location"));
1315         }
1316 
1317         FinalOp.Mem.Size = OrigOp.Mem.Size;
1318         FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
1319         FinalOp.Mem.BaseReg = FinalReg;
1320       }
1321     }
1322 
1323     // Produce warnings only if all the operands passed the adjustment - prevent
1324     // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
1325     for (auto &WarningMsg : Warnings) {
1326       Warning(WarningMsg.first, WarningMsg.second);
1327     }
1328 
1329     // Remove old operands
1330     for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1331       OrigOperands.pop_back();
1332   }
1333   // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
1334   for (unsigned int i = 0; i < FinalOperands.size(); ++i)
1335     OrigOperands.push_back(std::move(FinalOperands[i]));
1336 
1337   return false;
1338 }
1339 
ParseOperand()1340 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1341   if (isParsingIntelSyntax())
1342     return ParseIntelOperand();
1343   return ParseATTOperand();
1344 }
1345 
CreateMemForInlineAsm(unsigned SegReg,const MCExpr * Disp,unsigned BaseReg,unsigned IndexReg,unsigned Scale,SMLoc Start,SMLoc End,unsigned Size,StringRef Identifier,const InlineAsmIdentifierInfo & Info)1346 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1347     unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1348     unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1349     const InlineAsmIdentifierInfo &Info) {
1350   // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1351   // some other label reference.
1352   if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
1353     // Insert an explicit size if the user didn't have one.
1354     if (!Size) {
1355       Size = getPointerWidth();
1356       InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1357                                           /*Len=*/0, Size);
1358     }
1359     // Create an absolute memory reference in order to match against
1360     // instructions taking a PC relative operand.
1361     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1362                                  Identifier, Info.Label.Decl);
1363   }
1364   // We either have a direct symbol reference, or an offset from a symbol.  The
1365   // parser always puts the symbol on the LHS, so look there for size
1366   // calculation purposes.
1367   unsigned FrontendSize = 0;
1368   void *Decl = nullptr;
1369   bool IsGlobalLV = false;
1370   if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1371     // Size is in terms of bits in this context.
1372     FrontendSize = Info.Var.Type * 8;
1373     Decl = Info.Var.Decl;
1374     IsGlobalLV = Info.Var.IsGlobalLV;
1375   }
1376   // It is widely common for MS InlineAsm to use a global variable and one/two
1377   // registers in a mmory expression, and though unaccessible via rip/eip.
1378   if (IsGlobalLV && (BaseReg || IndexReg)) {
1379     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End);
1380   // Otherwise, we set the base register to a non-zero value
1381   // if we don't know the actual value at this time.  This is necessary to
1382   // get the matching correct in some cases.
1383   } else {
1384     BaseReg = BaseReg ? BaseReg : 1;
1385     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1386                                  IndexReg, Scale, Start, End, Size, Identifier,
1387                                  Decl, FrontendSize);
1388   }
1389 }
1390 
1391 // Some binary bitwise operators have a named synonymous
1392 // Query a candidate string for being such a named operator
1393 // and if so - invoke the appropriate handler
ParseIntelNamedOperator(StringRef Name,IntelExprStateMachine & SM)1394 bool X86AsmParser::ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM) {
1395   // A named operator should be either lower or upper case, but not a mix
1396   if (Name.compare(Name.lower()) && Name.compare(Name.upper()))
1397     return false;
1398   if (Name.equals_lower("not"))
1399     SM.onNot();
1400   else if (Name.equals_lower("or"))
1401     SM.onOr();
1402   else if (Name.equals_lower("shl"))
1403     SM.onLShift();
1404   else if (Name.equals_lower("shr"))
1405     SM.onRShift();
1406   else if (Name.equals_lower("xor"))
1407     SM.onXor();
1408   else if (Name.equals_lower("and"))
1409     SM.onAnd();
1410   else if (Name.equals_lower("mod"))
1411     SM.onMod();
1412   else
1413     return false;
1414   return true;
1415 }
1416 
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1417 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1418   MCAsmParser &Parser = getParser();
1419   const AsmToken &Tok = Parser.getTok();
1420   StringRef ErrMsg;
1421 
1422   AsmToken::TokenKind PrevTK = AsmToken::Error;
1423   bool Done = false;
1424   while (!Done) {
1425     bool UpdateLocLex = true;
1426     AsmToken::TokenKind TK = getLexer().getKind();
1427 
1428     switch (TK) {
1429     default:
1430       if ((Done = SM.isValidEndState()))
1431         break;
1432       return Error(Tok.getLoc(), "unknown token in expression");
1433     case AsmToken::EndOfStatement:
1434       Done = true;
1435       break;
1436     case AsmToken::Real:
1437       // DotOperator: [ebx].0
1438       UpdateLocLex = false;
1439       if (ParseIntelDotOperator(SM, End))
1440         return true;
1441       break;
1442     case AsmToken::At:
1443     case AsmToken::String:
1444     case AsmToken::Identifier: {
1445       SMLoc IdentLoc = Tok.getLoc();
1446       StringRef Identifier = Tok.getString();
1447       UpdateLocLex = false;
1448       // Register
1449       unsigned Reg;
1450       if (Tok.is(AsmToken::Identifier) && !ParseRegister(Reg, IdentLoc, End)) {
1451         if (SM.onRegister(Reg, ErrMsg))
1452           return Error(Tok.getLoc(), ErrMsg);
1453         break;
1454       }
1455       // Operator synonymous ("not", "or" etc.)
1456       if ((UpdateLocLex = ParseIntelNamedOperator(Identifier, SM)))
1457         break;
1458       // Symbol reference, when parsing assembly content
1459       InlineAsmIdentifierInfo Info;
1460       const MCExpr *Val;
1461       if (!isParsingInlineAsm()) {
1462         if (getParser().parsePrimaryExpr(Val, End)) {
1463           return Error(Tok.getLoc(), "Unexpected identifier!");
1464         } else if (SM.onIdentifierExpr(Val, Identifier, Info, false, ErrMsg)) {
1465           return Error(IdentLoc, ErrMsg);
1466         } else
1467           break;
1468       }
1469       // MS InlineAsm operators (TYPE/LENGTH/SIZE)
1470       if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
1471         if (OpKind == IOK_OFFSET)
1472           return Error(IdentLoc, "Dealing OFFSET operator as part of"
1473             "a compound immediate expression is yet to be supported");
1474         if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
1475           if (SM.onInteger(Val, ErrMsg))
1476             return Error(IdentLoc, ErrMsg);
1477         } else
1478           return true;
1479         break;
1480       }
1481       // MS Dot Operator expression
1482       if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
1483         if (ParseIntelDotOperator(SM, End))
1484           return true;
1485         break;
1486       }
1487       // MS InlineAsm identifier
1488       // Call parseIdentifier() to combine @ with the identifier behind it.
1489       if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
1490         return Error(IdentLoc, "expected identifier");
1491       if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
1492         return true;
1493       else if (SM.onIdentifierExpr(Val, Identifier, Info, true, ErrMsg))
1494         return Error(IdentLoc, ErrMsg);
1495       break;
1496     }
1497     case AsmToken::Integer: {
1498       // Look for 'b' or 'f' following an Integer as a directional label
1499       SMLoc Loc = getTok().getLoc();
1500       int64_t IntVal = getTok().getIntVal();
1501       End = consumeToken();
1502       UpdateLocLex = false;
1503       if (getLexer().getKind() == AsmToken::Identifier) {
1504         StringRef IDVal = getTok().getString();
1505         if (IDVal == "f" || IDVal == "b") {
1506           MCSymbol *Sym =
1507               getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1508           MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1509           const MCExpr *Val =
1510               MCSymbolRefExpr::create(Sym, Variant, getContext());
1511           if (IDVal == "b" && Sym->isUndefined())
1512             return Error(Loc, "invalid reference to undefined symbol");
1513           StringRef Identifier = Sym->getName();
1514           InlineAsmIdentifierInfo Info;
1515           if (SM.onIdentifierExpr(Val, Identifier, Info,
1516               isParsingInlineAsm(), ErrMsg))
1517             return Error(Loc, ErrMsg);
1518           End = consumeToken();
1519         } else {
1520           if (SM.onInteger(IntVal, ErrMsg))
1521             return Error(Loc, ErrMsg);
1522         }
1523       } else {
1524         if (SM.onInteger(IntVal, ErrMsg))
1525           return Error(Loc, ErrMsg);
1526       }
1527       break;
1528     }
1529     case AsmToken::Plus:
1530       if (SM.onPlus(ErrMsg))
1531         return Error(getTok().getLoc(), ErrMsg);
1532       break;
1533     case AsmToken::Minus:
1534       if (SM.onMinus(ErrMsg))
1535         return Error(getTok().getLoc(), ErrMsg);
1536       break;
1537     case AsmToken::Tilde:   SM.onNot(); break;
1538     case AsmToken::Star:    SM.onStar(); break;
1539     case AsmToken::Slash:   SM.onDivide(); break;
1540     case AsmToken::Percent: SM.onMod(); break;
1541     case AsmToken::Pipe:    SM.onOr(); break;
1542     case AsmToken::Caret:   SM.onXor(); break;
1543     case AsmToken::Amp:     SM.onAnd(); break;
1544     case AsmToken::LessLess:
1545                             SM.onLShift(); break;
1546     case AsmToken::GreaterGreater:
1547                             SM.onRShift(); break;
1548     case AsmToken::LBrac:
1549       if (SM.onLBrac())
1550         return Error(Tok.getLoc(), "unexpected bracket encountered");
1551       break;
1552     case AsmToken::RBrac:
1553       if (SM.onRBrac())
1554         return Error(Tok.getLoc(), "unexpected bracket encountered");
1555       break;
1556     case AsmToken::LParen:  SM.onLParen(); break;
1557     case AsmToken::RParen:  SM.onRParen(); break;
1558     }
1559     if (SM.hadError())
1560       return Error(Tok.getLoc(), "unknown token in expression");
1561 
1562     if (!Done && UpdateLocLex)
1563       End = consumeToken();
1564 
1565     PrevTK = TK;
1566   }
1567   return false;
1568 }
1569 
RewriteIntelExpression(IntelExprStateMachine & SM,SMLoc Start,SMLoc End)1570 void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
1571                                           SMLoc Start, SMLoc End) {
1572   SMLoc Loc = Start;
1573   unsigned ExprLen = End.getPointer() - Start.getPointer();
1574   // Skip everything before a symbol displacement (if we have one)
1575   if (SM.getSym()) {
1576     StringRef SymName = SM.getSymName();
1577     if (unsigned Len =  SymName.data() - Start.getPointer())
1578       InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
1579     Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
1580     ExprLen = End.getPointer() - (SymName.data() + SymName.size());
1581     // If we have only a symbol than there's no need for complex rewrite,
1582     // simply skip everything after it
1583     if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
1584       if (ExprLen)
1585         InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
1586       return;
1587     }
1588   }
1589   // Build an Intel Expression rewrite
1590   StringRef BaseRegStr;
1591   StringRef IndexRegStr;
1592   if (SM.getBaseReg())
1593     BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
1594   if (SM.getIndexReg())
1595     IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
1596   // Emit it
1597   IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), SM.getImm(), SM.isMemExpr());
1598   InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
1599 }
1600 
1601 // Inline assembly may use variable names with namespace alias qualifiers.
ParseIntelInlineAsmIdentifier(const MCExpr * & Val,StringRef & Identifier,InlineAsmIdentifierInfo & Info,bool IsUnevaluatedOperand,SMLoc & End)1602 bool X86AsmParser::ParseIntelInlineAsmIdentifier(const MCExpr *&Val,
1603                                                  StringRef &Identifier,
1604                                                  InlineAsmIdentifierInfo &Info,
1605                                                  bool IsUnevaluatedOperand,
1606                                                  SMLoc &End) {
1607   MCAsmParser &Parser = getParser();
1608   assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
1609   Val = nullptr;
1610 
1611   StringRef LineBuf(Identifier.data());
1612   SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
1613 
1614   const AsmToken &Tok = Parser.getTok();
1615   SMLoc Loc = Tok.getLoc();
1616 
1617   // Advance the token stream until the end of the current token is
1618   // after the end of what the frontend claimed.
1619   const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
1620   do {
1621     End = Tok.getEndLoc();
1622     getLexer().Lex();
1623   } while (End.getPointer() < EndPtr);
1624   Identifier = LineBuf;
1625 
1626   // The frontend should end parsing on an assembler token boundary, unless it
1627   // failed parsing.
1628   assert((End.getPointer() == EndPtr ||
1629           Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
1630           "frontend claimed part of a token?");
1631 
1632   // If the identifier lookup was unsuccessful, assume that we are dealing with
1633   // a label.
1634   if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
1635     StringRef InternalName =
1636       SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
1637                                          Loc, false);
1638     assert(InternalName.size() && "We should have an internal name here.");
1639     // Push a rewrite for replacing the identifier name with the internal name.
1640     InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
1641                                         InternalName);
1642   } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1643     return false;
1644   // Create the symbol reference.
1645   MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
1646   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1647   Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
1648   return false;
1649 }
1650 
1651 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1652 std::unique_ptr<X86Operand>
ParseRoundingModeOp(SMLoc Start)1653 X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
1654   MCAsmParser &Parser = getParser();
1655   const AsmToken &Tok = Parser.getTok();
1656   // Eat "{" and mark the current place.
1657   const SMLoc consumedToken = consumeToken();
1658   if (Tok.getIdentifier().startswith("r")){
1659     int rndMode = StringSwitch<int>(Tok.getIdentifier())
1660       .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1661       .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1662       .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1663       .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1664       .Default(-1);
1665     if (-1 == rndMode)
1666       return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1667      Parser.Lex();  // Eat "r*" of r*-sae
1668     if (!getLexer().is(AsmToken::Minus))
1669       return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1670     Parser.Lex();  // Eat "-"
1671     Parser.Lex();  // Eat the sae
1672     if (!getLexer().is(AsmToken::RCurly))
1673       return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1674     SMLoc End = Tok.getEndLoc();
1675     Parser.Lex();  // Eat "}"
1676     const MCExpr *RndModeOp =
1677       MCConstantExpr::create(rndMode, Parser.getContext());
1678     return X86Operand::CreateImm(RndModeOp, Start, End);
1679   }
1680   if(Tok.getIdentifier().equals("sae")){
1681     Parser.Lex();  // Eat the sae
1682     if (!getLexer().is(AsmToken::RCurly))
1683       return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1684     Parser.Lex();  // Eat "}"
1685     return X86Operand::CreateToken("{sae}", consumedToken);
1686   }
1687   return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1688 }
1689 
1690 /// Parse the '.' operator.
ParseIntelDotOperator(IntelExprStateMachine & SM,SMLoc & End)1691 bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
1692   const AsmToken &Tok = getTok();
1693   unsigned Offset;
1694 
1695   // Drop the optional '.'.
1696   StringRef DotDispStr = Tok.getString();
1697   if (DotDispStr.startswith("."))
1698     DotDispStr = DotDispStr.drop_front(1);
1699 
1700   // .Imm gets lexed as a real.
1701   if (Tok.is(AsmToken::Real)) {
1702     APInt DotDisp;
1703     DotDispStr.getAsInteger(10, DotDisp);
1704     Offset = DotDisp.getZExtValue();
1705   } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1706     std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1707     if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1708                                            Offset))
1709       return Error(Tok.getLoc(), "Unable to lookup field reference!");
1710   } else
1711     return Error(Tok.getLoc(), "Unexpected token type!");
1712 
1713   // Eat the DotExpression and update End
1714   End = SMLoc::getFromPointer(DotDispStr.data());
1715   const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
1716   while (Tok.getLoc().getPointer() < DotExprEndLoc)
1717     Lex();
1718   SM.addImm(Offset);
1719   return false;
1720 }
1721 
1722 /// Parse the 'offset' operator.  This operator is used to specify the
1723 /// location rather then the content of a variable.
ParseIntelOffsetOfOperator()1724 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1725   MCAsmParser &Parser = getParser();
1726   const AsmToken &Tok = Parser.getTok();
1727   SMLoc OffsetOfLoc = Tok.getLoc();
1728   Parser.Lex(); // Eat offset.
1729 
1730   const MCExpr *Val;
1731   InlineAsmIdentifierInfo Info;
1732   SMLoc Start = Tok.getLoc(), End;
1733   StringRef Identifier = Tok.getString();
1734   if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1735                                     /*Unevaluated=*/false, End))
1736     return nullptr;
1737 
1738   void *Decl = nullptr;
1739   // FIXME: MS evaluates "offset <Constant>" to the underlying integral
1740   if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
1741     return ErrorOperand(Start, "offset operator cannot yet handle constants");
1742   else if (Info.isKind(InlineAsmIdentifierInfo::IK_Var))
1743     Decl = Info.Var.Decl;
1744   // Don't emit the offset operator.
1745   InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1746 
1747   // The offset operator will have an 'r' constraint, thus we need to create
1748   // register operand to ensure proper matching.  Just pick a GPR based on
1749   // the size of a pointer.
1750   bool Parse32 = is32BitMode() || Code16GCC;
1751   unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
1752 
1753   return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1754                                OffsetOfLoc, Identifier, Decl);
1755 }
1756 
1757 // Query a candidate string for being an Intel assembly operator
1758 // Report back its kind, or IOK_INVALID if does not evaluated as a known one
IdentifyIntelInlineAsmOperator(StringRef Name)1759 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
1760   return StringSwitch<unsigned>(Name)
1761     .Cases("TYPE","type",IOK_TYPE)
1762     .Cases("SIZE","size",IOK_SIZE)
1763     .Cases("LENGTH","length",IOK_LENGTH)
1764     .Cases("OFFSET","offset",IOK_OFFSET)
1765     .Default(IOK_INVALID);
1766 }
1767 
1768 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators.  The LENGTH operator
1769 /// returns the number of elements in an array.  It returns the value 1 for
1770 /// non-array variables.  The SIZE operator returns the size of a C or C++
1771 /// variable.  A variable's size is the product of its LENGTH and TYPE.  The
1772 /// TYPE operator returns the size of a C or C++ type or variable. If the
1773 /// variable is an array, TYPE returns the size of a single element.
ParseIntelInlineAsmOperator(unsigned OpKind)1774 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
1775   MCAsmParser &Parser = getParser();
1776   const AsmToken &Tok = Parser.getTok();
1777   Parser.Lex(); // Eat operator.
1778 
1779   const MCExpr *Val = nullptr;
1780   InlineAsmIdentifierInfo Info;
1781   SMLoc Start = Tok.getLoc(), End;
1782   StringRef Identifier = Tok.getString();
1783   if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
1784                                     /*Unevaluated=*/true, End))
1785     return 0;
1786 
1787   if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
1788     Error(Start, "unable to lookup expression");
1789     return 0;
1790   }
1791 
1792   unsigned CVal = 0;
1793   switch(OpKind) {
1794   default: llvm_unreachable("Unexpected operand kind!");
1795   case IOK_LENGTH: CVal = Info.Var.Length; break;
1796   case IOK_SIZE: CVal = Info.Var.Size; break;
1797   case IOK_TYPE: CVal = Info.Var.Type; break;
1798   }
1799 
1800   return CVal;
1801 }
1802 
ParseIntelMemoryOperandSize(unsigned & Size)1803 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
1804   Size = StringSwitch<unsigned>(getTok().getString())
1805     .Cases("BYTE", "byte", 8)
1806     .Cases("WORD", "word", 16)
1807     .Cases("DWORD", "dword", 32)
1808     .Cases("FLOAT", "float", 32)
1809     .Cases("LONG", "long", 32)
1810     .Cases("FWORD", "fword", 48)
1811     .Cases("DOUBLE", "double", 64)
1812     .Cases("QWORD", "qword", 64)
1813     .Cases("MMWORD","mmword", 64)
1814     .Cases("XWORD", "xword", 80)
1815     .Cases("TBYTE", "tbyte", 80)
1816     .Cases("XMMWORD", "xmmword", 128)
1817     .Cases("YMMWORD", "ymmword", 256)
1818     .Cases("ZMMWORD", "zmmword", 512)
1819     .Default(0);
1820   if (Size) {
1821     const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
1822     if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
1823       return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1824     Lex(); // Eat ptr.
1825   }
1826   return false;
1827 }
1828 
ParseIntelOperand()1829 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1830   MCAsmParser &Parser = getParser();
1831   const AsmToken &Tok = Parser.getTok();
1832   SMLoc Start, End;
1833 
1834   // FIXME: Offset operator
1835   // Should be handled as part of immediate expression, as other operators
1836   // Currently, only supported as a stand-alone operand
1837   if (isParsingInlineAsm())
1838     if (IdentifyIntelInlineAsmOperator(Tok.getString()) == IOK_OFFSET)
1839       return ParseIntelOffsetOfOperator();
1840 
1841   // Parse optional Size directive.
1842   unsigned Size;
1843   if (ParseIntelMemoryOperandSize(Size))
1844     return nullptr;
1845   bool PtrInOperand = bool(Size);
1846 
1847   Start = Tok.getLoc();
1848 
1849   // Rounding mode operand.
1850   if (getLexer().is(AsmToken::LCurly))
1851     return ParseRoundingModeOp(Start);
1852 
1853   // Register operand.
1854   unsigned RegNo = 0;
1855   if (Tok.is(AsmToken::Identifier) && !ParseRegister(RegNo, Start, End)) {
1856     if (RegNo == X86::RIP)
1857       return ErrorOperand(Start, "rip can only be used as a base register");
1858     // A Register followed by ':' is considered a segment override
1859     if (Tok.isNot(AsmToken::Colon))
1860       return !PtrInOperand ? X86Operand::CreateReg(RegNo, Start, End) :
1861         ErrorOperand(Start, "expected memory operand after 'ptr', "
1862                             "found register operand instead");
1863     // An alleged segment override. check if we have a valid segment register
1864     if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1865       return ErrorOperand(Start, "invalid segment register");
1866     // Eat ':' and update Start location
1867     Start = Lex().getLoc();
1868   }
1869 
1870   // Immediates and Memory
1871   IntelExprStateMachine SM;
1872   if (ParseIntelExpression(SM, End))
1873     return nullptr;
1874 
1875   if (isParsingInlineAsm())
1876     RewriteIntelExpression(SM, Start, Tok.getLoc());
1877 
1878   int64_t Imm = SM.getImm();
1879   const MCExpr *Disp = SM.getSym();
1880   const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
1881   if (Disp && Imm)
1882     Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
1883   if (!Disp)
1884     Disp = ImmDisp;
1885 
1886   // RegNo != 0 specifies a valid segment register,
1887   // and we are parsing a segment override
1888   if (!SM.isMemExpr() && !RegNo)
1889     return X86Operand::CreateImm(Disp, Start, End);
1890 
1891   StringRef ErrMsg;
1892   unsigned BaseReg = SM.getBaseReg();
1893   unsigned IndexReg = SM.getIndexReg();
1894   unsigned Scale = SM.getScale();
1895 
1896   if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
1897       (IndexReg == X86::ESP || IndexReg == X86::RSP))
1898     std::swap(BaseReg, IndexReg);
1899 
1900   // If BaseReg is a vector register and IndexReg is not, swap them unless
1901   // Scale was specified in which case it would be an error.
1902   if (Scale == 0 &&
1903       !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
1904         X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
1905         X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
1906       (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
1907        X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
1908        X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
1909     std::swap(BaseReg, IndexReg);
1910 
1911   if (Scale != 0 &&
1912       X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
1913     return ErrorOperand(Start, "16-bit addresses cannot have a scale");
1914 
1915   // If there was no explicit scale specified, change it to 1.
1916   if (Scale == 0)
1917     Scale = 1;
1918 
1919   // If this is a 16-bit addressing mode with the base and index in the wrong
1920   // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
1921   // shared with att syntax where order matters.
1922   if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
1923       (IndexReg == X86::BX || IndexReg == X86::BP))
1924     std::swap(BaseReg, IndexReg);
1925 
1926   if ((BaseReg || IndexReg) &&
1927       CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
1928                                       ErrMsg))
1929     return ErrorOperand(Start, ErrMsg);
1930   if (isParsingInlineAsm())
1931     return CreateMemForInlineAsm(RegNo, Disp, BaseReg, IndexReg,
1932                                  Scale, Start, End, Size, SM.getSymName(),
1933                                  SM.getIdentifierInfo());
1934   if (!(BaseReg || IndexReg || RegNo))
1935     return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
1936   return X86Operand::CreateMem(getPointerWidth(), RegNo, Disp,
1937                                BaseReg, IndexReg, Scale, Start, End, Size);
1938 }
1939 
ParseATTOperand()1940 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1941   MCAsmParser &Parser = getParser();
1942   switch (getLexer().getKind()) {
1943   case AsmToken::Dollar: {
1944     // $42 or $ID -> immediate.
1945     SMLoc Start = Parser.getTok().getLoc(), End;
1946     Parser.Lex();
1947     const MCExpr *Val;
1948     // This is an immediate, so we should not parse a register. Do a precheck
1949     // for '%' to supercede intra-register parse errors.
1950     SMLoc L = Parser.getTok().getLoc();
1951     if (check(getLexer().is(AsmToken::Percent), L,
1952               "expected immediate expression") ||
1953         getParser().parseExpression(Val, End) ||
1954         check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
1955       return nullptr;
1956     return X86Operand::CreateImm(Val, Start, End);
1957   }
1958   case AsmToken::LCurly: {
1959     SMLoc Start = Parser.getTok().getLoc();
1960     return ParseRoundingModeOp(Start);
1961   }
1962   default: {
1963     // This a memory operand or a register. We have some parsing complications
1964     // as a '(' may be part of an immediate expression or the addressing mode
1965     // block. This is complicated by the fact that an assembler-level variable
1966     // may refer either to a register or an immediate expression.
1967 
1968     SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
1969     const MCExpr *Expr = nullptr;
1970     unsigned Reg = 0;
1971     if (getLexer().isNot(AsmToken::LParen)) {
1972       // No '(' so this is either a displacement expression or a register.
1973       if (Parser.parseExpression(Expr, EndLoc))
1974         return nullptr;
1975       if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
1976         // Segment Register. Reset Expr and copy value to register.
1977         Expr = nullptr;
1978         Reg = RE->getRegNo();
1979 
1980         // Sanity check register.
1981         if (Reg == X86::EIZ || Reg == X86::RIZ)
1982           return ErrorOperand(
1983               Loc, "%eiz and %riz can only be used as index registers",
1984               SMRange(Loc, EndLoc));
1985         if (Reg == X86::RIP)
1986           return ErrorOperand(Loc, "%rip can only be used as a base register",
1987                               SMRange(Loc, EndLoc));
1988         // Return register that are not segment prefixes immediately.
1989         if (!Parser.parseOptionalToken(AsmToken::Colon))
1990           return X86Operand::CreateReg(Reg, Loc, EndLoc);
1991         if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
1992           return ErrorOperand(Loc, "invalid segment register");
1993       }
1994     }
1995     // This is a Memory operand.
1996     return ParseMemOperand(Reg, Expr, Loc, EndLoc);
1997   }
1998   }
1999 }
2000 
2001 // true on failure, false otherwise
2002 // If no {z} mark was found - Parser doesn't advance
ParseZ(std::unique_ptr<X86Operand> & Z,const SMLoc & StartLoc)2003 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2004                           const SMLoc &StartLoc) {
2005   MCAsmParser &Parser = getParser();
2006   // Assuming we are just pass the '{' mark, quering the next token
2007   // Searched for {z}, but none was found. Return false, as no parsing error was
2008   // encountered
2009   if (!(getLexer().is(AsmToken::Identifier) &&
2010         (getLexer().getTok().getIdentifier() == "z")))
2011     return false;
2012   Parser.Lex(); // Eat z
2013   // Query and eat the '}' mark
2014   if (!getLexer().is(AsmToken::RCurly))
2015     return Error(getLexer().getLoc(), "Expected } at this point");
2016   Parser.Lex(); // Eat '}'
2017   // Assign Z with the {z} mark opernad
2018   Z = X86Operand::CreateToken("{z}", StartLoc);
2019   return false;
2020 }
2021 
2022 // true on failure, false otherwise
HandleAVX512Operand(OperandVector & Operands,const MCParsedAsmOperand & Op)2023 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
2024                                        const MCParsedAsmOperand &Op) {
2025   MCAsmParser &Parser = getParser();
2026   if (getLexer().is(AsmToken::LCurly)) {
2027     // Eat "{" and mark the current place.
2028     const SMLoc consumedToken = consumeToken();
2029     // Distinguish {1to<NUM>} from {%k<NUM>}.
2030     if(getLexer().is(AsmToken::Integer)) {
2031       // Parse memory broadcasting ({1to<NUM>}).
2032       if (getLexer().getTok().getIntVal() != 1)
2033         return TokError("Expected 1to<NUM> at this point");
2034       Parser.Lex();  // Eat "1" of 1to8
2035       if (!getLexer().is(AsmToken::Identifier) ||
2036           !getLexer().getTok().getIdentifier().startswith("to"))
2037         return TokError("Expected 1to<NUM> at this point");
2038       // Recognize only reasonable suffixes.
2039       const char *BroadcastPrimitive =
2040         StringSwitch<const char*>(getLexer().getTok().getIdentifier())
2041           .Case("to2",  "{1to2}")
2042           .Case("to4",  "{1to4}")
2043           .Case("to8",  "{1to8}")
2044           .Case("to16", "{1to16}")
2045           .Default(nullptr);
2046       if (!BroadcastPrimitive)
2047         return TokError("Invalid memory broadcast primitive.");
2048       Parser.Lex();  // Eat "toN" of 1toN
2049       if (!getLexer().is(AsmToken::RCurly))
2050         return TokError("Expected } at this point");
2051       Parser.Lex();  // Eat "}"
2052       Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
2053                                                  consumedToken));
2054       // No AVX512 specific primitives can pass
2055       // after memory broadcasting, so return.
2056       return false;
2057     } else {
2058       // Parse either {k}{z}, {z}{k}, {k} or {z}
2059       // last one have no meaning, but GCC accepts it
2060       // Currently, we're just pass a '{' mark
2061       std::unique_ptr<X86Operand> Z;
2062       if (ParseZ(Z, consumedToken))
2063         return true;
2064       // Reaching here means that parsing of the allegadly '{z}' mark yielded
2065       // no errors.
2066       // Query for the need of further parsing for a {%k<NUM>} mark
2067       if (!Z || getLexer().is(AsmToken::LCurly)) {
2068         SMLoc StartLoc = Z ? consumeToken() : consumedToken;
2069         // Parse an op-mask register mark ({%k<NUM>}), which is now to be
2070         // expected
2071         unsigned RegNo;
2072         SMLoc RegLoc;
2073         if (!ParseRegister(RegNo, RegLoc, StartLoc) &&
2074             X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
2075           if (RegNo == X86::K0)
2076             return Error(RegLoc, "Register k0 can't be used as write mask");
2077           if (!getLexer().is(AsmToken::RCurly))
2078             return Error(getLexer().getLoc(), "Expected } at this point");
2079           Operands.push_back(X86Operand::CreateToken("{", StartLoc));
2080           Operands.push_back(
2081               X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
2082           Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
2083         } else
2084           return Error(getLexer().getLoc(),
2085                         "Expected an op-mask register at this point");
2086         // {%k<NUM>} mark is found, inquire for {z}
2087         if (getLexer().is(AsmToken::LCurly) && !Z) {
2088           // Have we've found a parsing error, or found no (expected) {z} mark
2089           // - report an error
2090           if (ParseZ(Z, consumeToken()) || !Z)
2091             return Error(getLexer().getLoc(),
2092                          "Expected a {z} mark at this point");
2093 
2094         }
2095         // '{z}' on its own is meaningless, hence should be ignored.
2096         // on the contrary - have it been accompanied by a K register,
2097         // allow it.
2098         if (Z)
2099           Operands.push_back(std::move(Z));
2100       }
2101     }
2102   }
2103   return false;
2104 }
2105 
2106 /// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'.  The '%ds:' prefix
2107 /// has already been parsed if present. disp may be provided as well.
ParseMemOperand(unsigned SegReg,const MCExpr * & Disp,const SMLoc & StartLoc,SMLoc & EndLoc)2108 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
2109                                                           const MCExpr *&Disp,
2110                                                           const SMLoc &StartLoc,
2111                                                           SMLoc &EndLoc) {
2112   MCAsmParser &Parser = getParser();
2113   SMLoc Loc;
2114   // Based on the initial passed values, we may be in any of these cases, we are
2115   // in one of these cases (with current position (*)):
2116 
2117   //   1. seg : * disp  (base-index-scale-expr)
2118   //   2. seg : *(disp) (base-index-scale-expr)
2119   //   3. seg :       *(base-index-scale-expr)
2120   //   4.        disp  *(base-index-scale-expr)
2121   //   5.      *(disp)  (base-index-scale-expr)
2122   //   6.             *(base-index-scale-expr)
2123   //   7.  disp *
2124   //   8. *(disp)
2125 
2126   // If we do not have an displacement yet, check if we're in cases 4 or 6 by
2127   // checking if the first object after the parenthesis is a register (or an
2128   // identifier referring to a register) and parse the displacement or default
2129   // to 0 as appropriate.
2130   auto isAtMemOperand = [this]() {
2131     if (this->getLexer().isNot(AsmToken::LParen))
2132       return false;
2133     AsmToken Buf[2];
2134     StringRef Id;
2135     auto TokCount = this->getLexer().peekTokens(Buf, true);
2136     if (TokCount == 0)
2137       return false;
2138     switch (Buf[0].getKind()) {
2139     case AsmToken::Percent:
2140     case AsmToken::Comma:
2141       return true;
2142     // These lower cases are doing a peekIdentifier.
2143     case AsmToken::At:
2144     case AsmToken::Dollar:
2145       if ((TokCount > 1) &&
2146           (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
2147           (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
2148         Id = StringRef(Buf[0].getLoc().getPointer(),
2149                        Buf[1].getIdentifier().size() + 1);
2150       break;
2151     case AsmToken::Identifier:
2152     case AsmToken::String:
2153       Id = Buf[0].getIdentifier();
2154       break;
2155     default:
2156       return false;
2157     }
2158     // We have an ID. Check if it is bound to a register.
2159     if (!Id.empty()) {
2160       MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
2161       if (Sym->isVariable()) {
2162         auto V = Sym->getVariableValue(/*SetUsed*/ false);
2163         return isa<X86MCExpr>(V);
2164       }
2165     }
2166     return false;
2167   };
2168 
2169   if (!Disp) {
2170     // Parse immediate if we're not at a mem operand yet.
2171     if (!isAtMemOperand()) {
2172       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
2173         return nullptr;
2174       assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
2175     } else {
2176       // Disp is implicitly zero if we haven't parsed it yet.
2177       Disp = MCConstantExpr::create(0, Parser.getContext());
2178     }
2179   }
2180 
2181   // We are now either at the end of the operand or at the '(' at the start of a
2182   // base-index-scale-expr.
2183 
2184   if (!parseOptionalToken(AsmToken::LParen)) {
2185     if (SegReg == 0)
2186       return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2187     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2188                                  StartLoc, EndLoc);
2189   }
2190 
2191   // If we reached here, then eat the '(' and Process
2192   // the rest of the memory operand.
2193   unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2194   SMLoc BaseLoc = getLexer().getLoc();
2195   const MCExpr *E;
2196   StringRef ErrMsg;
2197 
2198   // Parse BaseReg if one is provided.
2199   if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
2200     if (Parser.parseExpression(E, EndLoc) ||
2201         check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
2202       return nullptr;
2203 
2204     // Sanity check register.
2205     BaseReg = cast<X86MCExpr>(E)->getRegNo();
2206     if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
2207       return ErrorOperand(BaseLoc,
2208                           "eiz and riz can only be used as index registers",
2209                           SMRange(BaseLoc, EndLoc));
2210   }
2211 
2212   if (parseOptionalToken(AsmToken::Comma)) {
2213     // Following the comma we should have either an index register, or a scale
2214     // value. We don't support the later form, but we want to parse it
2215     // correctly.
2216     //
2217     // Even though it would be completely consistent to support syntax like
2218     // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2219     if (getLexer().isNot(AsmToken::RParen)) {
2220       if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
2221         return nullptr;
2222 
2223       if (!isa<X86MCExpr>(E)) {
2224         // We've parsed an unexpected Scale Value instead of an index
2225         // register. Interpret it as an absolute.
2226         int64_t ScaleVal;
2227         if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
2228           return ErrorOperand(Loc, "expected absolute expression");
2229         if (ScaleVal != 1)
2230           Warning(Loc, "scale factor without index register is ignored");
2231         Scale = 1;
2232       } else { // IndexReg Found.
2233         IndexReg = cast<X86MCExpr>(E)->getRegNo();
2234 
2235         if (BaseReg == X86::RIP)
2236           return ErrorOperand(
2237               Loc, "%rip as base register can not have an index register");
2238         if (IndexReg == X86::RIP)
2239           return ErrorOperand(Loc, "%rip is not allowed as an index register");
2240 
2241         if (parseOptionalToken(AsmToken::Comma)) {
2242           // Parse the scale amount:
2243           //  ::= ',' [scale-expression]
2244 
2245           // A scale amount without an index is ignored.
2246           if (getLexer().isNot(AsmToken::RParen)) {
2247             int64_t ScaleVal;
2248             if (Parser.parseTokenLoc(Loc) ||
2249                 Parser.parseAbsoluteExpression(ScaleVal))
2250               return ErrorOperand(Loc, "expected scale expression");
2251             Scale = (unsigned)ScaleVal;
2252             // Validate the scale amount.
2253             if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2254                 Scale != 1)
2255               return ErrorOperand(Loc,
2256                                   "scale factor in 16-bit address must be 1");
2257             if (checkScale(Scale, ErrMsg))
2258               return ErrorOperand(Loc, ErrMsg);
2259           }
2260         }
2261       }
2262     }
2263   }
2264 
2265   // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2266   if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
2267     return nullptr;
2268 
2269   // This is to support otherwise illegal operand (%dx) found in various
2270   // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
2271   // be supported. Mark such DX variants separately fix only in special cases.
2272   if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
2273       isa<MCConstantExpr>(Disp) && cast<MCConstantExpr>(Disp)->getValue() == 0)
2274     return X86Operand::CreateDXReg(BaseLoc, BaseLoc);
2275 
2276   if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
2277                                       ErrMsg))
2278     return ErrorOperand(BaseLoc, ErrMsg);
2279 
2280   if (SegReg || BaseReg || IndexReg)
2281     return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2282                                  IndexReg, Scale, StartLoc, EndLoc);
2283   return X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc);
2284 }
2285 
2286 // Parse either a standard primary expression or a register.
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)2287 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
2288   MCAsmParser &Parser = getParser();
2289   // See if this is a register first.
2290   if (getTok().is(AsmToken::Percent) ||
2291       (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
2292        MatchRegisterName(Parser.getTok().getString()))) {
2293     SMLoc StartLoc = Parser.getTok().getLoc();
2294     unsigned RegNo;
2295     if (ParseRegister(RegNo, StartLoc, EndLoc))
2296       return true;
2297     Res = X86MCExpr::create(RegNo, Parser.getContext());
2298     return false;
2299   }
2300   return Parser.parsePrimaryExpr(Res, EndLoc);
2301 }
2302 
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)2303 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2304                                     SMLoc NameLoc, OperandVector &Operands) {
2305   MCAsmParser &Parser = getParser();
2306   InstInfo = &Info;
2307   StringRef PatchedName = Name;
2308 
2309   if ((Name.equals("jmp") || Name.equals("jc") || Name.equals("jz")) &&
2310       isParsingIntelSyntax() && isParsingInlineAsm()) {
2311     StringRef NextTok = Parser.getTok().getString();
2312     if (NextTok == "short") {
2313       SMLoc NameEndLoc =
2314           NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
2315       // Eat the short keyword
2316       Parser.Lex();
2317       // MS ignores the short keyword, it determines the jmp type based
2318       // on the distance of the label
2319       InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
2320                                           NextTok.size() + 1);
2321     }
2322   }
2323 
2324   // FIXME: Hack to recognize setneb as setne.
2325   if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2326       PatchedName != "setb" && PatchedName != "setnb")
2327     PatchedName = PatchedName.substr(0, Name.size()-1);
2328 
2329   // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2330   if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2331       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2332        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2333     bool IsVCMP = PatchedName[0] == 'v';
2334     unsigned CCIdx = IsVCMP ? 4 : 3;
2335     unsigned ComparisonCode = StringSwitch<unsigned>(
2336       PatchedName.slice(CCIdx, PatchedName.size() - 2))
2337       .Case("eq",       0x00)
2338       .Case("eq_oq",    0x00)
2339       .Case("lt",       0x01)
2340       .Case("lt_os",    0x01)
2341       .Case("le",       0x02)
2342       .Case("le_os",    0x02)
2343       .Case("unord",    0x03)
2344       .Case("unord_q",  0x03)
2345       .Case("neq",      0x04)
2346       .Case("neq_uq",   0x04)
2347       .Case("nlt",      0x05)
2348       .Case("nlt_us",   0x05)
2349       .Case("nle",      0x06)
2350       .Case("nle_us",   0x06)
2351       .Case("ord",      0x07)
2352       .Case("ord_q",    0x07)
2353       /* AVX only from here */
2354       .Case("eq_uq",    0x08)
2355       .Case("nge",      0x09)
2356       .Case("nge_us",   0x09)
2357       .Case("ngt",      0x0A)
2358       .Case("ngt_us",   0x0A)
2359       .Case("false",    0x0B)
2360       .Case("false_oq", 0x0B)
2361       .Case("neq_oq",   0x0C)
2362       .Case("ge",       0x0D)
2363       .Case("ge_os",    0x0D)
2364       .Case("gt",       0x0E)
2365       .Case("gt_os",    0x0E)
2366       .Case("true",     0x0F)
2367       .Case("true_uq",  0x0F)
2368       .Case("eq_os",    0x10)
2369       .Case("lt_oq",    0x11)
2370       .Case("le_oq",    0x12)
2371       .Case("unord_s",  0x13)
2372       .Case("neq_us",   0x14)
2373       .Case("nlt_uq",   0x15)
2374       .Case("nle_uq",   0x16)
2375       .Case("ord_s",    0x17)
2376       .Case("eq_us",    0x18)
2377       .Case("nge_uq",   0x19)
2378       .Case("ngt_uq",   0x1A)
2379       .Case("false_os", 0x1B)
2380       .Case("neq_os",   0x1C)
2381       .Case("ge_oq",    0x1D)
2382       .Case("gt_oq",    0x1E)
2383       .Case("true_us",  0x1F)
2384       .Default(~0U);
2385     if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2386 
2387       Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2388                                                  NameLoc));
2389 
2390       const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2391                                                    getParser().getContext());
2392       Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2393 
2394       PatchedName = PatchedName.substr(PatchedName.size() - 2);
2395     }
2396   }
2397 
2398   // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2399   if (PatchedName.startswith("vpcmp") &&
2400       (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2401        PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2402     unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2403     unsigned ComparisonCode = StringSwitch<unsigned>(
2404       PatchedName.slice(5, PatchedName.size() - CCIdx))
2405       .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
2406       .Case("lt",    0x1)
2407       .Case("le",    0x2)
2408       //.Case("false", 0x3) // Not a documented alias.
2409       .Case("neq",   0x4)
2410       .Case("nlt",   0x5)
2411       .Case("nle",   0x6)
2412       //.Case("true",  0x7) // Not a documented alias.
2413       .Default(~0U);
2414     if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2415       Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2416 
2417       const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2418                                                    getParser().getContext());
2419       Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2420 
2421       PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2422     }
2423   }
2424 
2425   // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2426   if (PatchedName.startswith("vpcom") &&
2427       (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2428        PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2429     unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2430     unsigned ComparisonCode = StringSwitch<unsigned>(
2431       PatchedName.slice(5, PatchedName.size() - CCIdx))
2432       .Case("lt",    0x0)
2433       .Case("le",    0x1)
2434       .Case("gt",    0x2)
2435       .Case("ge",    0x3)
2436       .Case("eq",    0x4)
2437       .Case("neq",   0x5)
2438       .Case("false", 0x6)
2439       .Case("true",  0x7)
2440       .Default(~0U);
2441     if (ComparisonCode != ~0U) {
2442       Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2443 
2444       const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2445                                                    getParser().getContext());
2446       Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2447 
2448       PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2449     }
2450   }
2451 
2452 
2453   // Determine whether this is an instruction prefix.
2454   // FIXME:
2455   // Enhance prefixes integrity robustness. for example, following forms
2456   // are currently tolerated:
2457   // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
2458   // lock addq %rax, %rbx ; Destination operand must be of memory type
2459   // xacquire <insn>      ; xacquire must be accompanied by 'lock'
2460   bool isPrefix = StringSwitch<bool>(Name)
2461                       .Cases("rex64", "data32", "data16", true)
2462                       .Cases("xacquire", "xrelease", true)
2463                       .Cases("acquire", "release", isParsingIntelSyntax())
2464                       .Default(false);
2465 
2466   auto isLockRepeatNtPrefix = [](StringRef N) {
2467     return StringSwitch<bool>(N)
2468         .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
2469         .Default(false);
2470   };
2471 
2472   bool CurlyAsEndOfStatement = false;
2473 
2474   unsigned Flags = X86::IP_NO_PREFIX;
2475   while (isLockRepeatNtPrefix(Name.lower())) {
2476     unsigned Prefix =
2477         StringSwitch<unsigned>(Name)
2478             .Cases("lock", "lock", X86::IP_HAS_LOCK)
2479             .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
2480             .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
2481             .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
2482             .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
2483     Flags |= Prefix;
2484     if (getLexer().is(AsmToken::EndOfStatement)) {
2485       // We don't have real instr with the given prefix
2486       //  let's use the prefix as the instr.
2487       // TODO: there could be several prefixes one after another
2488       Flags = X86::IP_NO_PREFIX;
2489       break;
2490     }
2491     Name = Parser.getTok().getString();
2492     Parser.Lex(); // eat the prefix
2493     // Hack: we could have something like "rep # some comment" or
2494     //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
2495     while (Name.startswith(";") || Name.startswith("\n") ||
2496            Name.startswith("#") || Name.startswith("\t") ||
2497            Name.startswith("/")) {
2498       Name = Parser.getTok().getString();
2499       Parser.Lex(); // go to next prefix or instr
2500     }
2501   }
2502 
2503   if (Flags)
2504     PatchedName = Name;
2505 
2506   // Hacks to handle 'data16' and 'data32'
2507   if (PatchedName == "data16" && is16BitMode()) {
2508     return Error(NameLoc, "redundant data16 prefix");
2509   }
2510   if (PatchedName == "data32") {
2511     if (is32BitMode())
2512       return Error(NameLoc, "redundant data32 prefix");
2513     if (is64BitMode())
2514       return Error(NameLoc, "'data32' is not supported in 64-bit mode");
2515     // Hack to 'data16' for the table lookup.
2516     PatchedName = "data16";
2517   }
2518 
2519   Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2520 
2521   // This does the actual operand parsing.  Don't parse any more if we have a
2522   // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2523   // just want to parse the "lock" as the first instruction and the "incl" as
2524   // the next one.
2525   if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2526     // Parse '*' modifier.
2527     if (getLexer().is(AsmToken::Star))
2528       Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2529 
2530     // Read the operands.
2531     while(1) {
2532       if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2533         Operands.push_back(std::move(Op));
2534         if (HandleAVX512Operand(Operands, *Operands.back()))
2535           return true;
2536       } else {
2537          return true;
2538       }
2539       // check for comma and eat it
2540       if (getLexer().is(AsmToken::Comma))
2541         Parser.Lex();
2542       else
2543         break;
2544      }
2545 
2546     // In MS inline asm curly braces mark the beginning/end of a block,
2547     // therefore they should be interepreted as end of statement
2548     CurlyAsEndOfStatement =
2549         isParsingIntelSyntax() && isParsingInlineAsm() &&
2550         (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2551     if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2552       return TokError("unexpected token in argument list");
2553   }
2554 
2555   // Consume the EndOfStatement or the prefix separator Slash
2556   if (getLexer().is(AsmToken::EndOfStatement) ||
2557       (isPrefix && getLexer().is(AsmToken::Slash)))
2558     Parser.Lex();
2559   else if (CurlyAsEndOfStatement)
2560     // Add an actual EndOfStatement before the curly brace
2561     Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2562                                    getLexer().getTok().getLoc(), 0);
2563 
2564   // This is for gas compatibility and cannot be done in td.
2565   // Adding "p" for some floating point with no argument.
2566   // For example: fsub --> fsubp
2567   bool IsFp =
2568     Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2569   if (IsFp && Operands.size() == 1) {
2570     const char *Repl = StringSwitch<const char *>(Name)
2571       .Case("fsub", "fsubp")
2572       .Case("fdiv", "fdivp")
2573       .Case("fsubr", "fsubrp")
2574       .Case("fdivr", "fdivrp");
2575     static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2576   }
2577 
2578   // Moving a 32 or 16 bit value into a segment register has the same
2579   // behavior. Modify such instructions to always take shorter form.
2580   if ((Name == "mov" || Name == "movw" || Name == "movl") &&
2581       (Operands.size() == 3)) {
2582     X86Operand &Op1 = (X86Operand &)*Operands[1];
2583     X86Operand &Op2 = (X86Operand &)*Operands[2];
2584     SMLoc Loc = Op1.getEndLoc();
2585     if (Op1.isReg() && Op2.isReg() &&
2586         X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
2587             Op2.getReg()) &&
2588         (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
2589          X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
2590       // Change instruction name to match new instruction.
2591       if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
2592         Name = is16BitMode() ? "movw" : "movl";
2593         Operands[0] = X86Operand::CreateToken(Name, NameLoc);
2594       }
2595       // Select the correct equivalent 16-/32-bit source register.
2596       unsigned Reg =
2597           getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
2598       Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
2599     }
2600   }
2601 
2602   // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2603   // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
2604   // documented form in various unofficial manuals, so a lot of code uses it.
2605   if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2606        Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2607       Operands.size() == 3) {
2608     X86Operand &Op = (X86Operand &)*Operands.back();
2609     if (Op.isDXReg())
2610       Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2611                                               Op.getEndLoc());
2612   }
2613   // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2614   if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2615        Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2616       Operands.size() == 3) {
2617     X86Operand &Op = (X86Operand &)*Operands[1];
2618     if (Op.isDXReg())
2619       Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
2620                                           Op.getEndLoc());
2621   }
2622 
2623   SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2624   bool HadVerifyError = false;
2625 
2626   // Append default arguments to "ins[bwld]"
2627   if (Name.startswith("ins") &&
2628       (Operands.size() == 1 || Operands.size() == 3) &&
2629       (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2630        Name == "ins")) {
2631 
2632     AddDefaultSrcDestOperands(TmpOperands,
2633                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2634                               DefaultMemDIOperand(NameLoc));
2635     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2636   }
2637 
2638   // Append default arguments to "outs[bwld]"
2639   if (Name.startswith("outs") &&
2640       (Operands.size() == 1 || Operands.size() == 3) &&
2641       (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2642        Name == "outsd" || Name == "outs")) {
2643     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2644                               X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2645     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2646   }
2647 
2648   // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2649   // values of $SIREG according to the mode. It would be nice if this
2650   // could be achieved with InstAlias in the tables.
2651   if (Name.startswith("lods") &&
2652       (Operands.size() == 1 || Operands.size() == 2) &&
2653       (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2654        Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2655     TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2656     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2657   }
2658 
2659   // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2660   // values of $DIREG according to the mode. It would be nice if this
2661   // could be achieved with InstAlias in the tables.
2662   if (Name.startswith("stos") &&
2663       (Operands.size() == 1 || Operands.size() == 2) &&
2664       (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2665        Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2666     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2667     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2668   }
2669 
2670   // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2671   // values of $DIREG according to the mode. It would be nice if this
2672   // could be achieved with InstAlias in the tables.
2673   if (Name.startswith("scas") &&
2674       (Operands.size() == 1 || Operands.size() == 2) &&
2675       (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2676        Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2677     TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2678     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2679   }
2680 
2681   // Add default SI and DI operands to "cmps[bwlq]".
2682   if (Name.startswith("cmps") &&
2683       (Operands.size() == 1 || Operands.size() == 3) &&
2684       (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2685        Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2686     AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2687                               DefaultMemSIOperand(NameLoc));
2688     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2689   }
2690 
2691   // Add default SI and DI operands to "movs[bwlq]".
2692   if (((Name.startswith("movs") &&
2693         (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2694          Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2695        (Name.startswith("smov") &&
2696         (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2697          Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2698       (Operands.size() == 1 || Operands.size() == 3)) {
2699     if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
2700       Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2701     AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2702                               DefaultMemDIOperand(NameLoc));
2703     HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2704   }
2705 
2706   // Check if we encountered an error for one the string insturctions
2707   if (HadVerifyError) {
2708     return HadVerifyError;
2709   }
2710 
2711   // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>.  Canonicalize to
2712   // "shift <op>".
2713   if ((Name.startswith("shr") || Name.startswith("sar") ||
2714        Name.startswith("shl") || Name.startswith("sal") ||
2715        Name.startswith("rcl") || Name.startswith("rcr") ||
2716        Name.startswith("rol") || Name.startswith("ror")) &&
2717       Operands.size() == 3) {
2718     if (isParsingIntelSyntax()) {
2719       // Intel syntax
2720       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2721       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2722           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2723         Operands.pop_back();
2724     } else {
2725       X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2726       if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2727           cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2728         Operands.erase(Operands.begin() + 1);
2729     }
2730   }
2731 
2732   // Transforms "int $3" into "int3" as a size optimization.  We can't write an
2733   // instalias with an immediate operand yet.
2734   if (Name == "int" && Operands.size() == 2) {
2735     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2736     if (Op1.isImm())
2737       if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2738         if (CE->getValue() == 3) {
2739           Operands.erase(Operands.begin() + 1);
2740           static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2741         }
2742   }
2743 
2744   // Transforms "xlat mem8" into "xlatb"
2745   if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2746     X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2747     if (Op1.isMem8()) {
2748       Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2749                                  "size, (R|E)BX will be used for the location");
2750       Operands.pop_back();
2751       static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
2752     }
2753   }
2754 
2755   if (Flags)
2756     Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
2757   return false;
2758 }
2759 
processInstruction(MCInst & Inst,const OperandVector & Ops)2760 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2761   return false;
2762 }
2763 
validateInstruction(MCInst & Inst,const OperandVector & Ops)2764 bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
2765   const MCRegisterInfo *MRI = getContext().getRegisterInfo();
2766 
2767   switch (Inst.getOpcode()) {
2768   case X86::VGATHERDPDYrm:
2769   case X86::VGATHERDPDrm:
2770   case X86::VGATHERDPSYrm:
2771   case X86::VGATHERDPSrm:
2772   case X86::VGATHERQPDYrm:
2773   case X86::VGATHERQPDrm:
2774   case X86::VGATHERQPSYrm:
2775   case X86::VGATHERQPSrm:
2776   case X86::VPGATHERDDYrm:
2777   case X86::VPGATHERDDrm:
2778   case X86::VPGATHERDQYrm:
2779   case X86::VPGATHERDQrm:
2780   case X86::VPGATHERQDYrm:
2781   case X86::VPGATHERQDrm:
2782   case X86::VPGATHERQQYrm:
2783   case X86::VPGATHERQQrm: {
2784     unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2785     unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
2786     unsigned Index =
2787       MRI->getEncodingValue(Inst.getOperand(3 + X86::AddrIndexReg).getReg());
2788     if (Dest == Mask || Dest == Index || Mask == Index)
2789       return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
2790                                             "registers should be distinct");
2791     break;
2792   }
2793   case X86::VGATHERDPDZ128rm:
2794   case X86::VGATHERDPDZ256rm:
2795   case X86::VGATHERDPDZrm:
2796   case X86::VGATHERDPSZ128rm:
2797   case X86::VGATHERDPSZ256rm:
2798   case X86::VGATHERDPSZrm:
2799   case X86::VGATHERQPDZ128rm:
2800   case X86::VGATHERQPDZ256rm:
2801   case X86::VGATHERQPDZrm:
2802   case X86::VGATHERQPSZ128rm:
2803   case X86::VGATHERQPSZ256rm:
2804   case X86::VGATHERQPSZrm:
2805   case X86::VPGATHERDDZ128rm:
2806   case X86::VPGATHERDDZ256rm:
2807   case X86::VPGATHERDDZrm:
2808   case X86::VPGATHERDQZ128rm:
2809   case X86::VPGATHERDQZ256rm:
2810   case X86::VPGATHERDQZrm:
2811   case X86::VPGATHERQDZ128rm:
2812   case X86::VPGATHERQDZ256rm:
2813   case X86::VPGATHERQDZrm:
2814   case X86::VPGATHERQQZ128rm:
2815   case X86::VPGATHERQQZ256rm:
2816   case X86::VPGATHERQQZrm: {
2817     unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
2818     unsigned Index =
2819       MRI->getEncodingValue(Inst.getOperand(4 + X86::AddrIndexReg).getReg());
2820     if (Dest == Index)
2821       return Warning(Ops[0]->getStartLoc(), "index and destination registers "
2822                                             "should be distinct");
2823     break;
2824   }
2825   case X86::V4FMADDPSrm:
2826   case X86::V4FMADDPSrmk:
2827   case X86::V4FMADDPSrmkz:
2828   case X86::V4FMADDSSrm:
2829   case X86::V4FMADDSSrmk:
2830   case X86::V4FMADDSSrmkz:
2831   case X86::V4FNMADDPSrm:
2832   case X86::V4FNMADDPSrmk:
2833   case X86::V4FNMADDPSrmkz:
2834   case X86::V4FNMADDSSrm:
2835   case X86::V4FNMADDSSrmk:
2836   case X86::V4FNMADDSSrmkz:
2837   case X86::VP4DPWSSDSrm:
2838   case X86::VP4DPWSSDSrmk:
2839   case X86::VP4DPWSSDSrmkz:
2840   case X86::VP4DPWSSDrm:
2841   case X86::VP4DPWSSDrmk:
2842   case X86::VP4DPWSSDrmkz: {
2843     unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
2844                                     X86::AddrNumOperands - 1).getReg();
2845     unsigned Src2Enc = MRI->getEncodingValue(Src2);
2846     if (Src2Enc % 4 != 0) {
2847       StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
2848       unsigned GroupStart = (Src2Enc / 4) * 4;
2849       unsigned GroupEnd = GroupStart + 3;
2850       return Warning(Ops[0]->getStartLoc(),
2851                      "source register '" + RegName + "' implicitly denotes '" +
2852                      RegName.take_front(3) + Twine(GroupStart) + "' to '" +
2853                      RegName.take_front(3) + Twine(GroupEnd) +
2854                      "' source group");
2855     }
2856     break;
2857   }
2858   }
2859 
2860   return false;
2861 }
2862 
2863 static const char *getSubtargetFeatureName(uint64_t Val);
2864 
EmitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)2865 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2866                                    MCStreamer &Out) {
2867   Instrumentation->InstrumentAndEmitInstruction(
2868       Inst, Operands, getContext(), MII, Out,
2869       getParser().shouldPrintSchedInfo());
2870 }
2871 
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2872 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2873                                            OperandVector &Operands,
2874                                            MCStreamer &Out, uint64_t &ErrorInfo,
2875                                            bool MatchingInlineAsm) {
2876   if (isParsingIntelSyntax())
2877     return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2878                                         MatchingInlineAsm);
2879   return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2880                                     MatchingInlineAsm);
2881 }
2882 
MatchFPUWaitAlias(SMLoc IDLoc,X86Operand & Op,OperandVector & Operands,MCStreamer & Out,bool MatchingInlineAsm)2883 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2884                                      OperandVector &Operands, MCStreamer &Out,
2885                                      bool MatchingInlineAsm) {
2886   // FIXME: This should be replaced with a real .td file alias mechanism.
2887   // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2888   // call.
2889   const char *Repl = StringSwitch<const char *>(Op.getToken())
2890                          .Case("finit", "fninit")
2891                          .Case("fsave", "fnsave")
2892                          .Case("fstcw", "fnstcw")
2893                          .Case("fstcww", "fnstcw")
2894                          .Case("fstenv", "fnstenv")
2895                          .Case("fstsw", "fnstsw")
2896                          .Case("fstsww", "fnstsw")
2897                          .Case("fclex", "fnclex")
2898                          .Default(nullptr);
2899   if (Repl) {
2900     MCInst Inst;
2901     Inst.setOpcode(X86::WAIT);
2902     Inst.setLoc(IDLoc);
2903     if (!MatchingInlineAsm)
2904       EmitInstruction(Inst, Operands, Out);
2905     Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2906   }
2907 }
2908 
ErrorMissingFeature(SMLoc IDLoc,uint64_t ErrorInfo,bool MatchingInlineAsm)2909 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2910                                        bool MatchingInlineAsm) {
2911   assert(ErrorInfo && "Unknown missing feature!");
2912   SmallString<126> Msg;
2913   raw_svector_ostream OS(Msg);
2914   OS << "instruction requires:";
2915   uint64_t Mask = 1;
2916   for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2917     if (ErrorInfo & Mask)
2918       OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2919     Mask <<= 1;
2920   }
2921   return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
2922 }
2923 
getPrefixes(OperandVector & Operands)2924 static unsigned getPrefixes(OperandVector &Operands) {
2925   unsigned Result = 0;
2926   X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
2927   if (Prefix.isPrefix()) {
2928     Result = Prefix.getPrefix();
2929     Operands.pop_back();
2930   }
2931   return Result;
2932 }
2933 
MatchAndEmitATTInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2934 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2935                                               OperandVector &Operands,
2936                                               MCStreamer &Out,
2937                                               uint64_t &ErrorInfo,
2938                                               bool MatchingInlineAsm) {
2939   assert(!Operands.empty() && "Unexpect empty operand list!");
2940   X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2941   assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2942   SMRange EmptyRange = None;
2943 
2944   // First, handle aliases that expand to multiple instructions.
2945   MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2946 
2947   bool WasOriginallyInvalidOperand = false;
2948   unsigned Prefixes = getPrefixes(Operands);
2949 
2950   MCInst Inst;
2951 
2952   if (Prefixes)
2953     Inst.setFlags(Prefixes);
2954 
2955   // First, try a direct match.
2956   switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
2957                            isParsingIntelSyntax())) {
2958   default: llvm_unreachable("Unexpected match result!");
2959   case Match_Success:
2960     if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
2961       return true;
2962     // Some instructions need post-processing to, for example, tweak which
2963     // encoding is selected. Loop on it while changes happen so the
2964     // individual transformations can chain off each other.
2965     if (!MatchingInlineAsm)
2966       while (processInstruction(Inst, Operands))
2967         ;
2968 
2969     Inst.setLoc(IDLoc);
2970     if (!MatchingInlineAsm)
2971       EmitInstruction(Inst, Operands, Out);
2972     Opcode = Inst.getOpcode();
2973     return false;
2974   case Match_MissingFeature:
2975     return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2976   case Match_InvalidOperand:
2977     WasOriginallyInvalidOperand = true;
2978     break;
2979   case Match_MnemonicFail:
2980     break;
2981   }
2982 
2983   // FIXME: Ideally, we would only attempt suffix matches for things which are
2984   // valid prefixes, and we could just infer the right unambiguous
2985   // type. However, that requires substantially more matcher support than the
2986   // following hack.
2987 
2988   // Change the operand to point to a temporary token.
2989   StringRef Base = Op.getToken();
2990   SmallString<16> Tmp;
2991   Tmp += Base;
2992   Tmp += ' ';
2993   Op.setTokenValue(Tmp);
2994 
2995   // If this instruction starts with an 'f', then it is a floating point stack
2996   // instruction.  These come in up to three forms for 32-bit, 64-bit, and
2997   // 80-bit floating point, which use the suffixes s,l,t respectively.
2998   //
2999   // Otherwise, we assume that this may be an integer instruction, which comes
3000   // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
3001   const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
3002 
3003   // Check for the various suffix matches.
3004   uint64_t ErrorInfoIgnore;
3005   uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
3006   unsigned Match[4];
3007 
3008   for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
3009     Tmp.back() = Suffixes[I];
3010     Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
3011                                 MatchingInlineAsm, isParsingIntelSyntax());
3012     // If this returned as a missing feature failure, remember that.
3013     if (Match[I] == Match_MissingFeature)
3014       ErrorInfoMissingFeature = ErrorInfoIgnore;
3015   }
3016 
3017   // Restore the old token.
3018   Op.setTokenValue(Base);
3019 
3020   // If exactly one matched, then we treat that as a successful match (and the
3021   // instruction will already have been filled in correctly, since the failing
3022   // matches won't have modified it).
3023   unsigned NumSuccessfulMatches =
3024       std::count(std::begin(Match), std::end(Match), Match_Success);
3025   if (NumSuccessfulMatches == 1) {
3026     Inst.setLoc(IDLoc);
3027     if (!MatchingInlineAsm)
3028       EmitInstruction(Inst, Operands, Out);
3029     Opcode = Inst.getOpcode();
3030     return false;
3031   }
3032 
3033   // Otherwise, the match failed, try to produce a decent error message.
3034 
3035   // If we had multiple suffix matches, then identify this as an ambiguous
3036   // match.
3037   if (NumSuccessfulMatches > 1) {
3038     char MatchChars[4];
3039     unsigned NumMatches = 0;
3040     for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
3041       if (Match[I] == Match_Success)
3042         MatchChars[NumMatches++] = Suffixes[I];
3043 
3044     SmallString<126> Msg;
3045     raw_svector_ostream OS(Msg);
3046     OS << "ambiguous instructions require an explicit suffix (could be ";
3047     for (unsigned i = 0; i != NumMatches; ++i) {
3048       if (i != 0)
3049         OS << ", ";
3050       if (i + 1 == NumMatches)
3051         OS << "or ";
3052       OS << "'" << Base << MatchChars[i] << "'";
3053     }
3054     OS << ")";
3055     Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
3056     return true;
3057   }
3058 
3059   // Okay, we know that none of the variants matched successfully.
3060 
3061   // If all of the instructions reported an invalid mnemonic, then the original
3062   // mnemonic was invalid.
3063   if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
3064     if (!WasOriginallyInvalidOperand) {
3065       return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
3066                    Op.getLocRange(), MatchingInlineAsm);
3067     }
3068 
3069     // Recover location info for the operand if we know which was the problem.
3070     if (ErrorInfo != ~0ULL) {
3071       if (ErrorInfo >= Operands.size())
3072         return Error(IDLoc, "too few operands for instruction", EmptyRange,
3073                      MatchingInlineAsm);
3074 
3075       X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
3076       if (Operand.getStartLoc().isValid()) {
3077         SMRange OperandRange = Operand.getLocRange();
3078         return Error(Operand.getStartLoc(), "invalid operand for instruction",
3079                      OperandRange, MatchingInlineAsm);
3080       }
3081     }
3082 
3083     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3084                  MatchingInlineAsm);
3085   }
3086 
3087   // If one instruction matched with a missing feature, report this as a
3088   // missing feature.
3089   if (std::count(std::begin(Match), std::end(Match),
3090                  Match_MissingFeature) == 1) {
3091     ErrorInfo = ErrorInfoMissingFeature;
3092     return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
3093                                MatchingInlineAsm);
3094   }
3095 
3096   // If one instruction matched with an invalid operand, report this as an
3097   // operand failure.
3098   if (std::count(std::begin(Match), std::end(Match),
3099                  Match_InvalidOperand) == 1) {
3100     return Error(IDLoc, "invalid operand for instruction", EmptyRange,
3101                  MatchingInlineAsm);
3102   }
3103 
3104   // If all of these were an outright failure, report it in a useless way.
3105   Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
3106         EmptyRange, MatchingInlineAsm);
3107   return true;
3108 }
3109 
/// Match an Intel-syntax instruction and, if exactly one candidate matches,
/// emit it.
///
/// In Intel syntax the operand size is not part of the mnemonic, so when an
/// unsized memory operand is present this routine retries the match with each
/// candidate memory size and accepts the result only if it is unambiguous.
///
/// \param IDLoc  Location used for most diagnostics and stored on the matched
///               instruction via Inst.setLoc().
/// \param Opcode [out] Set to the matched instruction's opcode on success.
/// \param Operands Parsed operands; Operands[0] must be the mnemonic token.
/// \param Out    Streamer handed to MatchFPUWaitAlias and EmitInstruction.
/// \param ErrorInfo [in,out] Passed to MatchInstruction for the non-probing
///               match attempts; overwritten with the missing-feature info
///               when a missing-feature diagnostic is reported.
/// \param MatchingInlineAsm When true, validation, post-processing and
///               emission are skipped; the flag is also forwarded to the
///               matcher and to most diagnostics.
/// \returns false when a unique match was found; otherwise the result of the
///          diagnostic helper (Error / ErrorMissingFeature), or true when
///          validateInstruction() rejects the instruction.
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  assert(Op.isToken() && "Leading operand should always be a mnemonic!");
  StringRef Mnemonic = Op.getToken();
  SMRange EmptyRange = None;
  // Base is read from the same token as Mnemonic; it is used below to build
  // the suffixed "push" mnemonic and to restore the token afterwards.
  StringRef Base = Op.getToken();
  unsigned Prefixes = getPrefixes(Operands);

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);

  MCInst Inst;

  // Carry any prefixes reported by getPrefixes() on the instruction as flags.
  if (Prefixes)
    Inst.setFlags(Prefixes);

  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  // Note: the loop variable below shadows the mnemonic operand 'Op' declared
  // above for the duration of the loop.
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized()) {
      UnsizedMemOp = X86Op;
      // Stop at the first unsized memory operand: Intel syntax allows only
      // one memory operand.
      break;
    }
  }

  // Allow some instructions to have implicitly pointer-sized operands.  This is
  // compatible with gas.
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        UnsizedMemOp->Mem.Size = getPointerWidth();
        break;
      }
    }
  }

  // Results of the individual match attempts that are worth counting.
  SmallVector<unsigned, 8> Match;
  uint64_t ErrorInfoMissingFeature = 0;

  // If an unsized push has an immediate operand, use the pointer width as
  // its size.
  if (Mnemonic == "push" && Operands.size() == 2) {
    auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
    if (X86Op->isImm()) {
      // If it's not a constant fall through and let remainder take care of it.
      const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
      unsigned Size = getPointerWidth();
      if (CE &&
          (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
        // Temporarily append the size suffix for the current mode to the
        // mnemonic ("q", "l" or "w"; a blank if none of the modes is active).
        SmallString<16> Tmp;
        Tmp += Base;
        Tmp += (is64BitMode())
                   ? "q"
                   : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
        Op.setTokenValue(Tmp);
        // Do match in ATT mode to allow explicit suffix usage.
        Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
                                         MatchingInlineAsm,
                                         false /*isParsingIntelSyntax()*/));
        // Put the original, unsuffixed mnemonic back.
        Op.setTokenValue(Base);
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size.  In Intel assembly, the size is not part of the instruction
  // mnemonic.
  // The isMemUnsized() re-check skips this when the operand was given the
  // pointer width above.
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      unsigned LastOpcode = Inst.getOpcode();
      unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                    MatchingInlineAsm, isParsingIntelSyntax());
      // Record this result only if nothing has been recorded yet or this
      // attempt changed Inst's opcode.
      // NOTE(review): presumably this keeps several sizes that resolve to the
      // same opcode from being counted as distinct matches - confirm.
      if (Match.empty() || LastOpcode != Inst.getOpcode())
        Match.push_back(M);

      // If this returned as a missing feature failure, remember that.
      // This inspects the most recently *recorded* result, which is not
      // necessarily M when M was not pushed above.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeature = ErrorInfoIgnore;
    }

    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  }

  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation.  There shouldn't be any ambiguity in our mnemonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstruction(
        Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeature = ErrorInfo;
  }

  // Restore the size of the unsized memory operand if we modified it.
  // This also clears the pointer-width size assigned for call/jmp/push.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.
  // Match is never empty here: one of the attempts above always records a
  // result.
  if (Match.back() == Match_MnemonicFail) {
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Op.getLocRange(), MatchingInlineAsm);
  }

  unsigned NumSuccessfulMatches =
      std::count(std::begin(Match), std::end(Match), Match_Success);

  // If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movzx eax, m8/m16".
  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
      UnsizedMemOp->getMemFrontendSize()) {
    UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
    unsigned M = MatchInstruction(
        Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax());
    if (M == Match_Success)
      NumSuccessfulMatches = 1;

    // Add a rewrite that encodes the size information we used from the
    // frontend.
    // The rewrite is added whether or not the retry above succeeded.
    InstInfo->AsmRewrites->emplace_back(
        AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
        /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
  }

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      EmitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  } else if (NumSuccessfulMatches > 1) {
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    // NOTE(review): unlike the other diagnostics in this function, this call
    // does not forward MatchingInlineAsm to Error() - confirm that is
    // intended.
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 UnsizedMemOp->getLocRange());
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_MissingFeature) == 1) {
    ErrorInfo = ErrorInfoMissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (std::count(std::begin(Match), std::end(Match),
                 Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
               MatchingInlineAsm);
}
3293 
OmitRegisterFromClobberLists(unsigned RegNo)3294 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
3295   return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
3296 }
3297 
ParseDirective(AsmToken DirectiveID)3298 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
3299   MCAsmParser &Parser = getParser();
3300   StringRef IDVal = DirectiveID.getIdentifier();
3301   if (IDVal.startswith(".code"))
3302     return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
3303   else if (IDVal.startswith(".att_syntax")) {
3304     if (getLexer().isNot(AsmToken::EndOfStatement)) {
3305       if (Parser.getTok().getString() == "prefix")
3306         Parser.Lex();
3307       else if (Parser.getTok().getString() == "noprefix")
3308         return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
3309                                            "supported: registers must have a "
3310                                            "'%' prefix in .att_syntax");
3311     }
3312     getParser().setAssemblerDialect(0);
3313     return false;
3314   } else if (IDVal.startswith(".intel_syntax")) {
3315     getParser().setAssemblerDialect(1);
3316     if (getLexer().isNot(AsmToken::EndOfStatement)) {
3317       if (Parser.getTok().getString() == "noprefix")
3318         Parser.Lex();
3319       else if (Parser.getTok().getString() == "prefix")
3320         return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
3321                                            "supported: registers must not have "
3322                                            "a '%' prefix in .intel_syntax");
3323     }
3324     return false;
3325   } else if (IDVal == ".even")
3326     return parseDirectiveEven(DirectiveID.getLoc());
3327   else if (IDVal == ".cv_fpo_proc")
3328     return parseDirectiveFPOProc(DirectiveID.getLoc());
3329   else if (IDVal == ".cv_fpo_setframe")
3330     return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
3331   else if (IDVal == ".cv_fpo_pushreg")
3332     return parseDirectiveFPOPushReg(DirectiveID.getLoc());
3333   else if (IDVal == ".cv_fpo_stackalloc")
3334     return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
3335   else if (IDVal == ".cv_fpo_stackalign")
3336     return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
3337   else if (IDVal == ".cv_fpo_endprologue")
3338     return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
3339   else if (IDVal == ".cv_fpo_endproc")
3340     return parseDirectiveFPOEndProc(DirectiveID.getLoc());
3341 
3342   return true;
3343 }
3344 
3345 /// parseDirectiveEven
3346 ///  ::= .even
parseDirectiveEven(SMLoc L)3347 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
3348   if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
3349     return false;
3350 
3351   const MCSection *Section = getStreamer().getCurrentSectionOnly();
3352   if (!Section) {
3353     getStreamer().InitSections(false);
3354     Section = getStreamer().getCurrentSectionOnly();
3355   }
3356   if (Section->UseCodeAlign())
3357     getStreamer().EmitCodeAlignment(2, 0);
3358   else
3359     getStreamer().EmitValueToAlignment(2, 0, 1, 0);
3360   return false;
3361 }
3362 
3363 /// ParseDirectiveCode
3364 ///  ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)3365 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3366   MCAsmParser &Parser = getParser();
3367   Code16GCC = false;
3368   if (IDVal == ".code16") {
3369     Parser.Lex();
3370     if (!is16BitMode()) {
3371       SwitchMode(X86::Mode16Bit);
3372       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3373     }
3374   } else if (IDVal == ".code16gcc") {
3375     // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
3376     Parser.Lex();
3377     Code16GCC = true;
3378     if (!is16BitMode()) {
3379       SwitchMode(X86::Mode16Bit);
3380       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3381     }
3382   } else if (IDVal == ".code32") {
3383     Parser.Lex();
3384     if (!is32BitMode()) {
3385       SwitchMode(X86::Mode32Bit);
3386       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3387     }
3388   } else if (IDVal == ".code64") {
3389     Parser.Lex();
3390     if (!is64BitMode()) {
3391       SwitchMode(X86::Mode64Bit);
3392       getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3393     }
3394   } else {
3395     Error(L, "unknown directive " + IDVal);
3396     return false;
3397   }
3398 
3399   return false;
3400 }
3401 
3402 // .cv_fpo_proc foo
parseDirectiveFPOProc(SMLoc L)3403 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
3404   MCAsmParser &Parser = getParser();
3405   StringRef ProcName;
3406   int64_t ParamsSize;
3407   if (Parser.parseIdentifier(ProcName))
3408     return Parser.TokError("expected symbol name");
3409   if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
3410     return true;
3411   if (!isUIntN(32, ParamsSize))
3412     return Parser.TokError("parameters size out of range");
3413   if (Parser.parseEOL("unexpected tokens"))
3414     return addErrorSuffix(" in '.cv_fpo_proc' directive");
3415   MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
3416   return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
3417 }
3418 
3419 // .cv_fpo_setframe ebp
parseDirectiveFPOSetFrame(SMLoc L)3420 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
3421   MCAsmParser &Parser = getParser();
3422   unsigned Reg;
3423   SMLoc DummyLoc;
3424   if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3425       Parser.parseEOL("unexpected tokens"))
3426     return addErrorSuffix(" in '.cv_fpo_setframe' directive");
3427   return getTargetStreamer().emitFPOSetFrame(Reg, L);
3428 }
3429 
3430 // .cv_fpo_pushreg ebx
parseDirectiveFPOPushReg(SMLoc L)3431 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
3432   MCAsmParser &Parser = getParser();
3433   unsigned Reg;
3434   SMLoc DummyLoc;
3435   if (ParseRegister(Reg, DummyLoc, DummyLoc) ||
3436       Parser.parseEOL("unexpected tokens"))
3437     return addErrorSuffix(" in '.cv_fpo_pushreg' directive");
3438   return getTargetStreamer().emitFPOPushReg(Reg, L);
3439 }
3440 
3441 // .cv_fpo_stackalloc 20
parseDirectiveFPOStackAlloc(SMLoc L)3442 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
3443   MCAsmParser &Parser = getParser();
3444   int64_t Offset;
3445   if (Parser.parseIntToken(Offset, "expected offset") ||
3446       Parser.parseEOL("unexpected tokens"))
3447     return addErrorSuffix(" in '.cv_fpo_stackalloc' directive");
3448   return getTargetStreamer().emitFPOStackAlloc(Offset, L);
3449 }
3450 
3451 // .cv_fpo_stackalign 8
parseDirectiveFPOStackAlign(SMLoc L)3452 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
3453   MCAsmParser &Parser = getParser();
3454   int64_t Offset;
3455   if (Parser.parseIntToken(Offset, "expected offset") ||
3456       Parser.parseEOL("unexpected tokens"))
3457     return addErrorSuffix(" in '.cv_fpo_stackalign' directive");
3458   return getTargetStreamer().emitFPOStackAlign(Offset, L);
3459 }
3460 
3461 // .cv_fpo_endprologue
parseDirectiveFPOEndPrologue(SMLoc L)3462 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
3463   MCAsmParser &Parser = getParser();
3464   if (Parser.parseEOL("unexpected tokens"))
3465     return addErrorSuffix(" in '.cv_fpo_endprologue' directive");
3466   return getTargetStreamer().emitFPOEndPrologue(L);
3467 }
3468 
3469 // .cv_fpo_endproc
parseDirectiveFPOEndProc(SMLoc L)3470 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
3471   MCAsmParser &Parser = getParser();
3472   if (Parser.parseEOL("unexpected tokens"))
3473     return addErrorSuffix(" in '.cv_fpo_endproc' directive");
3474   return getTargetStreamer().emitFPOEndProc(L);
3475 }
3476 
3477 // Force static initialization.
LLVMInitializeX86AsmParser()3478 extern "C" void LLVMInitializeX86AsmParser() {
3479   RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
3480   RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
3481 }
3482 
3483 #define GET_REGISTER_MATCHER
3484 #define GET_MATCHER_IMPLEMENTATION
3485 #define GET_SUBTARGET_FEATURE_NAME
3486 #include "X86GenAsmMatcher.inc"
3487