//===- llvm/MC/MCParser/MCTargetAsmParser.h - Target Asm Parser -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCInstrInfo.h"
15 #include "llvm/MC/MCParser/MCAsmLexer.h"
16 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
17 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
18 #include "llvm/MC/MCTargetOptions.h"
19 #include "llvm/MC/SubtargetFeature.h"
20 #include "llvm/Support/SMLoc.h"
21 #include <cstdint>
22 #include <memory>
23 
24 namespace llvm {
25 
26 class MCInst;
27 class MCStreamer;
28 class MCSubtargetInfo;
29 template <typename T> class SmallVectorImpl;
30 
31 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
32 
/// Kinds of rewrite that can be recorded against an assembly string.
///
/// AsmRewritePrecedence has one row per enumerator, in declaration order, so
/// any change here must be mirrored in that table.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_CallInput,      // Rewrite in terms of ${N:P}.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

/// Precedence value associated with each AsmRewriteKind; row N belongs to the
/// enumerator whose value is N.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_CallInput
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Catch the table and the enum drifting apart at compile time rather than
// through an out-of-bounds read at run time.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  static_cast<unsigned>(AOK_IntelExpr) + 1,
              "AsmRewritePrecedence needs exactly one entry per AsmRewriteKind");
60 
61 // Represnt the various parts which makes up an intel expression,
62 // used for emitting compound intel expressions
63 struct IntelExpr {
64   bool NeedBracs;
65   int64_t Imm;
66   StringRef BaseReg;
67   StringRef IndexReg;
68   StringRef OffsetName;
69   unsigned Scale;
70 
IntelExprIntelExpr71   IntelExpr()
72       : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()),
73         OffsetName(StringRef()), Scale(1) {}
74   // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
IntelExprIntelExpr75   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
76             StringRef offsetName, int64_t imm, bool needBracs)
77       : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
78         OffsetName(offsetName), Scale(1) {
79     if (scale)
80       Scale = scale;
81   }
hasBaseRegIntelExpr82   bool hasBaseReg() const { return !BaseReg.empty(); }
hasIndexRegIntelExpr83   bool hasIndexReg() const { return !IndexReg.empty(); }
hasRegsIntelExpr84   bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
hasOffsetIntelExpr85   bool hasOffset() const { return !OffsetName.empty(); }
86   // Normally we won't emit immediates unconditionally,
87   // unless we've got no other components
emitImmIntelExpr88   bool emitImm() const { return !(hasRegs() || hasOffset()); }
isValidIntelExpr89   bool isValid() const {
90     return (Scale == 1) ||
91            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
92   }
93 };
94 
95 struct AsmRewrite {
96   AsmRewriteKind Kind;
97   SMLoc Loc;
98   unsigned Len;
99   bool Done;
100   int64_t Val;
101   StringRef Label;
102   IntelExpr IntelExp;
103 
104 public:
105   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
KindAsmRewrite106     : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {}
AsmRewriteAsmRewrite107   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
108     : AsmRewrite(kind, loc, len) { Label = label; }
AsmRewriteAsmRewrite109   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
110     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
111 };
112 
113 struct ParseInstructionInfo {
114   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
115 
116   ParseInstructionInfo() = default;
ParseInstructionInfoParseInstructionInfo117   ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
118     : AsmRewrites(rewrites) {}
119 };
120 
/// Result of trying to match a single operand.
enum OperandMatchResultTy {
  /// Operand matched successfully.
  MatchOperand_Success,
  /// Operand did not match.
  MatchOperand_NoMatch,
  /// Operand matched but had errors.
  MatchOperand_ParseFail
};
126 
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};

// While parsing an operand, the assembler walks the set of operand classes the
// operand might belong to and calls each class's PredicateMethod to find out
// whether it matches.
//
// When two AsmOperands would each produce a specific diagnostic on a failed
// match, nothing tells the assembler which of them is the closer match.
// DiagnosticPredicate separates 'completely no match' from 'near match', so
// the assembler can choose between emitting the specific diagnostic and
// falling back to 'InvalidOperand' while it keeps looking for a 'better
// matching' diagnostic.
//
// For example:
//    opcode opnd0, opnd1, opnd2
//
// where:
//    opnd2 could be an 'immediate of range [-8, 7]'
//    opnd2 could be a  'register + shift/extend'.
//
// If opnd2 is a valid register with a wrong shift/extend suffix, reporting
// that the operand should be an immediate in range [-8, 7] makes little
// sense.
//
// This is a light-weight alternative to the 'NearMissInfo' approach below,
// which collects *all* possible diagnostics. It is optional and fully
// backward compatible with existing PredicateMethods that return a 'bool'
// (match or no match).
struct DiagnosticPredicate {
  DiagnosticPredicateTy Type;

  // Adapts a plain boolean predicate result: true becomes Match, false
  // becomes NearMatch (never NoMatch).
  explicit DiagnosticPredicate(bool Match)
      : Type(DiagnosticPredicateTy::NearMatch) {
    if (Match)
      Type = DiagnosticPredicateTy::Match;
  }
  DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
  DiagnosticPredicate(const DiagnosticPredicate &) = default;
  DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default;

  // True only for a full match.
  operator bool() const { return isMatch(); }
  bool isMatch() const { return DiagnosticPredicateTy::Match == Type; }
  bool isNearMatch() const { return DiagnosticPredicateTy::NearMatch == Type; }
  bool isNoMatch() const { return DiagnosticPredicateTy::NoMatch == Type; }
};
174 
175 // When matching of an assembly instruction fails, there may be multiple
176 // encodings that are close to being a match. It's often ambiguous which one
177 // the programmer intended to use, so we want to report an error which mentions
178 // each of these "near-miss" encodings. This struct contains information about
179 // one such encoding, and why it did not match the parsed instruction.
180 class NearMissInfo {
181 public:
182   enum NearMissKind {
183     NoNearMiss,
184     NearMissOperand,
185     NearMissFeature,
186     NearMissPredicate,
187     NearMissTooFewOperands,
188   };
189 
190   // The encoding is valid for the parsed assembly string. This is only used
191   // internally to the table-generated assembly matcher.
getSuccess()192   static NearMissInfo getSuccess() { return NearMissInfo(); }
193 
194   // The instruction encoding is not valid because it requires some target
195   // features that are not currently enabled. MissingFeatures has a bit set for
196   // each feature that the encoding needs but which is not enabled.
getMissedFeature(const FeatureBitset & MissingFeatures)197   static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
198     NearMissInfo Result;
199     Result.Kind = NearMissFeature;
200     Result.Features = MissingFeatures;
201     return Result;
202   }
203 
204   // The instruction encoding is not valid because the target-specific
205   // predicate function returned an error code. FailureCode is the
206   // target-specific error code returned by the predicate.
getMissedPredicate(unsigned FailureCode)207   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
208     NearMissInfo Result;
209     Result.Kind = NearMissPredicate;
210     Result.PredicateError = FailureCode;
211     return Result;
212   }
213 
214   // The instruction encoding is not valid because one (and only one) parsed
215   // operand is not of the correct type. OperandError is the error code
216   // relating to the operand class expected by the encoding. OperandClass is
217   // the type of the expected operand. Opcode is the opcode of the encoding.
218   // OperandIndex is the index into the parsed operand list.
getMissedOperand(unsigned OperandError,unsigned OperandClass,unsigned Opcode,unsigned OperandIndex)219   static NearMissInfo getMissedOperand(unsigned OperandError,
220                                        unsigned OperandClass, unsigned Opcode,
221                                        unsigned OperandIndex) {
222     NearMissInfo Result;
223     Result.Kind = NearMissOperand;
224     Result.MissedOperand.Error = OperandError;
225     Result.MissedOperand.Class = OperandClass;
226     Result.MissedOperand.Opcode = Opcode;
227     Result.MissedOperand.Index = OperandIndex;
228     return Result;
229   }
230 
231   // The instruction encoding is not valid because it expects more operands
232   // than were parsed. OperandClass is the class of the expected operand that
233   // was not provided. Opcode is the instruction encoding.
getTooFewOperands(unsigned OperandClass,unsigned Opcode)234   static NearMissInfo getTooFewOperands(unsigned OperandClass,
235                                         unsigned Opcode) {
236     NearMissInfo Result;
237     Result.Kind = NearMissTooFewOperands;
238     Result.TooFewOperands.Class = OperandClass;
239     Result.TooFewOperands.Opcode = Opcode;
240     return Result;
241   }
242 
243   operator bool() const { return Kind != NoNearMiss; }
244 
getKind()245   NearMissKind getKind() const { return Kind; }
246 
247   // Feature flags required by the instruction, that the current target does
248   // not have.
getFeatures()249   const FeatureBitset& getFeatures() const {
250     assert(Kind == NearMissFeature);
251     return Features;
252   }
253   // Error code returned by the target predicate when validating this
254   // instruction encoding.
getPredicateError()255   unsigned getPredicateError() const {
256     assert(Kind == NearMissPredicate);
257     return PredicateError;
258   }
259   // MatchClassKind of the operand that we expected to see.
getOperandClass()260   unsigned getOperandClass() const {
261     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
262     return MissedOperand.Class;
263   }
264   // Opcode of the encoding we were trying to match.
getOpcode()265   unsigned getOpcode() const {
266     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
267     return MissedOperand.Opcode;
268   }
269   // Error code returned when validating the operand.
getOperandError()270   unsigned getOperandError() const {
271     assert(Kind == NearMissOperand);
272     return MissedOperand.Error;
273   }
274   // Index of the actual operand we were trying to match in the list of parsed
275   // operands.
getOperandIndex()276   unsigned getOperandIndex() const {
277     assert(Kind == NearMissOperand);
278     return MissedOperand.Index;
279   }
280 
281 private:
282   NearMissKind Kind;
283 
284   // These two structs share a common prefix, so we can safely rely on the fact
285   // that they overlap in the union.
286   struct MissedOpInfo {
287     unsigned Class;
288     unsigned Opcode;
289     unsigned Error;
290     unsigned Index;
291   };
292 
293   struct TooFewOperandsInfo {
294     unsigned Class;
295     unsigned Opcode;
296   };
297 
298   union {
299     FeatureBitset Features;
300     unsigned PredicateError;
301     MissedOpInfo MissedOperand;
302     TooFewOperandsInfo TooFewOperands;
303   };
304 
NearMissInfo()305   NearMissInfo() : Kind(NoNearMiss) {}
306 };
307 
308 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
309 class MCTargetAsmParser : public MCAsmParserExtension {
310 public:
311   enum MatchResultTy {
312     Match_InvalidOperand,
313     Match_InvalidTiedOperand,
314     Match_MissingFeature,
315     Match_MnemonicFail,
316     Match_Success,
317     Match_NearMisses,
318     FIRST_TARGET_MATCH_RESULT_TY
319   };
320 
321 protected: // Can only create subclasses.
322   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
323                     const MCInstrInfo &MII);
324 
325   /// Create a copy of STI and return a non-const reference to it.
326   MCSubtargetInfo &copySTI();
327 
328   /// AvailableFeatures - The current set of available features.
329   FeatureBitset AvailableFeatures;
330 
331   /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly?
332   bool ParsingMSInlineAsm = false;
333 
334   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
335   /// ms-style inline assembly.
336   MCAsmParserSemaCallback *SemaCallback = nullptr;
337 
338   /// Set of options which affects instrumentation of inline assembly.
339   MCTargetOptions MCOptions;
340 
341   /// Current STI.
342   const MCSubtargetInfo *STI;
343 
344   const MCInstrInfo &MII;
345 
346 public:
347   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
348   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
349 
350   ~MCTargetAsmParser() override;
351 
352   const MCSubtargetInfo &getSTI() const;
353 
getAvailableFeatures()354   const FeatureBitset& getAvailableFeatures() const {
355     return AvailableFeatures;
356   }
setAvailableFeatures(const FeatureBitset & Value)357   void setAvailableFeatures(const FeatureBitset& Value) {
358     AvailableFeatures = Value;
359   }
360 
isParsingMSInlineAsm()361   bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; }
setParsingMSInlineAsm(bool Value)362   void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; }
363 
getTargetOptions()364   MCTargetOptions getTargetOptions() const { return MCOptions; }
365 
setSemaCallback(MCAsmParserSemaCallback * Callback)366   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
367     SemaCallback = Callback;
368   }
369 
370   // Target-specific parsing of expression.
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)371   virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
372     return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
373   }
374 
375   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
376                              SMLoc &EndLoc) = 0;
377 
378   /// tryParseRegister - parse one register if possible
379   ///
380   /// Check whether a register specification can be parsed at the current
381   /// location, without failing the entire parse if it can't. Must not consume
382   /// tokens if the parse fails.
383   virtual OperandMatchResultTy
384   tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) = 0;
385 
386   /// ParseInstruction - Parse one assembly instruction.
387   ///
388   /// The parser is positioned following the instruction name. The target
389   /// specific instruction parser should parse the entire instruction and
390   /// construct the appropriate MCInst, or emit an error. On success, the entire
391   /// line should be parsed up to and including the end-of-statement token. On
392   /// failure, the parser is not required to read to the end of the line.
393   //
394   /// \param Name - The instruction name.
395   /// \param NameLoc - The source location of the name.
396   /// \param Operands [out] - The list of parsed operands, this returns
397   ///        ownership of them to the caller.
398   /// \return True on failure.
399   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
400                                 SMLoc NameLoc, OperandVector &Operands) = 0;
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,AsmToken Token,OperandVector & Operands)401   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
402                                 AsmToken Token, OperandVector &Operands) {
403     return ParseInstruction(Info, Name, Token.getLoc(), Operands);
404   }
405 
406   /// ParseDirective - Parse a target specific assembler directive
407   ///
408   /// The parser is positioned following the directive name.  The target
409   /// specific directive parser should parse the entire directive doing or
410   /// recording any target specific work, or return true and do nothing if the
411   /// directive is not target specific. If the directive is specific for
412   /// the target, the entire line is parsed up to and including the
413   /// end-of-statement token and false is returned.
414   ///
415   /// \param DirectiveID - the identifier token of the directive.
416   virtual bool ParseDirective(AsmToken DirectiveID) = 0;
417 
418   /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
419   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
420   /// This returns false on success and returns true on failure to match.
421   ///
422   /// On failure, the target parser is responsible for emitting a diagnostic
423   /// explaining the match failure.
424   virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
425                                        OperandVector &Operands, MCStreamer &Out,
426                                        uint64_t &ErrorInfo,
427                                        bool MatchingInlineAsm) = 0;
428 
429   /// Allows targets to let registers opt out of clobber lists.
OmitRegisterFromClobberLists(unsigned RegNo)430   virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
431 
432   /// Allow a target to add special case operand matching for things that
433   /// tblgen doesn't/can't handle effectively. For example, literal
434   /// immediates on ARM. TableGen expects a token operand, but the parser
435   /// will recognize them as immediates.
validateTargetOperandClass(MCParsedAsmOperand & Op,unsigned Kind)436   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
437                                               unsigned Kind) {
438     return Match_InvalidOperand;
439   }
440 
441   /// Validate the instruction match against any complex target predicates
442   /// before rendering any operands to it.
443   virtual unsigned
checkEarlyTargetMatchPredicate(MCInst & Inst,const OperandVector & Operands)444   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
445     return Match_Success;
446   }
447 
448   /// checkTargetMatchPredicate - Validate the instruction match against
449   /// any complex target predicates not expressible via match classes.
checkTargetMatchPredicate(MCInst & Inst)450   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
451     return Match_Success;
452   }
453 
454   virtual void convertToMapAndConstraints(unsigned Kind,
455                                           const OperandVector &Operands) = 0;
456 
457   /// Returns whether two registers are equal and is used by the tied-operands
458   /// checks in the AsmMatcher. This method can be overridden allow e.g. a
459   /// sub- or super-register as the tied operand.
regsEqual(const MCParsedAsmOperand & Op1,const MCParsedAsmOperand & Op2)460   virtual bool regsEqual(const MCParsedAsmOperand &Op1,
461                          const MCParsedAsmOperand &Op2) const {
462     assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
463     return Op1.getReg() == Op2.getReg();
464   }
465 
466   // Return whether this parser uses assignment statements with equals tokens
equalIsAsmAssignment()467   virtual bool equalIsAsmAssignment() { return true; };
468   // Return whether this start of statement identifier is a label
isLabel(AsmToken & Token)469   virtual bool isLabel(AsmToken &Token) { return true; };
470   // Return whether this parser accept star as start of statement
starIsStartOfStatement()471   virtual bool starIsStartOfStatement() { return false; };
472 
applyModifierToExpr(const MCExpr * E,MCSymbolRefExpr::VariantKind,MCContext & Ctx)473   virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
474                                             MCSymbolRefExpr::VariantKind,
475                                             MCContext &Ctx) {
476     return nullptr;
477   }
478 
479   // For actions that have to be performed before a label is emitted
doBeforeLabelEmit(MCSymbol * Symbol)480   virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
481 
onLabelParsed(MCSymbol * Symbol)482   virtual void onLabelParsed(MCSymbol *Symbol) {}
483 
484   /// Ensure that all previously parsed instructions have been emitted to the
485   /// output streamer, if the target does not emit them immediately.
flushPendingInstructions(MCStreamer & Out)486   virtual void flushPendingInstructions(MCStreamer &Out) {}
487 
createTargetUnaryExpr(const MCExpr * E,AsmToken::TokenKind OperatorToken,MCContext & Ctx)488   virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
489                                               AsmToken::TokenKind OperatorToken,
490                                               MCContext &Ctx) {
491     return nullptr;
492   }
493 
494   // For any initialization at the beginning of parsing.
onBeginOfFile()495   virtual void onBeginOfFile() {}
496 
497   // For any checks or cleanups at the end of parsing.
onEndOfFile()498   virtual void onEndOfFile() {}
499 };
500 
501 } // end namespace llvm
502 
503 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
504