1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11 
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/SMLoc.h"
#include <cassert>
#include <cstdint>
#include <memory>
21 
22 namespace llvm {
23 
24 class MCContext;
25 class MCInst;
26 class MCInstrInfo;
27 class MCRegister;
28 class MCStreamer;
29 class MCSubtargetInfo;
30 class MCSymbol;
31 template <typename T> class SmallVectorImpl;
32 
33 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
34 
// Kinds of rewrite that can be recorded in an AsmRewrite. The enumerator
// values are used as indices into AsmRewritePrecedence below, so the two lists
// must be kept in the same order.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_CallInput,      // Rewrite in terms of ${N:P}.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

// Precedence value for each AsmRewriteKind, indexed by the enumerator. The
// trailing comment on every entry names the enumerator that selects it; the
// entries are listed in enum order.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_CallInput
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Catch an enumerator being added or removed without the table following.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  static_cast<unsigned>(AOK_IntelExpr) + 1,
              "AsmRewritePrecedence must have one entry per AsmRewriteKind");
62 
63 // Represnt the various parts which makes up an intel expression,
64 // used for emitting compound intel expressions
65 struct IntelExpr {
66   bool NeedBracs;
67   int64_t Imm;
68   StringRef BaseReg;
69   StringRef IndexReg;
70   StringRef OffsetName;
71   unsigned Scale;
72 
73   IntelExpr()
74       : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()),
75         OffsetName(StringRef()), Scale(1) {}
76   // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
77   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
78             StringRef offsetName, int64_t imm, bool needBracs)
79       : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
80         OffsetName(offsetName), Scale(1) {
81     if (scale)
82       Scale = scale;
83   }
84   bool hasBaseReg() const { return !BaseReg.empty(); }
85   bool hasIndexReg() const { return !IndexReg.empty(); }
86   bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
87   bool hasOffset() const { return !OffsetName.empty(); }
88   // Normally we won't emit immediates unconditionally,
89   // unless we've got no other components
90   bool emitImm() const { return !(hasRegs() || hasOffset()); }
91   bool isValid() const {
92     return (Scale == 1) ||
93            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
94   }
95 };
96 
97 struct AsmRewrite {
98   AsmRewriteKind Kind;
99   SMLoc Loc;
100   unsigned Len;
101   bool Done;
102   int64_t Val;
103   StringRef Label;
104   IntelExpr IntelExp;
105   bool IntelExpRestricted;
106 
107 public:
108   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0,
109              bool Restricted = false)
110       : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {
111     IntelExpRestricted = Restricted;
112   }
113   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
114     : AsmRewrite(kind, loc, len) { Label = label; }
115   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
116     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
117 };
118 
119 struct ParseInstructionInfo {
120   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
121 
122   ParseInstructionInfo() = default;
123   ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
124     : AsmRewrites(rewrites) {}
125 };
126 
// Outcome of an operand-level parse attempt (for example, the return type of
// MCTargetAsmParser::tryParseRegister).
enum OperandMatchResultTy {
  // The operand matched successfully.
  MatchOperand_Success,
  // The operand did not match.
  MatchOperand_NoMatch,
  // The operand matched but had errors.
  MatchOperand_ParseFail
};
132 
// The three outcomes a DiagnosticPredicate can carry.
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};

// While parsing an operand, the assembler iterates over the operand classes
// the operand might belong to and calls each class's PredicateMethod to find
// out whether it matches.
//
// When two AsmOperands would each produce a specific diagnostic on a failed
// match, there is currently no way to tell which of them is the closer match.
// DiagnosticPredicate separates 'completely no match' from 'near match', so
// the assembler can either emit the specific diagnostic or fall back to
// 'InvalidOperand' and keep looking for a 'better matching' diagnostic.
//
// For example:
//    opcode opnd0, opnd1, opnd2
//
// where:
//    opnd2 could be an 'immediate of range [-8, 7]'
//    opnd2 could be a  'register + shift/extend'.
//
// If opnd2 is a valid register with a wrong shift/extend suffix, reporting
// that the operand should be an immediate in range [-8, 7] makes little sense.
//
// This is a light-weight alternative to the 'NearMissInfo' approach below,
// which collects *all* possible diagnostics. It is optional and fully backward
// compatible with existing PredicateMethods that return a 'bool' (match or no
// match).
struct DiagnosticPredicate {
  DiagnosticPredicateTy Type;

  // Wraps a plain boolean result: true becomes Match, while false is recorded
  // as NearMatch (not NoMatch).
  explicit DiagnosticPredicate(bool Match)
      : Type(DiagnosticPredicateTy::NearMatch) {
    if (Match)
      Type = DiagnosticPredicateTy::Match;
  }
  DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
  DiagnosticPredicate(const DiagnosticPredicate &) = default;
  DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default;

  bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
  bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
  bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }

  // Converts to true only for a full match.
  operator bool() const { return isMatch(); }
};
180 
181 // When matching of an assembly instruction fails, there may be multiple
182 // encodings that are close to being a match. It's often ambiguous which one
183 // the programmer intended to use, so we want to report an error which mentions
184 // each of these "near-miss" encodings. This struct contains information about
185 // one such encoding, and why it did not match the parsed instruction.
186 class NearMissInfo {
187 public:
188   enum NearMissKind {
189     NoNearMiss,
190     NearMissOperand,
191     NearMissFeature,
192     NearMissPredicate,
193     NearMissTooFewOperands,
194   };
195 
196   // The encoding is valid for the parsed assembly string. This is only used
197   // internally to the table-generated assembly matcher.
198   static NearMissInfo getSuccess() { return NearMissInfo(); }
199 
200   // The instruction encoding is not valid because it requires some target
201   // features that are not currently enabled. MissingFeatures has a bit set for
202   // each feature that the encoding needs but which is not enabled.
203   static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
204     NearMissInfo Result;
205     Result.Kind = NearMissFeature;
206     Result.Features = MissingFeatures;
207     return Result;
208   }
209 
210   // The instruction encoding is not valid because the target-specific
211   // predicate function returned an error code. FailureCode is the
212   // target-specific error code returned by the predicate.
213   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
214     NearMissInfo Result;
215     Result.Kind = NearMissPredicate;
216     Result.PredicateError = FailureCode;
217     return Result;
218   }
219 
220   // The instruction encoding is not valid because one (and only one) parsed
221   // operand is not of the correct type. OperandError is the error code
222   // relating to the operand class expected by the encoding. OperandClass is
223   // the type of the expected operand. Opcode is the opcode of the encoding.
224   // OperandIndex is the index into the parsed operand list.
225   static NearMissInfo getMissedOperand(unsigned OperandError,
226                                        unsigned OperandClass, unsigned Opcode,
227                                        unsigned OperandIndex) {
228     NearMissInfo Result;
229     Result.Kind = NearMissOperand;
230     Result.MissedOperand.Error = OperandError;
231     Result.MissedOperand.Class = OperandClass;
232     Result.MissedOperand.Opcode = Opcode;
233     Result.MissedOperand.Index = OperandIndex;
234     return Result;
235   }
236 
237   // The instruction encoding is not valid because it expects more operands
238   // than were parsed. OperandClass is the class of the expected operand that
239   // was not provided. Opcode is the instruction encoding.
240   static NearMissInfo getTooFewOperands(unsigned OperandClass,
241                                         unsigned Opcode) {
242     NearMissInfo Result;
243     Result.Kind = NearMissTooFewOperands;
244     Result.TooFewOperands.Class = OperandClass;
245     Result.TooFewOperands.Opcode = Opcode;
246     return Result;
247   }
248 
249   operator bool() const { return Kind != NoNearMiss; }
250 
251   NearMissKind getKind() const { return Kind; }
252 
253   // Feature flags required by the instruction, that the current target does
254   // not have.
255   const FeatureBitset& getFeatures() const {
256     assert(Kind == NearMissFeature);
257     return Features;
258   }
259   // Error code returned by the target predicate when validating this
260   // instruction encoding.
261   unsigned getPredicateError() const {
262     assert(Kind == NearMissPredicate);
263     return PredicateError;
264   }
265   // MatchClassKind of the operand that we expected to see.
266   unsigned getOperandClass() const {
267     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
268     return MissedOperand.Class;
269   }
270   // Opcode of the encoding we were trying to match.
271   unsigned getOpcode() const {
272     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
273     return MissedOperand.Opcode;
274   }
275   // Error code returned when validating the operand.
276   unsigned getOperandError() const {
277     assert(Kind == NearMissOperand);
278     return MissedOperand.Error;
279   }
280   // Index of the actual operand we were trying to match in the list of parsed
281   // operands.
282   unsigned getOperandIndex() const {
283     assert(Kind == NearMissOperand);
284     return MissedOperand.Index;
285   }
286 
287 private:
288   NearMissKind Kind;
289 
290   // These two structs share a common prefix, so we can safely rely on the fact
291   // that they overlap in the union.
292   struct MissedOpInfo {
293     unsigned Class;
294     unsigned Opcode;
295     unsigned Error;
296     unsigned Index;
297   };
298 
299   struct TooFewOperandsInfo {
300     unsigned Class;
301     unsigned Opcode;
302   };
303 
304   union {
305     FeatureBitset Features;
306     unsigned PredicateError;
307     MissedOpInfo MissedOperand;
308     TooFewOperandsInfo TooFewOperands;
309   };
310 
311   NearMissInfo() : Kind(NoNearMiss) {}
312 };
313 
314 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
315 class MCTargetAsmParser : public MCAsmParserExtension {
316 public:
317   enum MatchResultTy {
318     Match_InvalidOperand,
319     Match_InvalidTiedOperand,
320     Match_MissingFeature,
321     Match_MnemonicFail,
322     Match_Success,
323     Match_NearMisses,
324     FIRST_TARGET_MATCH_RESULT_TY
325   };
326 
327 protected: // Can only create subclasses.
328   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
329                     const MCInstrInfo &MII);
330 
331   /// Create a copy of STI and return a non-const reference to it.
332   MCSubtargetInfo &copySTI();
333 
334   /// AvailableFeatures - The current set of available features.
335   FeatureBitset AvailableFeatures;
336 
337   /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly?
338   bool ParsingMSInlineAsm = false;
339 
340   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
341   /// ms-style inline assembly.
342   MCAsmParserSemaCallback *SemaCallback = nullptr;
343 
344   /// Set of options which affects instrumentation of inline assembly.
345   MCTargetOptions MCOptions;
346 
347   /// Current STI.
348   const MCSubtargetInfo *STI;
349 
350   const MCInstrInfo &MII;
351 
352 public:
353   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
354   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
355 
356   ~MCTargetAsmParser() override;
357 
358   const MCSubtargetInfo &getSTI() const;
359 
360   const FeatureBitset& getAvailableFeatures() const {
361     return AvailableFeatures;
362   }
363   void setAvailableFeatures(const FeatureBitset& Value) {
364     AvailableFeatures = Value;
365   }
366 
367   bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; }
368   void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; }
369 
370   MCTargetOptions getTargetOptions() const { return MCOptions; }
371 
372   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
373     SemaCallback = Callback;
374   }
375 
376   // Target-specific parsing of expression.
377   virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
378     return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
379   }
380 
381   virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc,
382                              SMLoc &EndLoc) = 0;
383 
384   /// tryParseRegister - parse one register if possible
385   ///
386   /// Check whether a register specification can be parsed at the current
387   /// location, without failing the entire parse if it can't. Must not consume
388   /// tokens if the parse fails.
389   virtual OperandMatchResultTy
390   tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) = 0;
391 
392   /// ParseInstruction - Parse one assembly instruction.
393   ///
394   /// The parser is positioned following the instruction name. The target
395   /// specific instruction parser should parse the entire instruction and
396   /// construct the appropriate MCInst, or emit an error. On success, the entire
397   /// line should be parsed up to and including the end-of-statement token. On
398   /// failure, the parser is not required to read to the end of the line.
399   //
400   /// \param Name - The instruction name.
401   /// \param NameLoc - The source location of the name.
402   /// \param Operands [out] - The list of parsed operands, this returns
403   ///        ownership of them to the caller.
404   /// \return True on failure.
405   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
406                                 SMLoc NameLoc, OperandVector &Operands) = 0;
407   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
408                                 AsmToken Token, OperandVector &Operands) {
409     return ParseInstruction(Info, Name, Token.getLoc(), Operands);
410   }
411 
412   /// ParseDirective - Parse a target specific assembler directive
413   ///
414   /// The parser is positioned following the directive name.  The target
415   /// specific directive parser should parse the entire directive doing or
416   /// recording any target specific work, or return true and do nothing if the
417   /// directive is not target specific. If the directive is specific for
418   /// the target, the entire line is parsed up to and including the
419   /// end-of-statement token and false is returned.
420   ///
421   /// \param DirectiveID - the identifier token of the directive.
422   virtual bool ParseDirective(AsmToken DirectiveID) = 0;
423 
424   /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
425   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
426   /// This returns false on success and returns true on failure to match.
427   ///
428   /// On failure, the target parser is responsible for emitting a diagnostic
429   /// explaining the match failure.
430   virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
431                                        OperandVector &Operands, MCStreamer &Out,
432                                        uint64_t &ErrorInfo,
433                                        bool MatchingInlineAsm) = 0;
434 
435   /// Allows targets to let registers opt out of clobber lists.
436   virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
437 
438   /// Allow a target to add special case operand matching for things that
439   /// tblgen doesn't/can't handle effectively. For example, literal
440   /// immediates on ARM. TableGen expects a token operand, but the parser
441   /// will recognize them as immediates.
442   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
443                                               unsigned Kind) {
444     return Match_InvalidOperand;
445   }
446 
447   /// Validate the instruction match against any complex target predicates
448   /// before rendering any operands to it.
449   virtual unsigned
450   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
451     return Match_Success;
452   }
453 
454   /// checkTargetMatchPredicate - Validate the instruction match against
455   /// any complex target predicates not expressible via match classes.
456   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
457     return Match_Success;
458   }
459 
460   virtual void convertToMapAndConstraints(unsigned Kind,
461                                           const OperandVector &Operands) = 0;
462 
463   /// Returns whether two operands are registers and are equal. This is used
464   /// by the tied-operands checks in the AsmMatcher. This method can be
465   /// overridden to allow e.g. a sub- or super-register as the tied operand.
466   virtual bool areEqualRegs(const MCParsedAsmOperand &Op1,
467                             const MCParsedAsmOperand &Op2) const {
468     return Op1.isReg() && Op2.isReg() && Op1.getReg() == Op2.getReg();
469   }
470 
471   // Return whether this parser uses assignment statements with equals tokens
472   virtual bool equalIsAsmAssignment() { return true; };
473   // Return whether this start of statement identifier is a label
474   virtual bool isLabel(AsmToken &Token) { return true; };
475   // Return whether this parser accept star as start of statement
476   virtual bool starIsStartOfStatement() { return false; };
477 
478   virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
479                                             MCSymbolRefExpr::VariantKind,
480                                             MCContext &Ctx) {
481     return nullptr;
482   }
483 
484   // For actions that have to be performed before a label is emitted
485   virtual void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {}
486 
487   virtual void onLabelParsed(MCSymbol *Symbol) {}
488 
489   /// Ensure that all previously parsed instructions have been emitted to the
490   /// output streamer, if the target does not emit them immediately.
491   virtual void flushPendingInstructions(MCStreamer &Out) {}
492 
493   virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
494                                               AsmToken::TokenKind OperatorToken,
495                                               MCContext &Ctx) {
496     return nullptr;
497   }
498 
499   // For any initialization at the beginning of parsing.
500   virtual void onBeginOfFile() {}
501 
502   // For any checks or cleanups at the end of parsing.
503   virtual void onEndOfFile() {}
504 };
505 
506 } // end namespace llvm
507 
508 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
509