//===- llvm/MC/MCParser/MCTargetAsmParser.h - Target Asm Parser -*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCInstrInfo.h"
15 #include "llvm/MC/MCParser/MCAsmLexer.h"
16 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
17 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
18 #include "llvm/MC/MCTargetOptions.h"
19 #include "llvm/MC/SubtargetFeature.h"
20 #include "llvm/Support/SMLoc.h"
21 #include <cstdint>
22 #include <memory>
23 
24 namespace llvm {
25 
26 class MCInst;
27 class MCParsedAsmOperand;
28 class MCStreamer;
29 class MCSubtargetInfo;
30 template <typename T> class SmallVectorImpl;
31 
32 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
33 
/// Kinds of rewrite that an AsmRewrite record can describe.
///
/// The enumerators are also used as indices into AsmRewritePrecedence, so the
/// two definitions must list their entries in the same order.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_CallInput,      // Rewrite in terms of ${N:P}.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

/// Precedence value of each rewrite kind; entry I belongs to the
/// AsmRewriteKind enumerator whose value is I.
const char AsmRewritePrecedence[] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_CallInput
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Adding an enumerator without a matching table entry (or vice versa) would
// make lookups read the wrong precedence or run off the end of the table.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  static_cast<unsigned>(AOK_IntelExpr) + 1,
              "AsmRewritePrecedence must have one entry per AsmRewriteKind");
61 
62 // Represnt the various parts which makes up an intel expression,
63 // used for emitting compound intel expressions
64 struct IntelExpr {
65   bool NeedBracs;
66   int64_t Imm;
67   StringRef BaseReg;
68   StringRef IndexReg;
69   StringRef OffsetName;
70   unsigned Scale;
71 
72   IntelExpr()
73       : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()),
74         OffsetName(StringRef()), Scale(1) {}
75   // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
76   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
77             StringRef offsetName, int64_t imm, bool needBracs)
78       : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
79         OffsetName(offsetName), Scale(1) {
80     if (scale)
81       Scale = scale;
82   }
83   bool hasBaseReg() const { return !BaseReg.empty(); }
84   bool hasIndexReg() const { return !IndexReg.empty(); }
85   bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
86   bool hasOffset() const { return !OffsetName.empty(); }
87   // Normally we won't emit immediates unconditionally,
88   // unless we've got no other components
89   bool emitImm() const { return !(hasRegs() || hasOffset()); }
90   bool isValid() const {
91     return (Scale == 1) ||
92            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
93   }
94 };
95 
96 struct AsmRewrite {
97   AsmRewriteKind Kind;
98   SMLoc Loc;
99   unsigned Len;
100   bool Done;
101   int64_t Val;
102   StringRef Label;
103   IntelExpr IntelExp;
104 
105 public:
106   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0)
107     : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {}
108   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
109     : AsmRewrite(kind, loc, len) { Label = label; }
110   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
111     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
112 };
113 
114 struct ParseInstructionInfo {
115   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
116 
117   ParseInstructionInfo() = default;
118   ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
119     : AsmRewrites(rewrites) {}
120 };
121 
/// Outcome of trying to match a single operand.
enum OperandMatchResultTy {
  /// Operand matched successfully.
  MatchOperand_Success,
  /// Operand did not match.
  MatchOperand_NoMatch,
  /// Operand matched but had errors.
  MatchOperand_ParseFail
};
127 
/// Three-way result carried by a DiagnosticPredicate: a match, a near match,
/// or no match at all.
enum class DiagnosticPredicateTy {
  Match,     ///< The operand matches.
  NearMatch, ///< The operand does not match, but is close to matching.
  NoMatch,   ///< The operand does not match at all.
};
133 
134 // When an operand is parsed, the assembler will try to iterate through a set of
135 // possible operand classes that the operand might match and call the
136 // corresponding PredicateMethod to determine that.
137 //
138 // If there are two AsmOperands that would give a specific diagnostic if there
139 // is no match, there is currently no mechanism to distinguish which operand is
140 // a closer match. The DiagnosticPredicate distinguishes between 'completely
141 // no match' and 'near match', so the assembler can decide whether to give a
142 // specific diagnostic, or use 'InvalidOperand' and continue to find a
143 // 'better matching' diagnostic.
144 //
145 // For example:
146 //    opcode opnd0, onpd1, opnd2
147 //
148 // where:
149 //    opnd2 could be an 'immediate of range [-8, 7]'
150 //    opnd2 could be a  'register + shift/extend'.
151 //
152 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes
153 // little sense to give a diagnostic that the operand should be an immediate
154 // in range [-8, 7].
155 //
156 // This is a light-weight alternative to the 'NearMissInfo' approach
157 // below which collects *all* possible diagnostics. This alternative
158 // is optional and fully backward compatible with existing
159 // PredicateMethods that return a 'bool' (match or no match).
160 struct DiagnosticPredicate {
161   DiagnosticPredicateTy Type;
162 
163   explicit DiagnosticPredicate(bool Match)
164       : Type(Match ? DiagnosticPredicateTy::Match
165                    : DiagnosticPredicateTy::NearMatch) {}
166   DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
167   DiagnosticPredicate(const DiagnosticPredicate &) = default;
168   DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default;
169 
170   operator bool() const { return Type == DiagnosticPredicateTy::Match; }
171   bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
172   bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
173   bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
174 };
175 
176 // When matching of an assembly instruction fails, there may be multiple
177 // encodings that are close to being a match. It's often ambiguous which one
178 // the programmer intended to use, so we want to report an error which mentions
179 // each of these "near-miss" encodings. This struct contains information about
180 // one such encoding, and why it did not match the parsed instruction.
181 class NearMissInfo {
182 public:
183   enum NearMissKind {
184     NoNearMiss,
185     NearMissOperand,
186     NearMissFeature,
187     NearMissPredicate,
188     NearMissTooFewOperands,
189   };
190 
191   // The encoding is valid for the parsed assembly string. This is only used
192   // internally to the table-generated assembly matcher.
193   static NearMissInfo getSuccess() { return NearMissInfo(); }
194 
195   // The instruction encoding is not valid because it requires some target
196   // features that are not currently enabled. MissingFeatures has a bit set for
197   // each feature that the encoding needs but which is not enabled.
198   static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
199     NearMissInfo Result;
200     Result.Kind = NearMissFeature;
201     Result.Features = MissingFeatures;
202     return Result;
203   }
204 
205   // The instruction encoding is not valid because the target-specific
206   // predicate function returned an error code. FailureCode is the
207   // target-specific error code returned by the predicate.
208   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
209     NearMissInfo Result;
210     Result.Kind = NearMissPredicate;
211     Result.PredicateError = FailureCode;
212     return Result;
213   }
214 
215   // The instruction encoding is not valid because one (and only one) parsed
216   // operand is not of the correct type. OperandError is the error code
217   // relating to the operand class expected by the encoding. OperandClass is
218   // the type of the expected operand. Opcode is the opcode of the encoding.
219   // OperandIndex is the index into the parsed operand list.
220   static NearMissInfo getMissedOperand(unsigned OperandError,
221                                        unsigned OperandClass, unsigned Opcode,
222                                        unsigned OperandIndex) {
223     NearMissInfo Result;
224     Result.Kind = NearMissOperand;
225     Result.MissedOperand.Error = OperandError;
226     Result.MissedOperand.Class = OperandClass;
227     Result.MissedOperand.Opcode = Opcode;
228     Result.MissedOperand.Index = OperandIndex;
229     return Result;
230   }
231 
232   // The instruction encoding is not valid because it expects more operands
233   // than were parsed. OperandClass is the class of the expected operand that
234   // was not provided. Opcode is the instruction encoding.
235   static NearMissInfo getTooFewOperands(unsigned OperandClass,
236                                         unsigned Opcode) {
237     NearMissInfo Result;
238     Result.Kind = NearMissTooFewOperands;
239     Result.TooFewOperands.Class = OperandClass;
240     Result.TooFewOperands.Opcode = Opcode;
241     return Result;
242   }
243 
244   operator bool() const { return Kind != NoNearMiss; }
245 
246   NearMissKind getKind() const { return Kind; }
247 
248   // Feature flags required by the instruction, that the current target does
249   // not have.
250   const FeatureBitset& getFeatures() const {
251     assert(Kind == NearMissFeature);
252     return Features;
253   }
254   // Error code returned by the target predicate when validating this
255   // instruction encoding.
256   unsigned getPredicateError() const {
257     assert(Kind == NearMissPredicate);
258     return PredicateError;
259   }
260   // MatchClassKind of the operand that we expected to see.
261   unsigned getOperandClass() const {
262     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
263     return MissedOperand.Class;
264   }
265   // Opcode of the encoding we were trying to match.
266   unsigned getOpcode() const {
267     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
268     return MissedOperand.Opcode;
269   }
270   // Error code returned when validating the operand.
271   unsigned getOperandError() const {
272     assert(Kind == NearMissOperand);
273     return MissedOperand.Error;
274   }
275   // Index of the actual operand we were trying to match in the list of parsed
276   // operands.
277   unsigned getOperandIndex() const {
278     assert(Kind == NearMissOperand);
279     return MissedOperand.Index;
280   }
281 
282 private:
283   NearMissKind Kind;
284 
285   // These two structs share a common prefix, so we can safely rely on the fact
286   // that they overlap in the union.
287   struct MissedOpInfo {
288     unsigned Class;
289     unsigned Opcode;
290     unsigned Error;
291     unsigned Index;
292   };
293 
294   struct TooFewOperandsInfo {
295     unsigned Class;
296     unsigned Opcode;
297   };
298 
299   union {
300     FeatureBitset Features;
301     unsigned PredicateError;
302     MissedOpInfo MissedOperand;
303     TooFewOperandsInfo TooFewOperands;
304   };
305 
306   NearMissInfo() : Kind(NoNearMiss) {}
307 };
308 
309 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
310 class MCTargetAsmParser : public MCAsmParserExtension {
311 public:
312   enum MatchResultTy {
313     Match_InvalidOperand,
314     Match_InvalidTiedOperand,
315     Match_MissingFeature,
316     Match_MnemonicFail,
317     Match_Success,
318     Match_NearMisses,
319     FIRST_TARGET_MATCH_RESULT_TY
320   };
321 
322 protected: // Can only create subclasses.
323   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
324                     const MCInstrInfo &MII);
325 
326   /// Create a copy of STI and return a non-const reference to it.
327   MCSubtargetInfo &copySTI();
328 
329   /// AvailableFeatures - The current set of available features.
330   FeatureBitset AvailableFeatures;
331 
332   /// ParsingInlineAsm - Are we parsing ms-style inline assembly?
333   bool ParsingInlineAsm = false;
334 
335   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
336   /// ms-style inline assembly.
337   MCAsmParserSemaCallback *SemaCallback;
338 
339   /// Set of options which affects instrumentation of inline assembly.
340   MCTargetOptions MCOptions;
341 
342   /// Current STI.
343   const MCSubtargetInfo *STI;
344 
345   const MCInstrInfo &MII;
346 
347 public:
348   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
349   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
350 
351   ~MCTargetAsmParser() override;
352 
353   const MCSubtargetInfo &getSTI() const;
354 
355   const FeatureBitset& getAvailableFeatures() const {
356     return AvailableFeatures;
357   }
358   void setAvailableFeatures(const FeatureBitset& Value) {
359     AvailableFeatures = Value;
360   }
361 
362   bool isParsingInlineAsm () { return ParsingInlineAsm; }
363   void setParsingInlineAsm (bool Value) { ParsingInlineAsm = Value; }
364 
365   MCTargetOptions getTargetOptions() const { return MCOptions; }
366 
367   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
368     SemaCallback = Callback;
369   }
370 
371   // Target-specific parsing of expression.
372   virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
373     return getParser().parsePrimaryExpr(Res, EndLoc);
374   }
375 
376   virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
377                              SMLoc &EndLoc) = 0;
378 
379   /// ParseInstruction - Parse one assembly instruction.
380   ///
381   /// The parser is positioned following the instruction name. The target
382   /// specific instruction parser should parse the entire instruction and
383   /// construct the appropriate MCInst, or emit an error. On success, the entire
384   /// line should be parsed up to and including the end-of-statement token. On
385   /// failure, the parser is not required to read to the end of the line.
386   //
387   /// \param Name - The instruction name.
388   /// \param NameLoc - The source location of the name.
389   /// \param Operands [out] - The list of parsed operands, this returns
390   ///        ownership of them to the caller.
391   /// \return True on failure.
392   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
393                                 SMLoc NameLoc, OperandVector &Operands) = 0;
394   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
395                                 AsmToken Token, OperandVector &Operands) {
396     return ParseInstruction(Info, Name, Token.getLoc(), Operands);
397   }
398 
399   /// ParseDirective - Parse a target specific assembler directive
400   ///
401   /// The parser is positioned following the directive name.  The target
402   /// specific directive parser should parse the entire directive doing or
403   /// recording any target specific work, or return true and do nothing if the
404   /// directive is not target specific. If the directive is specific for
405   /// the target, the entire line is parsed up to and including the
406   /// end-of-statement token and false is returned.
407   ///
408   /// \param DirectiveID - the identifier token of the directive.
409   virtual bool ParseDirective(AsmToken DirectiveID) = 0;
410 
411   /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
412   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
413   /// This returns false on success and returns true on failure to match.
414   ///
415   /// On failure, the target parser is responsible for emitting a diagnostic
416   /// explaining the match failure.
417   virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
418                                        OperandVector &Operands, MCStreamer &Out,
419                                        uint64_t &ErrorInfo,
420                                        bool MatchingInlineAsm) = 0;
421 
422   /// Allows targets to let registers opt out of clobber lists.
423   virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
424 
425   /// Allow a target to add special case operand matching for things that
426   /// tblgen doesn't/can't handle effectively. For example, literal
427   /// immediates on ARM. TableGen expects a token operand, but the parser
428   /// will recognize them as immediates.
429   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
430                                               unsigned Kind) {
431     return Match_InvalidOperand;
432   }
433 
434   /// Validate the instruction match against any complex target predicates
435   /// before rendering any operands to it.
436   virtual unsigned
437   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
438     return Match_Success;
439   }
440 
441   /// checkTargetMatchPredicate - Validate the instruction match against
442   /// any complex target predicates not expressible via match classes.
443   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
444     return Match_Success;
445   }
446 
447   virtual void convertToMapAndConstraints(unsigned Kind,
448                                           const OperandVector &Operands) = 0;
449 
450   /// Returns whether two registers are equal and is used by the tied-operands
451   /// checks in the AsmMatcher. This method can be overridden allow e.g. a
452   /// sub- or super-register as the tied operand.
453   virtual bool regsEqual(const MCParsedAsmOperand &Op1,
454                          const MCParsedAsmOperand &Op2) const {
455     assert(Op1.isReg() && Op2.isReg() && "Operands not all regs");
456     return Op1.getReg() == Op2.getReg();
457   }
458 
459   // Return whether this parser uses assignment statements with equals tokens
460   virtual bool equalIsAsmAssignment() { return true; };
461   // Return whether this start of statement identifier is a label
462   virtual bool isLabel(AsmToken &Token) { return true; };
463   // Return whether this parser accept star as start of statement
464   virtual bool starIsStartOfStatement() { return false; };
465 
466   virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
467                                             MCSymbolRefExpr::VariantKind,
468                                             MCContext &Ctx) {
469     return nullptr;
470   }
471 
472   // For actions that have to be performed before a label is emitted
473   virtual void doBeforeLabelEmit(MCSymbol *Symbol) {}
474 
475   virtual void onLabelParsed(MCSymbol *Symbol) {}
476 
477   /// Ensure that all previously parsed instructions have been emitted to the
478   /// output streamer, if the target does not emit them immediately.
479   virtual void flushPendingInstructions(MCStreamer &Out) {}
480 
481   virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
482                                               AsmToken::TokenKind OperatorToken,
483                                               MCContext &Ctx) {
484     return nullptr;
485   }
486 
487   // For any checks or cleanups at the end of parsing.
488   virtual void onEndOfFile() {}
489 };
490 
491 } // end namespace llvm
492 
493 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
494