1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/MC/MCExpr.h"
14 #include "llvm/MC/MCParser/MCAsmParserExtension.h"
15 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
16 #include "llvm/MC/MCTargetOptions.h"
17 #include "llvm/Support/SMLoc.h"
18 #include "llvm/TargetParser/SubtargetFeature.h"
19 #include <cstdint>
20 #include <memory>
21 
22 namespace llvm {
23 
24 class MCContext;
25 class MCInst;
26 class MCInstrInfo;
27 class MCRegister;
28 class MCStreamer;
29 class MCSubtargetInfo;
30 class MCSymbol;
31 template <typename T> class SmallVectorImpl;
32 
// List of parsed operands; each element is held through a std::unique_ptr.
using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>;
34 
// Kinds of rewrite that can be recorded in an AsmRewrite.
//
// NOTE: AsmRewritePrecedence below is indexed by these enumerators. When a
// kind is added, removed or reordered, update the table in the same way.
enum AsmRewriteKind {
  AOK_Align,          // Rewrite align as .align.
  AOK_EVEN,           // Rewrite even as .even.
  AOK_Emit,           // Rewrite _emit as .byte.
  AOK_CallInput,      // Rewrite in terms of ${N:P}.
  AOK_Input,          // Rewrite in terms of $N.
  AOK_Output,         // Rewrite in terms of $N.
  AOK_SizeDirective,  // Add a sizing directive (e.g., dword ptr).
  AOK_Label,          // Rewrite local labels.
  AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t").
  AOK_Skip,           // Skip emission (e.g., offset/type operators).
  AOK_IntelExpr       // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp]
};

// Precedence value of each rewrite kind, indexed by AsmRewriteKind. The rows
// are listed in enumerator order.
const char AsmRewritePrecedence [] = {
  2, // AOK_Align
  2, // AOK_EVEN
  2, // AOK_Emit
  3, // AOK_CallInput
  3, // AOK_Input
  3, // AOK_Output
  5, // AOK_SizeDirective
  1, // AOK_Label
  5, // AOK_EndOfStatement
  2, // AOK_Skip
  2  // AOK_IntelExpr
};

// Catch a kind being added to the enum without a matching table row.
static_assert(sizeof(AsmRewritePrecedence) / sizeof(AsmRewritePrecedence[0]) ==
                  static_cast<unsigned>(AOK_IntelExpr) + 1,
              "AsmRewritePrecedence must have one entry per AsmRewriteKind");
62 
63 // Represent the various parts which make up an intel expression,
64 // used for emitting compound intel expressions
65 struct IntelExpr {
66   bool NeedBracs = false;
67   int64_t Imm = 0;
68   StringRef BaseReg;
69   StringRef IndexReg;
70   StringRef OffsetName;
71   unsigned Scale = 1;
72 
73   IntelExpr() = default;
74   // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
75   IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
76             StringRef offsetName, int64_t imm, bool needBracs)
77       : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg),
78         OffsetName(offsetName), Scale(1) {
79     if (scale)
80       Scale = scale;
81   }
82   bool hasBaseReg() const { return !BaseReg.empty(); }
83   bool hasIndexReg() const { return !IndexReg.empty(); }
84   bool hasRegs() const { return hasBaseReg() || hasIndexReg(); }
85   bool hasOffset() const { return !OffsetName.empty(); }
86   // Normally we won't emit immediates unconditionally,
87   // unless we've got no other components
88   bool emitImm() const { return !(hasRegs() || hasOffset()); }
89   bool isValid() const {
90     return (Scale == 1) ||
91            (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8));
92   }
93 };
94 
95 struct AsmRewrite {
96   AsmRewriteKind Kind;
97   SMLoc Loc;
98   unsigned Len;
99   bool Done;
100   int64_t Val;
101   StringRef Label;
102   IntelExpr IntelExp;
103   bool IntelExpRestricted;
104 
105 public:
106   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0,
107              bool Restricted = false)
108       : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {
109     IntelExpRestricted = Restricted;
110   }
111   AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label)
112     : AsmRewrite(kind, loc, len) { Label = label; }
113   AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp)
114     : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; }
115 };
116 
117 struct ParseInstructionInfo {
118   SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr;
119 
120   ParseInstructionInfo() = default;
121   ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites)
122     : AsmRewrites(rewrites) {}
123 };
124 
/// Three-way result of trying to match an operand.
enum OperandMatchResultTy {
  MatchOperand_Success,  // operand matched successfully
  MatchOperand_NoMatch,  // operand did not match
  MatchOperand_ParseFail // operand matched but had errors
};

/// Ternary parse status returned by various parse* methods.
class ParseStatus {
  enum class StatusTy { Success, Failure, NoMatch } Status;

  // Translate an OperandMatchResultTy into the corresponding status. Anything
  // that is neither Success nor ParseFail maps to NoMatch.
  static constexpr StatusTy fromOperandMatchResult(OperandMatchResultTy R) {
    switch (R) {
    case MatchOperand_Success:
      return StatusTy::Success;
    case MatchOperand_ParseFail:
      return StatusTy::Failure;
    default:
      return StatusTy::NoMatch;
    }
  }

public:
  // Expose the enumerators as ParseStatus::Success / Failure / NoMatch.
#if __cplusplus >= 202002L
  using enum StatusTy;
#else
  static constexpr StatusTy Success = StatusTy::Success;
  static constexpr StatusTy Failure = StatusTy::Failure;
  static constexpr StatusTy NoMatch = StatusTy::NoMatch;
#endif

  /// A default-constructed status is NoMatch.
  constexpr ParseStatus() : Status(StatusTy::NoMatch) {}

  constexpr ParseStatus(StatusTy Status) : Status(Status) {}

  /// Build from an "error" flag: true becomes Failure, false becomes Success.
  constexpr ParseStatus(bool Error)
      : Status(Error ? StatusTy::Failure : StatusTy::Success) {}

  /// Construction from any other type is rejected at compile time.
  template <typename T> constexpr ParseStatus(T) = delete;

  constexpr bool isSuccess() const { return Status == StatusTy::Success; }
  constexpr bool isFailure() const { return Status == StatusTy::Failure; }
  constexpr bool isNoMatch() const { return Status == StatusTy::NoMatch; }

  // Allow implicit conversions to / from OperandMatchResultTy.
  constexpr ParseStatus(OperandMatchResultTy R)
      : Status(fromOperandMatchResult(R)) {}
  constexpr operator OperandMatchResultTy() const {
    switch (Status) {
    case StatusTy::Success:
      return MatchOperand_Success;
    case StatusTy::Failure:
      return MatchOperand_ParseFail;
    case StatusTy::NoMatch:
      break;
    }
    return MatchOperand_NoMatch;
  }
};
167 
enum class DiagnosticPredicateTy {
  Match,
  NearMatch,
  NoMatch,
};

// After an operand has been parsed, the assembler walks the operand classes
// the operand could belong to and calls each class's PredicateMethod to find
// out whether it matches.
//
// When two AsmOperands would each produce a specific diagnostic on a failed
// match, nothing else tells the assembler which of them is the closer match.
// DiagnosticPredicate separates 'completely no match' from 'near match', so
// the assembler can either report the specific diagnostic, or fall back to
// 'InvalidOperand' and keep looking for a 'better matching' diagnostic.
//
// For example:
//    opcode opnd0, opnd1, opnd2
//
// where:
//    opnd2 could be an 'immediate of range [-8, 7]'
//    opnd2 could be a  'register + shift/extend'.
//
// If opnd2 is a valid register with a wrong shift/extend suffix, reporting
// that the operand should be an immediate in range [-8, 7] makes little
// sense.
//
// This is a light-weight alternative to the 'NearMissInfo' approach below,
// which collects *all* possible diagnostics. It is optional and fully
// backward compatible with existing PredicateMethods that return a 'bool'
// (match or no match).
struct DiagnosticPredicate {
  DiagnosticPredicateTy Type;

  // A plain bool result: true is Match, false is NearMatch (never NoMatch).
  explicit DiagnosticPredicate(bool Match)
      : Type(Match ? DiagnosticPredicateTy::Match
                   : DiagnosticPredicateTy::NearMatch) {}
  DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {}
  DiagnosticPredicate(const DiagnosticPredicate &) = default;
  DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default;

  // Converts to true only for Match.
  operator bool() const { return isMatch(); }
  bool isMatch() const { return Type == DiagnosticPredicateTy::Match; }
  bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; }
  bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; }
};
215 
216 // When matching of an assembly instruction fails, there may be multiple
217 // encodings that are close to being a match. It's often ambiguous which one
218 // the programmer intended to use, so we want to report an error which mentions
219 // each of these "near-miss" encodings. This struct contains information about
220 // one such encoding, and why it did not match the parsed instruction.
221 class NearMissInfo {
222 public:
223   enum NearMissKind {
224     NoNearMiss,
225     NearMissOperand,
226     NearMissFeature,
227     NearMissPredicate,
228     NearMissTooFewOperands,
229   };
230 
231   // The encoding is valid for the parsed assembly string. This is only used
232   // internally to the table-generated assembly matcher.
233   static NearMissInfo getSuccess() { return NearMissInfo(); }
234 
235   // The instruction encoding is not valid because it requires some target
236   // features that are not currently enabled. MissingFeatures has a bit set for
237   // each feature that the encoding needs but which is not enabled.
238   static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) {
239     NearMissInfo Result;
240     Result.Kind = NearMissFeature;
241     Result.Features = MissingFeatures;
242     return Result;
243   }
244 
245   // The instruction encoding is not valid because the target-specific
246   // predicate function returned an error code. FailureCode is the
247   // target-specific error code returned by the predicate.
248   static NearMissInfo getMissedPredicate(unsigned FailureCode) {
249     NearMissInfo Result;
250     Result.Kind = NearMissPredicate;
251     Result.PredicateError = FailureCode;
252     return Result;
253   }
254 
255   // The instruction encoding is not valid because one (and only one) parsed
256   // operand is not of the correct type. OperandError is the error code
257   // relating to the operand class expected by the encoding. OperandClass is
258   // the type of the expected operand. Opcode is the opcode of the encoding.
259   // OperandIndex is the index into the parsed operand list.
260   static NearMissInfo getMissedOperand(unsigned OperandError,
261                                        unsigned OperandClass, unsigned Opcode,
262                                        unsigned OperandIndex) {
263     NearMissInfo Result;
264     Result.Kind = NearMissOperand;
265     Result.MissedOperand.Error = OperandError;
266     Result.MissedOperand.Class = OperandClass;
267     Result.MissedOperand.Opcode = Opcode;
268     Result.MissedOperand.Index = OperandIndex;
269     return Result;
270   }
271 
272   // The instruction encoding is not valid because it expects more operands
273   // than were parsed. OperandClass is the class of the expected operand that
274   // was not provided. Opcode is the instruction encoding.
275   static NearMissInfo getTooFewOperands(unsigned OperandClass,
276                                         unsigned Opcode) {
277     NearMissInfo Result;
278     Result.Kind = NearMissTooFewOperands;
279     Result.TooFewOperands.Class = OperandClass;
280     Result.TooFewOperands.Opcode = Opcode;
281     return Result;
282   }
283 
284   operator bool() const { return Kind != NoNearMiss; }
285 
286   NearMissKind getKind() const { return Kind; }
287 
288   // Feature flags required by the instruction, that the current target does
289   // not have.
290   const FeatureBitset& getFeatures() const {
291     assert(Kind == NearMissFeature);
292     return Features;
293   }
294   // Error code returned by the target predicate when validating this
295   // instruction encoding.
296   unsigned getPredicateError() const {
297     assert(Kind == NearMissPredicate);
298     return PredicateError;
299   }
300   // MatchClassKind of the operand that we expected to see.
301   unsigned getOperandClass() const {
302     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
303     return MissedOperand.Class;
304   }
305   // Opcode of the encoding we were trying to match.
306   unsigned getOpcode() const {
307     assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands);
308     return MissedOperand.Opcode;
309   }
310   // Error code returned when validating the operand.
311   unsigned getOperandError() const {
312     assert(Kind == NearMissOperand);
313     return MissedOperand.Error;
314   }
315   // Index of the actual operand we were trying to match in the list of parsed
316   // operands.
317   unsigned getOperandIndex() const {
318     assert(Kind == NearMissOperand);
319     return MissedOperand.Index;
320   }
321 
322 private:
323   NearMissKind Kind;
324 
325   // These two structs share a common prefix, so we can safely rely on the fact
326   // that they overlap in the union.
327   struct MissedOpInfo {
328     unsigned Class;
329     unsigned Opcode;
330     unsigned Error;
331     unsigned Index;
332   };
333 
334   struct TooFewOperandsInfo {
335     unsigned Class;
336     unsigned Opcode;
337   };
338 
339   union {
340     FeatureBitset Features;
341     unsigned PredicateError;
342     MissedOpInfo MissedOperand;
343     TooFewOperandsInfo TooFewOperands;
344   };
345 
346   NearMissInfo() : Kind(NoNearMiss) {}
347 };
348 
349 /// MCTargetAsmParser - Generic interface to target specific assembly parsers.
350 class MCTargetAsmParser : public MCAsmParserExtension {
351 public:
352   enum MatchResultTy {
353     Match_InvalidOperand,
354     Match_InvalidTiedOperand,
355     Match_MissingFeature,
356     Match_MnemonicFail,
357     Match_Success,
358     Match_NearMisses,
359     FIRST_TARGET_MATCH_RESULT_TY
360   };
361 
362 protected: // Can only create subclasses.
363   MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI,
364                     const MCInstrInfo &MII);
365 
366   /// Create a copy of STI and return a non-const reference to it.
367   MCSubtargetInfo &copySTI();
368 
369   /// AvailableFeatures - The current set of available features.
370   FeatureBitset AvailableFeatures;
371 
372   /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly?
373   bool ParsingMSInlineAsm = false;
374 
375   /// SemaCallback - The Sema callback implementation.  Must be set when parsing
376   /// ms-style inline assembly.
377   MCAsmParserSemaCallback *SemaCallback = nullptr;
378 
379   /// Set of options which affects instrumentation of inline assembly.
380   MCTargetOptions MCOptions;
381 
382   /// Current STI.
383   const MCSubtargetInfo *STI;
384 
385   const MCInstrInfo &MII;
386 
387 public:
388   MCTargetAsmParser(const MCTargetAsmParser &) = delete;
389   MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete;
390 
391   ~MCTargetAsmParser() override;
392 
393   const MCSubtargetInfo &getSTI() const;
394 
395   const FeatureBitset& getAvailableFeatures() const {
396     return AvailableFeatures;
397   }
398   void setAvailableFeatures(const FeatureBitset& Value) {
399     AvailableFeatures = Value;
400   }
401 
402   bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; }
403   void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; }
404 
405   MCTargetOptions getTargetOptions() const { return MCOptions; }
406 
407   void setSemaCallback(MCAsmParserSemaCallback *Callback) {
408     SemaCallback = Callback;
409   }
410 
411   // Target-specific parsing of expression.
412   virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
413     return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
414   }
415 
416   virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc,
417                              SMLoc &EndLoc) = 0;
418 
419   /// tryParseRegister - parse one register if possible
420   ///
421   /// Check whether a register specification can be parsed at the current
422   /// location, without failing the entire parse if it can't. Must not consume
423   /// tokens if the parse fails.
424   virtual OperandMatchResultTy
425   tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) = 0;
426 
427   /// ParseInstruction - Parse one assembly instruction.
428   ///
429   /// The parser is positioned following the instruction name. The target
430   /// specific instruction parser should parse the entire instruction and
431   /// construct the appropriate MCInst, or emit an error. On success, the entire
432   /// line should be parsed up to and including the end-of-statement token. On
433   /// failure, the parser is not required to read to the end of the line.
434   //
435   /// \param Name - The instruction name.
436   /// \param NameLoc - The source location of the name.
437   /// \param Operands [out] - The list of parsed operands, this returns
438   ///        ownership of them to the caller.
439   /// \return True on failure.
440   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
441                                 SMLoc NameLoc, OperandVector &Operands) = 0;
442   virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
443                                 AsmToken Token, OperandVector &Operands) {
444     return ParseInstruction(Info, Name, Token.getLoc(), Operands);
445   }
446 
447   /// ParseDirective - Parse a target specific assembler directive
448   /// This method is deprecated, use 'parseDirective' instead.
449   ///
450   /// The parser is positioned following the directive name.  The target
451   /// specific directive parser should parse the entire directive doing or
452   /// recording any target specific work, or return true and do nothing if the
453   /// directive is not target specific. If the directive is specific for
454   /// the target, the entire line is parsed up to and including the
455   /// end-of-statement token and false is returned.
456   ///
457   /// \param DirectiveID - the identifier token of the directive.
458   virtual bool ParseDirective(AsmToken DirectiveID) { return true; }
459 
460   /// Parses a target-specific assembler directive.
461   ///
462   /// The parser is positioned following the directive name. The target-specific
463   /// directive parser should parse the entire directive doing or recording any
464   /// target-specific work, or emit an error. On success, the entire line should
465   /// be parsed up to and including the end-of-statement token. On failure, the
466   /// parser is not required to read to the end of the line. If the directive is
467   /// not target-specific, no tokens should be consumed and NoMatch is returned.
468   ///
469   /// \param DirectiveID - The token identifying the directive.
470   virtual ParseStatus parseDirective(AsmToken DirectiveID);
471 
472   /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
473   /// instruction as an actual MCInst and emit it to the specified MCStreamer.
474   /// This returns false on success and returns true on failure to match.
475   ///
476   /// On failure, the target parser is responsible for emitting a diagnostic
477   /// explaining the match failure.
478   virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
479                                        OperandVector &Operands, MCStreamer &Out,
480                                        uint64_t &ErrorInfo,
481                                        bool MatchingInlineAsm) = 0;
482 
483   /// Allows targets to let registers opt out of clobber lists.
484   virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; }
485 
486   /// Allow a target to add special case operand matching for things that
487   /// tblgen doesn't/can't handle effectively. For example, literal
488   /// immediates on ARM. TableGen expects a token operand, but the parser
489   /// will recognize them as immediates.
490   virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
491                                               unsigned Kind) {
492     return Match_InvalidOperand;
493   }
494 
495   /// Validate the instruction match against any complex target predicates
496   /// before rendering any operands to it.
497   virtual unsigned
498   checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
499     return Match_Success;
500   }
501 
502   /// checkTargetMatchPredicate - Validate the instruction match against
503   /// any complex target predicates not expressible via match classes.
504   virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
505     return Match_Success;
506   }
507 
508   virtual void convertToMapAndConstraints(unsigned Kind,
509                                           const OperandVector &Operands) = 0;
510 
511   /// Returns whether two operands are registers and are equal. This is used
512   /// by the tied-operands checks in the AsmMatcher. This method can be
513   /// overridden to allow e.g. a sub- or super-register as the tied operand.
514   virtual bool areEqualRegs(const MCParsedAsmOperand &Op1,
515                             const MCParsedAsmOperand &Op2) const {
516     return Op1.isReg() && Op2.isReg() && Op1.getReg() == Op2.getReg();
517   }
518 
519   // Return whether this parser uses assignment statements with equals tokens
520   virtual bool equalIsAsmAssignment() { return true; };
521   // Return whether this start of statement identifier is a label
522   virtual bool isLabel(AsmToken &Token) { return true; };
523   // Return whether this parser accept star as start of statement
524   virtual bool starIsStartOfStatement() { return false; };
525 
526   virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
527                                             MCSymbolRefExpr::VariantKind,
528                                             MCContext &Ctx) {
529     return nullptr;
530   }
531 
532   // For actions that have to be performed before a label is emitted
533   virtual void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {}
534 
535   virtual void onLabelParsed(MCSymbol *Symbol) {}
536 
537   /// Ensure that all previously parsed instructions have been emitted to the
538   /// output streamer, if the target does not emit them immediately.
539   virtual void flushPendingInstructions(MCStreamer &Out) {}
540 
541   virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
542                                               AsmToken::TokenKind OperatorToken,
543                                               MCContext &Ctx) {
544     return nullptr;
545   }
546 
547   // For any initialization at the beginning of parsing.
548   virtual void onBeginOfFile() {}
549 
550   // For any checks or cleanups at the end of parsing.
551   virtual void onEndOfFile() {}
552 };
553 
554 } // end namespace llvm
555 
556 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H
557