1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/MC/MCExpr.h" 14 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 15 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 16 #include "llvm/MC/MCTargetOptions.h" 17 #include "llvm/MC/SubtargetFeature.h" 18 #include "llvm/Support/SMLoc.h" 19 #include <cstdint> 20 #include <memory> 21 22 namespace llvm { 23 24 class MCContext; 25 class MCInst; 26 class MCInstrInfo; 27 class MCRegister; 28 class MCStreamer; 29 class MCSubtargetInfo; 30 class MCSymbol; 31 template <typename T> class SmallVectorImpl; 32 33 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; 34 35 enum AsmRewriteKind { 36 AOK_Align, // Rewrite align as .align. 37 AOK_EVEN, // Rewrite even as .even. 38 AOK_Emit, // Rewrite _emit as .byte. 39 AOK_CallInput, // Rewrite in terms of ${N:P}. 40 AOK_Input, // Rewrite in terms of $N. 41 AOK_Output, // Rewrite in terms of $N. 42 AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). 43 AOK_Label, // Rewrite local labels. 44 AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). 45 AOK_Skip, // Skip emission (e.g., offset/type operators). 46 AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp] 47 }; 48 49 const char AsmRewritePrecedence [] = { 50 2, // AOK_Align 51 2, // AOK_EVEN 52 2, // AOK_Emit 53 3, // AOK_Input 54 3, // AOK_CallInput 55 3, // AOK_Output 56 5, // AOK_SizeDirective 57 1, // AOK_Label 58 5, // AOK_EndOfStatement 59 2, // AOK_Skip 60 2 // AOK_IntelExpr 61 }; 62 63 // Represnt the various parts which makes up an intel expression, 64 // used for emitting compound intel expressions 65 struct IntelExpr { 66 bool NeedBracs; 67 int64_t Imm; 68 StringRef BaseReg; 69 StringRef IndexReg; 70 StringRef OffsetName; 71 unsigned Scale; 72 IntelExprIntelExpr73 IntelExpr() 74 : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()), 75 OffsetName(StringRef()), Scale(1) {} 76 // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] IntelExprIntelExpr77 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, 78 StringRef offsetName, int64_t imm, bool needBracs) 79 : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg), 80 OffsetName(offsetName), Scale(1) { 81 if (scale) 82 Scale = scale; 83 } hasBaseRegIntelExpr84 bool hasBaseReg() const { return !BaseReg.empty(); } hasIndexRegIntelExpr85 bool hasIndexReg() const { return !IndexReg.empty(); } hasRegsIntelExpr86 bool hasRegs() const { return hasBaseReg() || hasIndexReg(); } hasOffsetIntelExpr87 bool hasOffset() const { return !OffsetName.empty(); } 88 // Normally we won't emit immediates unconditionally, 89 // unless we've got no other components emitImmIntelExpr90 bool emitImm() const { return !(hasRegs() || hasOffset()); } isValidIntelExpr91 bool isValid() const { 92 return (Scale == 1) || 93 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); 94 } 95 }; 96 97 struct AsmRewrite { 98 AsmRewriteKind Kind; 99 SMLoc Loc; 100 unsigned Len; 101 bool Done; 102 int64_t Val; 103 StringRef Label; 104 IntelExpr IntelExp; 105 bool IntelExpRestricted; 106 107 public: 108 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0, 109 bool Restricted = false) KindAsmRewrite110 : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) { 111 IntelExpRestricted = Restricted; 112 } AsmRewriteAsmRewrite113 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) 114 : AsmRewrite(kind, loc, len) { Label = label; } AsmRewriteAsmRewrite115 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) 116 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } 117 }; 118 119 struct ParseInstructionInfo { 120 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; 121 122 ParseInstructionInfo() = default; ParseInstructionInfoParseInstructionInfo123 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) 124 : AsmRewrites(rewrites) {} 125 }; 126 127 enum OperandMatchResultTy { 128 MatchOperand_Success, // operand matched successfully 129 MatchOperand_NoMatch, // operand did not match 130 MatchOperand_ParseFail // operand matched but had errors 131 }; 132 133 enum class DiagnosticPredicateTy { 134 Match, 135 NearMatch, 136 NoMatch, 137 }; 138 139 // When an operand is parsed, the assembler will try to iterate through a set of 140 // possible operand classes that the operand might match and call the 141 // corresponding PredicateMethod to determine that. 142 // 143 // If there are two AsmOperands that would give a specific diagnostic if there 144 // is no match, there is currently no mechanism to distinguish which operand is 145 // a closer match. The DiagnosticPredicate distinguishes between 'completely 146 // no match' and 'near match', so the assembler can decide whether to give a 147 // specific diagnostic, or use 'InvalidOperand' and continue to find a 148 // 'better matching' diagnostic. 149 // 150 // For example: 151 // opcode opnd0, onpd1, opnd2 152 // 153 // where: 154 // opnd2 could be an 'immediate of range [-8, 7]' 155 // opnd2 could be a 'register + shift/extend'. 156 // 157 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes 158 // little sense to give a diagnostic that the operand should be an immediate 159 // in range [-8, 7]. 160 // 161 // This is a light-weight alternative to the 'NearMissInfo' approach 162 // below which collects *all* possible diagnostics. This alternative 163 // is optional and fully backward compatible with existing 164 // PredicateMethods that return a 'bool' (match or no match). 165 struct DiagnosticPredicate { 166 DiagnosticPredicateTy Type; 167 DiagnosticPredicateDiagnosticPredicate168 explicit DiagnosticPredicate(bool Match) 169 : Type(Match ? DiagnosticPredicateTy::Match 170 : DiagnosticPredicateTy::NearMatch) {} DiagnosticPredicateDiagnosticPredicate171 DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {} 172 DiagnosticPredicate(const DiagnosticPredicate &) = default; 173 DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default; 174 175 operator bool() const { return Type == DiagnosticPredicateTy::Match; } isMatchDiagnosticPredicate176 bool isMatch() const { return Type == DiagnosticPredicateTy::Match; } isNearMatchDiagnosticPredicate177 bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; } isNoMatchDiagnosticPredicate178 bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; } 179 }; 180 181 // When matching of an assembly instruction fails, there may be multiple 182 // encodings that are close to being a match. It's often ambiguous which one 183 // the programmer intended to use, so we want to report an error which mentions 184 // each of these "near-miss" encodings. This struct contains information about 185 // one such encoding, and why it did not match the parsed instruction. 186 class NearMissInfo { 187 public: 188 enum NearMissKind { 189 NoNearMiss, 190 NearMissOperand, 191 NearMissFeature, 192 NearMissPredicate, 193 NearMissTooFewOperands, 194 }; 195 196 // The encoding is valid for the parsed assembly string. This is only used 197 // internally to the table-generated assembly matcher. getSuccess()198 static NearMissInfo getSuccess() { return NearMissInfo(); } 199 200 // The instruction encoding is not valid because it requires some target 201 // features that are not currently enabled. MissingFeatures has a bit set for 202 // each feature that the encoding needs but which is not enabled. getMissedFeature(const FeatureBitset & MissingFeatures)203 static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) { 204 NearMissInfo Result; 205 Result.Kind = NearMissFeature; 206 Result.Features = MissingFeatures; 207 return Result; 208 } 209 210 // The instruction encoding is not valid because the target-specific 211 // predicate function returned an error code. FailureCode is the 212 // target-specific error code returned by the predicate. getMissedPredicate(unsigned FailureCode)213 static NearMissInfo getMissedPredicate(unsigned FailureCode) { 214 NearMissInfo Result; 215 Result.Kind = NearMissPredicate; 216 Result.PredicateError = FailureCode; 217 return Result; 218 } 219 220 // The instruction encoding is not valid because one (and only one) parsed 221 // operand is not of the correct type. OperandError is the error code 222 // relating to the operand class expected by the encoding. OperandClass is 223 // the type of the expected operand. Opcode is the opcode of the encoding. 224 // OperandIndex is the index into the parsed operand list. getMissedOperand(unsigned OperandError,unsigned OperandClass,unsigned Opcode,unsigned OperandIndex)225 static NearMissInfo getMissedOperand(unsigned OperandError, 226 unsigned OperandClass, unsigned Opcode, 227 unsigned OperandIndex) { 228 NearMissInfo Result; 229 Result.Kind = NearMissOperand; 230 Result.MissedOperand.Error = OperandError; 231 Result.MissedOperand.Class = OperandClass; 232 Result.MissedOperand.Opcode = Opcode; 233 Result.MissedOperand.Index = OperandIndex; 234 return Result; 235 } 236 237 // The instruction encoding is not valid because it expects more operands 238 // than were parsed. OperandClass is the class of the expected operand that 239 // was not provided. Opcode is the instruction encoding. getTooFewOperands(unsigned OperandClass,unsigned Opcode)240 static NearMissInfo getTooFewOperands(unsigned OperandClass, 241 unsigned Opcode) { 242 NearMissInfo Result; 243 Result.Kind = NearMissTooFewOperands; 244 Result.TooFewOperands.Class = OperandClass; 245 Result.TooFewOperands.Opcode = Opcode; 246 return Result; 247 } 248 249 operator bool() const { return Kind != NoNearMiss; } 250 getKind()251 NearMissKind getKind() const { return Kind; } 252 253 // Feature flags required by the instruction, that the current target does 254 // not have. getFeatures()255 const FeatureBitset& getFeatures() const { 256 assert(Kind == NearMissFeature); 257 return Features; 258 } 259 // Error code returned by the target predicate when validating this 260 // instruction encoding. getPredicateError()261 unsigned getPredicateError() const { 262 assert(Kind == NearMissPredicate); 263 return PredicateError; 264 } 265 // MatchClassKind of the operand that we expected to see. getOperandClass()266 unsigned getOperandClass() const { 267 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 268 return MissedOperand.Class; 269 } 270 // Opcode of the encoding we were trying to match. getOpcode()271 unsigned getOpcode() const { 272 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 273 return MissedOperand.Opcode; 274 } 275 // Error code returned when validating the operand. getOperandError()276 unsigned getOperandError() const { 277 assert(Kind == NearMissOperand); 278 return MissedOperand.Error; 279 } 280 // Index of the actual operand we were trying to match in the list of parsed 281 // operands. getOperandIndex()282 unsigned getOperandIndex() const { 283 assert(Kind == NearMissOperand); 284 return MissedOperand.Index; 285 } 286 287 private: 288 NearMissKind Kind; 289 290 // These two structs share a common prefix, so we can safely rely on the fact 291 // that they overlap in the union. 292 struct MissedOpInfo { 293 unsigned Class; 294 unsigned Opcode; 295 unsigned Error; 296 unsigned Index; 297 }; 298 299 struct TooFewOperandsInfo { 300 unsigned Class; 301 unsigned Opcode; 302 }; 303 304 union { 305 FeatureBitset Features; 306 unsigned PredicateError; 307 MissedOpInfo MissedOperand; 308 TooFewOperandsInfo TooFewOperands; 309 }; 310 NearMissInfo()311 NearMissInfo() : Kind(NoNearMiss) {} 312 }; 313 314 /// MCTargetAsmParser - Generic interface to target specific assembly parsers. 315 class MCTargetAsmParser : public MCAsmParserExtension { 316 public: 317 enum MatchResultTy { 318 Match_InvalidOperand, 319 Match_InvalidTiedOperand, 320 Match_MissingFeature, 321 Match_MnemonicFail, 322 Match_Success, 323 Match_NearMisses, 324 FIRST_TARGET_MATCH_RESULT_TY 325 }; 326 327 protected: // Can only create subclasses. 328 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, 329 const MCInstrInfo &MII); 330 331 /// Create a copy of STI and return a non-const reference to it. 332 MCSubtargetInfo ©STI(); 333 334 /// AvailableFeatures - The current set of available features. 335 FeatureBitset AvailableFeatures; 336 337 /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly? 338 bool ParsingMSInlineAsm = false; 339 340 /// SemaCallback - The Sema callback implementation. Must be set when parsing 341 /// ms-style inline assembly. 342 MCAsmParserSemaCallback *SemaCallback = nullptr; 343 344 /// Set of options which affects instrumentation of inline assembly. 345 MCTargetOptions MCOptions; 346 347 /// Current STI. 348 const MCSubtargetInfo *STI; 349 350 const MCInstrInfo &MII; 351 352 public: 353 MCTargetAsmParser(const MCTargetAsmParser &) = delete; 354 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; 355 356 ~MCTargetAsmParser() override; 357 358 const MCSubtargetInfo &getSTI() const; 359 getAvailableFeatures()360 const FeatureBitset& getAvailableFeatures() const { 361 return AvailableFeatures; 362 } setAvailableFeatures(const FeatureBitset & Value)363 void setAvailableFeatures(const FeatureBitset& Value) { 364 AvailableFeatures = Value; 365 } 366 isParsingMSInlineAsm()367 bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; } setParsingMSInlineAsm(bool Value)368 void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; } 369 getTargetOptions()370 MCTargetOptions getTargetOptions() const { return MCOptions; } 371 setSemaCallback(MCAsmParserSemaCallback * Callback)372 void setSemaCallback(MCAsmParserSemaCallback *Callback) { 373 SemaCallback = Callback; 374 } 375 376 // Target-specific parsing of expression. parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)377 virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 378 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); 379 } 380 381 virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, 382 SMLoc &EndLoc) = 0; 383 384 /// tryParseRegister - parse one register if possible 385 /// 386 /// Check whether a register specification can be parsed at the current 387 /// location, without failing the entire parse if it can't. Must not consume 388 /// tokens if the parse fails. 389 virtual OperandMatchResultTy 390 tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) = 0; 391 392 /// ParseInstruction - Parse one assembly instruction. 393 /// 394 /// The parser is positioned following the instruction name. The target 395 /// specific instruction parser should parse the entire instruction and 396 /// construct the appropriate MCInst, or emit an error. On success, the entire 397 /// line should be parsed up to and including the end-of-statement token. On 398 /// failure, the parser is not required to read to the end of the line. 399 // 400 /// \param Name - The instruction name. 401 /// \param NameLoc - The source location of the name. 402 /// \param Operands [out] - The list of parsed operands, this returns 403 /// ownership of them to the caller. 404 /// \return True on failure. 405 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 406 SMLoc NameLoc, OperandVector &Operands) = 0; ParseInstruction(ParseInstructionInfo & Info,StringRef Name,AsmToken Token,OperandVector & Operands)407 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 408 AsmToken Token, OperandVector &Operands) { 409 return ParseInstruction(Info, Name, Token.getLoc(), Operands); 410 } 411 412 /// ParseDirective - Parse a target specific assembler directive 413 /// 414 /// The parser is positioned following the directive name. The target 415 /// specific directive parser should parse the entire directive doing or 416 /// recording any target specific work, or return true and do nothing if the 417 /// directive is not target specific. If the directive is specific for 418 /// the target, the entire line is parsed up to and including the 419 /// end-of-statement token and false is returned. 420 /// 421 /// \param DirectiveID - the identifier token of the directive. 422 virtual bool ParseDirective(AsmToken DirectiveID) = 0; 423 424 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed 425 /// instruction as an actual MCInst and emit it to the specified MCStreamer. 426 /// This returns false on success and returns true on failure to match. 427 /// 428 /// On failure, the target parser is responsible for emitting a diagnostic 429 /// explaining the match failure. 430 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 431 OperandVector &Operands, MCStreamer &Out, 432 uint64_t &ErrorInfo, 433 bool MatchingInlineAsm) = 0; 434 435 /// Allows targets to let registers opt out of clobber lists. OmitRegisterFromClobberLists(unsigned RegNo)436 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } 437 438 /// Allow a target to add special case operand matching for things that 439 /// tblgen doesn't/can't handle effectively. For example, literal 440 /// immediates on ARM. TableGen expects a token operand, but the parser 441 /// will recognize them as immediates. validateTargetOperandClass(MCParsedAsmOperand & Op,unsigned Kind)442 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 443 unsigned Kind) { 444 return Match_InvalidOperand; 445 } 446 447 /// Validate the instruction match against any complex target predicates 448 /// before rendering any operands to it. 449 virtual unsigned checkEarlyTargetMatchPredicate(MCInst & Inst,const OperandVector & Operands)450 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { 451 return Match_Success; 452 } 453 454 /// checkTargetMatchPredicate - Validate the instruction match against 455 /// any complex target predicates not expressible via match classes. checkTargetMatchPredicate(MCInst & Inst)456 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { 457 return Match_Success; 458 } 459 460 virtual void convertToMapAndConstraints(unsigned Kind, 461 const OperandVector &Operands) = 0; 462 463 /// Returns whether two operands are registers and are equal. This is used 464 /// by the tied-operands checks in the AsmMatcher. This method can be 465 /// overridden to allow e.g. a sub- or super-register as the tied operand. areEqualRegs(const MCParsedAsmOperand & Op1,const MCParsedAsmOperand & Op2)466 virtual bool areEqualRegs(const MCParsedAsmOperand &Op1, 467 const MCParsedAsmOperand &Op2) const { 468 return Op1.isReg() && Op2.isReg() && Op1.getReg() == Op2.getReg(); 469 } 470 471 // Return whether this parser uses assignment statements with equals tokens equalIsAsmAssignment()472 virtual bool equalIsAsmAssignment() { return true; }; 473 // Return whether this start of statement identifier is a label isLabel(AsmToken & Token)474 virtual bool isLabel(AsmToken &Token) { return true; }; 475 // Return whether this parser accept star as start of statement starIsStartOfStatement()476 virtual bool starIsStartOfStatement() { return false; }; 477 applyModifierToExpr(const MCExpr * E,MCSymbolRefExpr::VariantKind,MCContext & Ctx)478 virtual const MCExpr *applyModifierToExpr(const MCExpr *E, 479 MCSymbolRefExpr::VariantKind, 480 MCContext &Ctx) { 481 return nullptr; 482 } 483 484 // For actions that have to be performed before a label is emitted doBeforeLabelEmit(MCSymbol * Symbol,SMLoc IDLoc)485 virtual void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {} 486 onLabelParsed(MCSymbol * Symbol)487 virtual void onLabelParsed(MCSymbol *Symbol) {} 488 489 /// Ensure that all previously parsed instructions have been emitted to the 490 /// output streamer, if the target does not emit them immediately. flushPendingInstructions(MCStreamer & Out)491 virtual void flushPendingInstructions(MCStreamer &Out) {} 492 createTargetUnaryExpr(const MCExpr * E,AsmToken::TokenKind OperatorToken,MCContext & Ctx)493 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, 494 AsmToken::TokenKind OperatorToken, 495 MCContext &Ctx) { 496 return nullptr; 497 } 498 499 // For any initialization at the beginning of parsing. onBeginOfFile()500 virtual void onBeginOfFile() {} 501 502 // For any checks or cleanups at the end of parsing. onEndOfFile()503 virtual void onEndOfFile() {} 504 }; 505 506 } // end namespace llvm 507 508 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 509