1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/MC/MCExpr.h" 14 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 15 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 16 #include "llvm/MC/MCTargetOptions.h" 17 #include "llvm/Support/SMLoc.h" 18 #include "llvm/TargetParser/SubtargetFeature.h" 19 #include <cstdint> 20 #include <memory> 21 22 namespace llvm { 23 24 class MCContext; 25 class MCInst; 26 class MCInstrInfo; 27 class MCRegister; 28 class MCStreamer; 29 class MCSubtargetInfo; 30 class MCSymbol; 31 template <typename T> class SmallVectorImpl; 32 33 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; 34 35 enum AsmRewriteKind { 36 AOK_Align, // Rewrite align as .align. 37 AOK_EVEN, // Rewrite even as .even. 38 AOK_Emit, // Rewrite _emit as .byte. 39 AOK_CallInput, // Rewrite in terms of ${N:P}. 40 AOK_Input, // Rewrite in terms of $N. 41 AOK_Output, // Rewrite in terms of $N. 42 AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). 43 AOK_Label, // Rewrite local labels. 44 AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). 45 AOK_Skip, // Skip emission (e.g., offset/type operators). 46 AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp] 47 }; 48 49 const char AsmRewritePrecedence [] = { 50 2, // AOK_Align 51 2, // AOK_EVEN 52 2, // AOK_Emit 53 3, // AOK_Input 54 3, // AOK_CallInput 55 3, // AOK_Output 56 5, // AOK_SizeDirective 57 1, // AOK_Label 58 5, // AOK_EndOfStatement 59 2, // AOK_Skip 60 2 // AOK_IntelExpr 61 }; 62 63 // Represent the various parts which make up an intel expression, 64 // used for emitting compound intel expressions 65 struct IntelExpr { 66 bool NeedBracs = false; 67 int64_t Imm = 0; 68 StringRef BaseReg; 69 StringRef IndexReg; 70 StringRef OffsetName; 71 unsigned Scale = 1; 72 73 IntelExpr() = default; 74 // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] 75 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, 76 StringRef offsetName, int64_t imm, bool needBracs) 77 : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg), 78 OffsetName(offsetName), Scale(1) { 79 if (scale) 80 Scale = scale; 81 } 82 bool hasBaseReg() const { return !BaseReg.empty(); } 83 bool hasIndexReg() const { return !IndexReg.empty(); } 84 bool hasRegs() const { return hasBaseReg() || hasIndexReg(); } 85 bool hasOffset() const { return !OffsetName.empty(); } 86 // Normally we won't emit immediates unconditionally, 87 // unless we've got no other components 88 bool emitImm() const { return !(hasRegs() || hasOffset()); } 89 bool isValid() const { 90 return (Scale == 1) || 91 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); 92 } 93 }; 94 95 struct AsmRewrite { 96 AsmRewriteKind Kind; 97 SMLoc Loc; 98 unsigned Len; 99 bool Done; 100 int64_t Val; 101 StringRef Label; 102 IntelExpr IntelExp; 103 bool IntelExpRestricted; 104 105 public: 106 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0, 107 bool Restricted = false) 108 : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) { 109 IntelExpRestricted = Restricted; 110 } 111 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) 112 : AsmRewrite(kind, loc, len) { Label = label; } 113 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) 114 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } 115 }; 116 117 struct ParseInstructionInfo { 118 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; 119 120 ParseInstructionInfo() = default; 121 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) 122 : AsmRewrites(rewrites) {} 123 }; 124 125 enum OperandMatchResultTy { 126 MatchOperand_Success, // operand matched successfully 127 MatchOperand_NoMatch, // operand did not match 128 MatchOperand_ParseFail // operand matched but had errors 129 }; 130 131 /// Ternary parse status returned by various parse* methods. 132 class ParseStatus { 133 enum class StatusTy { Success, Failure, NoMatch } Status; 134 135 public: 136 #if __cplusplus >= 202002L 137 using enum StatusTy; 138 #else 139 static constexpr StatusTy Success = StatusTy::Success; 140 static constexpr StatusTy Failure = StatusTy::Failure; 141 static constexpr StatusTy NoMatch = StatusTy::NoMatch; 142 #endif 143 144 constexpr ParseStatus() : Status(NoMatch) {} 145 146 constexpr ParseStatus(StatusTy Status) : Status(Status) {} 147 148 constexpr ParseStatus(bool Error) : Status(Error ? Failure : Success) {} 149 150 template <typename T> constexpr ParseStatus(T) = delete; 151 152 constexpr bool isSuccess() const { return Status == StatusTy::Success; } 153 constexpr bool isFailure() const { return Status == StatusTy::Failure; } 154 constexpr bool isNoMatch() const { return Status == StatusTy::NoMatch; } 155 156 // Allow implicit conversions to / from OperandMatchResultTy. 157 constexpr ParseStatus(OperandMatchResultTy R) 158 : Status(R == MatchOperand_Success ? Success 159 : R == MatchOperand_ParseFail ? Failure 160 : NoMatch) {} 161 constexpr operator OperandMatchResultTy() const { 162 return isSuccess() ? MatchOperand_Success 163 : isFailure() ? MatchOperand_ParseFail 164 : MatchOperand_NoMatch; 165 } 166 }; 167 168 enum class DiagnosticPredicateTy { 169 Match, 170 NearMatch, 171 NoMatch, 172 }; 173 174 // When an operand is parsed, the assembler will try to iterate through a set of 175 // possible operand classes that the operand might match and call the 176 // corresponding PredicateMethod to determine that. 177 // 178 // If there are two AsmOperands that would give a specific diagnostic if there 179 // is no match, there is currently no mechanism to distinguish which operand is 180 // a closer match. The DiagnosticPredicate distinguishes between 'completely 181 // no match' and 'near match', so the assembler can decide whether to give a 182 // specific diagnostic, or use 'InvalidOperand' and continue to find a 183 // 'better matching' diagnostic. 184 // 185 // For example: 186 // opcode opnd0, onpd1, opnd2 187 // 188 // where: 189 // opnd2 could be an 'immediate of range [-8, 7]' 190 // opnd2 could be a 'register + shift/extend'. 191 // 192 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes 193 // little sense to give a diagnostic that the operand should be an immediate 194 // in range [-8, 7]. 195 // 196 // This is a light-weight alternative to the 'NearMissInfo' approach 197 // below which collects *all* possible diagnostics. This alternative 198 // is optional and fully backward compatible with existing 199 // PredicateMethods that return a 'bool' (match or no match). 200 struct DiagnosticPredicate { 201 DiagnosticPredicateTy Type; 202 203 explicit DiagnosticPredicate(bool Match) 204 : Type(Match ? DiagnosticPredicateTy::Match 205 : DiagnosticPredicateTy::NearMatch) {} 206 DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {} 207 DiagnosticPredicate(const DiagnosticPredicate &) = default; 208 DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default; 209 210 operator bool() const { return Type == DiagnosticPredicateTy::Match; } 211 bool isMatch() const { return Type == DiagnosticPredicateTy::Match; } 212 bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; } 213 bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; } 214 }; 215 216 // When matching of an assembly instruction fails, there may be multiple 217 // encodings that are close to being a match. It's often ambiguous which one 218 // the programmer intended to use, so we want to report an error which mentions 219 // each of these "near-miss" encodings. This struct contains information about 220 // one such encoding, and why it did not match the parsed instruction. 221 class NearMissInfo { 222 public: 223 enum NearMissKind { 224 NoNearMiss, 225 NearMissOperand, 226 NearMissFeature, 227 NearMissPredicate, 228 NearMissTooFewOperands, 229 }; 230 231 // The encoding is valid for the parsed assembly string. This is only used 232 // internally to the table-generated assembly matcher. 233 static NearMissInfo getSuccess() { return NearMissInfo(); } 234 235 // The instruction encoding is not valid because it requires some target 236 // features that are not currently enabled. MissingFeatures has a bit set for 237 // each feature that the encoding needs but which is not enabled. 238 static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) { 239 NearMissInfo Result; 240 Result.Kind = NearMissFeature; 241 Result.Features = MissingFeatures; 242 return Result; 243 } 244 245 // The instruction encoding is not valid because the target-specific 246 // predicate function returned an error code. FailureCode is the 247 // target-specific error code returned by the predicate. 248 static NearMissInfo getMissedPredicate(unsigned FailureCode) { 249 NearMissInfo Result; 250 Result.Kind = NearMissPredicate; 251 Result.PredicateError = FailureCode; 252 return Result; 253 } 254 255 // The instruction encoding is not valid because one (and only one) parsed 256 // operand is not of the correct type. OperandError is the error code 257 // relating to the operand class expected by the encoding. OperandClass is 258 // the type of the expected operand. Opcode is the opcode of the encoding. 259 // OperandIndex is the index into the parsed operand list. 260 static NearMissInfo getMissedOperand(unsigned OperandError, 261 unsigned OperandClass, unsigned Opcode, 262 unsigned OperandIndex) { 263 NearMissInfo Result; 264 Result.Kind = NearMissOperand; 265 Result.MissedOperand.Error = OperandError; 266 Result.MissedOperand.Class = OperandClass; 267 Result.MissedOperand.Opcode = Opcode; 268 Result.MissedOperand.Index = OperandIndex; 269 return Result; 270 } 271 272 // The instruction encoding is not valid because it expects more operands 273 // than were parsed. OperandClass is the class of the expected operand that 274 // was not provided. Opcode is the instruction encoding. 275 static NearMissInfo getTooFewOperands(unsigned OperandClass, 276 unsigned Opcode) { 277 NearMissInfo Result; 278 Result.Kind = NearMissTooFewOperands; 279 Result.TooFewOperands.Class = OperandClass; 280 Result.TooFewOperands.Opcode = Opcode; 281 return Result; 282 } 283 284 operator bool() const { return Kind != NoNearMiss; } 285 286 NearMissKind getKind() const { return Kind; } 287 288 // Feature flags required by the instruction, that the current target does 289 // not have. 290 const FeatureBitset& getFeatures() const { 291 assert(Kind == NearMissFeature); 292 return Features; 293 } 294 // Error code returned by the target predicate when validating this 295 // instruction encoding. 296 unsigned getPredicateError() const { 297 assert(Kind == NearMissPredicate); 298 return PredicateError; 299 } 300 // MatchClassKind of the operand that we expected to see. 301 unsigned getOperandClass() const { 302 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 303 return MissedOperand.Class; 304 } 305 // Opcode of the encoding we were trying to match. 306 unsigned getOpcode() const { 307 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 308 return MissedOperand.Opcode; 309 } 310 // Error code returned when validating the operand. 311 unsigned getOperandError() const { 312 assert(Kind == NearMissOperand); 313 return MissedOperand.Error; 314 } 315 // Index of the actual operand we were trying to match in the list of parsed 316 // operands. 317 unsigned getOperandIndex() const { 318 assert(Kind == NearMissOperand); 319 return MissedOperand.Index; 320 } 321 322 private: 323 NearMissKind Kind; 324 325 // These two structs share a common prefix, so we can safely rely on the fact 326 // that they overlap in the union. 327 struct MissedOpInfo { 328 unsigned Class; 329 unsigned Opcode; 330 unsigned Error; 331 unsigned Index; 332 }; 333 334 struct TooFewOperandsInfo { 335 unsigned Class; 336 unsigned Opcode; 337 }; 338 339 union { 340 FeatureBitset Features; 341 unsigned PredicateError; 342 MissedOpInfo MissedOperand; 343 TooFewOperandsInfo TooFewOperands; 344 }; 345 346 NearMissInfo() : Kind(NoNearMiss) {} 347 }; 348 349 /// MCTargetAsmParser - Generic interface to target specific assembly parsers. 350 class MCTargetAsmParser : public MCAsmParserExtension { 351 public: 352 enum MatchResultTy { 353 Match_InvalidOperand, 354 Match_InvalidTiedOperand, 355 Match_MissingFeature, 356 Match_MnemonicFail, 357 Match_Success, 358 Match_NearMisses, 359 FIRST_TARGET_MATCH_RESULT_TY 360 }; 361 362 protected: // Can only create subclasses. 363 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, 364 const MCInstrInfo &MII); 365 366 /// Create a copy of STI and return a non-const reference to it. 367 MCSubtargetInfo ©STI(); 368 369 /// AvailableFeatures - The current set of available features. 370 FeatureBitset AvailableFeatures; 371 372 /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly? 373 bool ParsingMSInlineAsm = false; 374 375 /// SemaCallback - The Sema callback implementation. Must be set when parsing 376 /// ms-style inline assembly. 377 MCAsmParserSemaCallback *SemaCallback = nullptr; 378 379 /// Set of options which affects instrumentation of inline assembly. 380 MCTargetOptions MCOptions; 381 382 /// Current STI. 383 const MCSubtargetInfo *STI; 384 385 const MCInstrInfo &MII; 386 387 public: 388 MCTargetAsmParser(const MCTargetAsmParser &) = delete; 389 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; 390 391 ~MCTargetAsmParser() override; 392 393 const MCSubtargetInfo &getSTI() const; 394 395 const FeatureBitset& getAvailableFeatures() const { 396 return AvailableFeatures; 397 } 398 void setAvailableFeatures(const FeatureBitset& Value) { 399 AvailableFeatures = Value; 400 } 401 402 bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; } 403 void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; } 404 405 MCTargetOptions getTargetOptions() const { return MCOptions; } 406 407 void setSemaCallback(MCAsmParserSemaCallback *Callback) { 408 SemaCallback = Callback; 409 } 410 411 // Target-specific parsing of expression. 412 virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 413 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); 414 } 415 416 virtual bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, 417 SMLoc &EndLoc) = 0; 418 419 /// tryParseRegister - parse one register if possible 420 /// 421 /// Check whether a register specification can be parsed at the current 422 /// location, without failing the entire parse if it can't. Must not consume 423 /// tokens if the parse fails. 424 virtual OperandMatchResultTy 425 tryParseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) = 0; 426 427 /// ParseInstruction - Parse one assembly instruction. 428 /// 429 /// The parser is positioned following the instruction name. The target 430 /// specific instruction parser should parse the entire instruction and 431 /// construct the appropriate MCInst, or emit an error. On success, the entire 432 /// line should be parsed up to and including the end-of-statement token. On 433 /// failure, the parser is not required to read to the end of the line. 434 // 435 /// \param Name - The instruction name. 436 /// \param NameLoc - The source location of the name. 437 /// \param Operands [out] - The list of parsed operands, this returns 438 /// ownership of them to the caller. 439 /// \return True on failure. 440 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 441 SMLoc NameLoc, OperandVector &Operands) = 0; 442 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 443 AsmToken Token, OperandVector &Operands) { 444 return ParseInstruction(Info, Name, Token.getLoc(), Operands); 445 } 446 447 /// ParseDirective - Parse a target specific assembler directive 448 /// This method is deprecated, use 'parseDirective' instead. 449 /// 450 /// The parser is positioned following the directive name. The target 451 /// specific directive parser should parse the entire directive doing or 452 /// recording any target specific work, or return true and do nothing if the 453 /// directive is not target specific. If the directive is specific for 454 /// the target, the entire line is parsed up to and including the 455 /// end-of-statement token and false is returned. 456 /// 457 /// \param DirectiveID - the identifier token of the directive. 458 virtual bool ParseDirective(AsmToken DirectiveID) { return true; } 459 460 /// Parses a target-specific assembler directive. 461 /// 462 /// The parser is positioned following the directive name. The target-specific 463 /// directive parser should parse the entire directive doing or recording any 464 /// target-specific work, or emit an error. On success, the entire line should 465 /// be parsed up to and including the end-of-statement token. On failure, the 466 /// parser is not required to read to the end of the line. If the directive is 467 /// not target-specific, no tokens should be consumed and NoMatch is returned. 468 /// 469 /// \param DirectiveID - The token identifying the directive. 470 virtual ParseStatus parseDirective(AsmToken DirectiveID); 471 472 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed 473 /// instruction as an actual MCInst and emit it to the specified MCStreamer. 474 /// This returns false on success and returns true on failure to match. 475 /// 476 /// On failure, the target parser is responsible for emitting a diagnostic 477 /// explaining the match failure. 478 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 479 OperandVector &Operands, MCStreamer &Out, 480 uint64_t &ErrorInfo, 481 bool MatchingInlineAsm) = 0; 482 483 /// Allows targets to let registers opt out of clobber lists. 484 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } 485 486 /// Allow a target to add special case operand matching for things that 487 /// tblgen doesn't/can't handle effectively. For example, literal 488 /// immediates on ARM. TableGen expects a token operand, but the parser 489 /// will recognize them as immediates. 490 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 491 unsigned Kind) { 492 return Match_InvalidOperand; 493 } 494 495 /// Validate the instruction match against any complex target predicates 496 /// before rendering any operands to it. 497 virtual unsigned 498 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { 499 return Match_Success; 500 } 501 502 /// checkTargetMatchPredicate - Validate the instruction match against 503 /// any complex target predicates not expressible via match classes. 504 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { 505 return Match_Success; 506 } 507 508 virtual void convertToMapAndConstraints(unsigned Kind, 509 const OperandVector &Operands) = 0; 510 511 /// Returns whether two operands are registers and are equal. This is used 512 /// by the tied-operands checks in the AsmMatcher. This method can be 513 /// overridden to allow e.g. a sub- or super-register as the tied operand. 514 virtual bool areEqualRegs(const MCParsedAsmOperand &Op1, 515 const MCParsedAsmOperand &Op2) const { 516 return Op1.isReg() && Op2.isReg() && Op1.getReg() == Op2.getReg(); 517 } 518 519 // Return whether this parser uses assignment statements with equals tokens 520 virtual bool equalIsAsmAssignment() { return true; }; 521 // Return whether this start of statement identifier is a label 522 virtual bool isLabel(AsmToken &Token) { return true; }; 523 // Return whether this parser accept star as start of statement 524 virtual bool starIsStartOfStatement() { return false; }; 525 526 virtual const MCExpr *applyModifierToExpr(const MCExpr *E, 527 MCSymbolRefExpr::VariantKind, 528 MCContext &Ctx) { 529 return nullptr; 530 } 531 532 // For actions that have to be performed before a label is emitted 533 virtual void doBeforeLabelEmit(MCSymbol *Symbol, SMLoc IDLoc) {} 534 535 virtual void onLabelParsed(MCSymbol *Symbol) {} 536 537 /// Ensure that all previously parsed instructions have been emitted to the 538 /// output streamer, if the target does not emit them immediately. 539 virtual void flushPendingInstructions(MCStreamer &Out) {} 540 541 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, 542 AsmToken::TokenKind OperatorToken, 543 MCContext &Ctx) { 544 return nullptr; 545 } 546 547 // For any initialization at the beginning of parsing. 548 virtual void onBeginOfFile() {} 549 550 // For any checks or cleanups at the end of parsing. 551 virtual void onEndOfFile() {} 552 }; 553 554 } // end namespace llvm 555 556 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 557