1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/MC/MCExpr.h" 14 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 15 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 16 #include "llvm/MC/MCTargetOptions.h" 17 #include "llvm/MC/SubtargetFeature.h" 18 #include "llvm/Support/SMLoc.h" 19 #include <cstdint> 20 #include <memory> 21 22 namespace llvm { 23 24 class MCContext; 25 class MCInst; 26 class MCInstrInfo; 27 class MCStreamer; 28 class MCSubtargetInfo; 29 class MCSymbol; 30 template <typename T> class SmallVectorImpl; 31 32 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; 33 34 enum AsmRewriteKind { 35 AOK_Align, // Rewrite align as .align. 36 AOK_EVEN, // Rewrite even as .even. 37 AOK_Emit, // Rewrite _emit as .byte. 38 AOK_CallInput, // Rewrite in terms of ${N:P}. 39 AOK_Input, // Rewrite in terms of $N. 40 AOK_Output, // Rewrite in terms of $N. 41 AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). 42 AOK_Label, // Rewrite local labels. 43 AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). 44 AOK_Skip, // Skip emission (e.g., offset/type operators). 45 AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp] 46 }; 47 48 const char AsmRewritePrecedence [] = { 49 2, // AOK_Align 50 2, // AOK_EVEN 51 2, // AOK_Emit 52 3, // AOK_Input 53 3, // AOK_CallInput 54 3, // AOK_Output 55 5, // AOK_SizeDirective 56 1, // AOK_Label 57 5, // AOK_EndOfStatement 58 2, // AOK_Skip 59 2 // AOK_IntelExpr 60 }; 61 62 // Represnt the various parts which makes up an intel expression, 63 // used for emitting compound intel expressions 64 struct IntelExpr { 65 bool NeedBracs; 66 int64_t Imm; 67 StringRef BaseReg; 68 StringRef IndexReg; 69 StringRef OffsetName; 70 unsigned Scale; 71 72 IntelExpr() 73 : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()), 74 OffsetName(StringRef()), Scale(1) {} 75 // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] 76 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, 77 StringRef offsetName, int64_t imm, bool needBracs) 78 : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg), 79 OffsetName(offsetName), Scale(1) { 80 if (scale) 81 Scale = scale; 82 } 83 bool hasBaseReg() const { return !BaseReg.empty(); } 84 bool hasIndexReg() const { return !IndexReg.empty(); } 85 bool hasRegs() const { return hasBaseReg() || hasIndexReg(); } 86 bool hasOffset() const { return !OffsetName.empty(); } 87 // Normally we won't emit immediates unconditionally, 88 // unless we've got no other components 89 bool emitImm() const { return !(hasRegs() || hasOffset()); } 90 bool isValid() const { 91 return (Scale == 1) || 92 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); 93 } 94 }; 95 96 struct AsmRewrite { 97 AsmRewriteKind Kind; 98 SMLoc Loc; 99 unsigned Len; 100 bool Done; 101 int64_t Val; 102 StringRef Label; 103 IntelExpr IntelExp; 104 bool IntelExpRestricted; 105 106 public: 107 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0, 108 bool Restricted = false) 109 : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) { 110 IntelExpRestricted = Restricted; 111 } 112 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) 113 : AsmRewrite(kind, loc, len) { Label = label; } 114 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) 115 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } 116 }; 117 118 struct ParseInstructionInfo { 119 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; 120 121 ParseInstructionInfo() = default; 122 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) 123 : AsmRewrites(rewrites) {} 124 }; 125 126 enum OperandMatchResultTy { 127 MatchOperand_Success, // operand matched successfully 128 MatchOperand_NoMatch, // operand did not match 129 MatchOperand_ParseFail // operand matched but had errors 130 }; 131 132 enum class DiagnosticPredicateTy { 133 Match, 134 NearMatch, 135 NoMatch, 136 }; 137 138 // When an operand is parsed, the assembler will try to iterate through a set of 139 // possible operand classes that the operand might match and call the 140 // corresponding PredicateMethod to determine that. 141 // 142 // If there are two AsmOperands that would give a specific diagnostic if there 143 // is no match, there is currently no mechanism to distinguish which operand is 144 // a closer match. The DiagnosticPredicate distinguishes between 'completely 145 // no match' and 'near match', so the assembler can decide whether to give a 146 // specific diagnostic, or use 'InvalidOperand' and continue to find a 147 // 'better matching' diagnostic. 148 // 149 // For example: 150 // opcode opnd0, onpd1, opnd2 151 // 152 // where: 153 // opnd2 could be an 'immediate of range [-8, 7]' 154 // opnd2 could be a 'register + shift/extend'. 155 // 156 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes 157 // little sense to give a diagnostic that the operand should be an immediate 158 // in range [-8, 7]. 159 // 160 // This is a light-weight alternative to the 'NearMissInfo' approach 161 // below which collects *all* possible diagnostics. This alternative 162 // is optional and fully backward compatible with existing 163 // PredicateMethods that return a 'bool' (match or no match). 164 struct DiagnosticPredicate { 165 DiagnosticPredicateTy Type; 166 167 explicit DiagnosticPredicate(bool Match) 168 : Type(Match ? DiagnosticPredicateTy::Match 169 : DiagnosticPredicateTy::NearMatch) {} 170 DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {} 171 DiagnosticPredicate(const DiagnosticPredicate &) = default; 172 DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default; 173 174 operator bool() const { return Type == DiagnosticPredicateTy::Match; } 175 bool isMatch() const { return Type == DiagnosticPredicateTy::Match; } 176 bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; } 177 bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; } 178 }; 179 180 // When matching of an assembly instruction fails, there may be multiple 181 // encodings that are close to being a match. It's often ambiguous which one 182 // the programmer intended to use, so we want to report an error which mentions 183 // each of these "near-miss" encodings. This struct contains information about 184 // one such encoding, and why it did not match the parsed instruction. 185 class NearMissInfo { 186 public: 187 enum NearMissKind { 188 NoNearMiss, 189 NearMissOperand, 190 NearMissFeature, 191 NearMissPredicate, 192 NearMissTooFewOperands, 193 }; 194 195 // The encoding is valid for the parsed assembly string. This is only used 196 // internally to the table-generated assembly matcher. 197 static NearMissInfo getSuccess() { return NearMissInfo(); } 198 199 // The instruction encoding is not valid because it requires some target 200 // features that are not currently enabled. MissingFeatures has a bit set for 201 // each feature that the encoding needs but which is not enabled. 202 static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) { 203 NearMissInfo Result; 204 Result.Kind = NearMissFeature; 205 Result.Features = MissingFeatures; 206 return Result; 207 } 208 209 // The instruction encoding is not valid because the target-specific 210 // predicate function returned an error code. FailureCode is the 211 // target-specific error code returned by the predicate. 212 static NearMissInfo getMissedPredicate(unsigned FailureCode) { 213 NearMissInfo Result; 214 Result.Kind = NearMissPredicate; 215 Result.PredicateError = FailureCode; 216 return Result; 217 } 218 219 // The instruction encoding is not valid because one (and only one) parsed 220 // operand is not of the correct type. OperandError is the error code 221 // relating to the operand class expected by the encoding. OperandClass is 222 // the type of the expected operand. Opcode is the opcode of the encoding. 223 // OperandIndex is the index into the parsed operand list. 224 static NearMissInfo getMissedOperand(unsigned OperandError, 225 unsigned OperandClass, unsigned Opcode, 226 unsigned OperandIndex) { 227 NearMissInfo Result; 228 Result.Kind = NearMissOperand; 229 Result.MissedOperand.Error = OperandError; 230 Result.MissedOperand.Class = OperandClass; 231 Result.MissedOperand.Opcode = Opcode; 232 Result.MissedOperand.Index = OperandIndex; 233 return Result; 234 } 235 236 // The instruction encoding is not valid because it expects more operands 237 // than were parsed. OperandClass is the class of the expected operand that 238 // was not provided. Opcode is the instruction encoding. 239 static NearMissInfo getTooFewOperands(unsigned OperandClass, 240 unsigned Opcode) { 241 NearMissInfo Result; 242 Result.Kind = NearMissTooFewOperands; 243 Result.TooFewOperands.Class = OperandClass; 244 Result.TooFewOperands.Opcode = Opcode; 245 return Result; 246 } 247 248 operator bool() const { return Kind != NoNearMiss; } 249 250 NearMissKind getKind() const { return Kind; } 251 252 // Feature flags required by the instruction, that the current target does 253 // not have. 254 const FeatureBitset& getFeatures() const { 255 assert(Kind == NearMissFeature); 256 return Features; 257 } 258 // Error code returned by the target predicate when validating this 259 // instruction encoding. 260 unsigned getPredicateError() const { 261 assert(Kind == NearMissPredicate); 262 return PredicateError; 263 } 264 // MatchClassKind of the operand that we expected to see. 265 unsigned getOperandClass() const { 266 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 267 return MissedOperand.Class; 268 } 269 // Opcode of the encoding we were trying to match. 270 unsigned getOpcode() const { 271 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 272 return MissedOperand.Opcode; 273 } 274 // Error code returned when validating the operand. 275 unsigned getOperandError() const { 276 assert(Kind == NearMissOperand); 277 return MissedOperand.Error; 278 } 279 // Index of the actual operand we were trying to match in the list of parsed 280 // operands. 281 unsigned getOperandIndex() const { 282 assert(Kind == NearMissOperand); 283 return MissedOperand.Index; 284 } 285 286 private: 287 NearMissKind Kind; 288 289 // These two structs share a common prefix, so we can safely rely on the fact 290 // that they overlap in the union. 291 struct MissedOpInfo { 292 unsigned Class; 293 unsigned Opcode; 294 unsigned Error; 295 unsigned Index; 296 }; 297 298 struct TooFewOperandsInfo { 299 unsigned Class; 300 unsigned Opcode; 301 }; 302 303 union { 304 FeatureBitset Features; 305 unsigned PredicateError; 306 MissedOpInfo MissedOperand; 307 TooFewOperandsInfo TooFewOperands; 308 }; 309 310 NearMissInfo() : Kind(NoNearMiss) {} 311 }; 312 313 /// MCTargetAsmParser - Generic interface to target specific assembly parsers. 314 class MCTargetAsmParser : public MCAsmParserExtension { 315 public: 316 enum MatchResultTy { 317 Match_InvalidOperand, 318 Match_InvalidTiedOperand, 319 Match_MissingFeature, 320 Match_MnemonicFail, 321 Match_Success, 322 Match_NearMisses, 323 FIRST_TARGET_MATCH_RESULT_TY 324 }; 325 326 protected: // Can only create subclasses. 327 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, 328 const MCInstrInfo &MII); 329 330 /// Create a copy of STI and return a non-const reference to it. 331 MCSubtargetInfo ©STI(); 332 333 /// AvailableFeatures - The current set of available features. 334 FeatureBitset AvailableFeatures; 335 336 /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly? 337 bool ParsingMSInlineAsm = false; 338 339 /// SemaCallback - The Sema callback implementation. Must be set when parsing 340 /// ms-style inline assembly. 341 MCAsmParserSemaCallback *SemaCallback = nullptr; 342 343 /// Set of options which affects instrumentation of inline assembly. 344 MCTargetOptions MCOptions; 345 346 /// Current STI. 347 const MCSubtargetInfo *STI; 348 349 const MCInstrInfo &MII; 350 351 public: 352 MCTargetAsmParser(const MCTargetAsmParser &) = delete; 353 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; 354 355 ~MCTargetAsmParser() override; 356 357 const MCSubtargetInfo &getSTI() const; 358 359 const FeatureBitset& getAvailableFeatures() const { 360 return AvailableFeatures; 361 } 362 void setAvailableFeatures(const FeatureBitset& Value) { 363 AvailableFeatures = Value; 364 } 365 366 bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; } 367 void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; } 368 369 MCTargetOptions getTargetOptions() const { return MCOptions; } 370 371 void setSemaCallback(MCAsmParserSemaCallback *Callback) { 372 SemaCallback = Callback; 373 } 374 375 // Target-specific parsing of expression. 376 virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 377 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); 378 } 379 380 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 381 SMLoc &EndLoc) = 0; 382 383 /// tryParseRegister - parse one register if possible 384 /// 385 /// Check whether a register specification can be parsed at the current 386 /// location, without failing the entire parse if it can't. Must not consume 387 /// tokens if the parse fails. 388 virtual OperandMatchResultTy 389 tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) = 0; 390 391 /// ParseInstruction - Parse one assembly instruction. 392 /// 393 /// The parser is positioned following the instruction name. The target 394 /// specific instruction parser should parse the entire instruction and 395 /// construct the appropriate MCInst, or emit an error. On success, the entire 396 /// line should be parsed up to and including the end-of-statement token. On 397 /// failure, the parser is not required to read to the end of the line. 398 // 399 /// \param Name - The instruction name. 400 /// \param NameLoc - The source location of the name. 401 /// \param Operands [out] - The list of parsed operands, this returns 402 /// ownership of them to the caller. 403 /// \return True on failure. 404 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 405 SMLoc NameLoc, OperandVector &Operands) = 0; 406 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 407 AsmToken Token, OperandVector &Operands) { 408 return ParseInstruction(Info, Name, Token.getLoc(), Operands); 409 } 410 411 /// ParseDirective - Parse a target specific assembler directive 412 /// 413 /// The parser is positioned following the directive name. The target 414 /// specific directive parser should parse the entire directive doing or 415 /// recording any target specific work, or return true and do nothing if the 416 /// directive is not target specific. If the directive is specific for 417 /// the target, the entire line is parsed up to and including the 418 /// end-of-statement token and false is returned. 419 /// 420 /// \param DirectiveID - the identifier token of the directive. 421 virtual bool ParseDirective(AsmToken DirectiveID) = 0; 422 423 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed 424 /// instruction as an actual MCInst and emit it to the specified MCStreamer. 425 /// This returns false on success and returns true on failure to match. 426 /// 427 /// On failure, the target parser is responsible for emitting a diagnostic 428 /// explaining the match failure. 429 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 430 OperandVector &Operands, MCStreamer &Out, 431 uint64_t &ErrorInfo, 432 bool MatchingInlineAsm) = 0; 433 434 /// Allows targets to let registers opt out of clobber lists. 435 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } 436 437 /// Allow a target to add special case operand matching for things that 438 /// tblgen doesn't/can't handle effectively. For example, literal 439 /// immediates on ARM. TableGen expects a token operand, but the parser 440 /// will recognize them as immediates. 441 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 442 unsigned Kind) { 443 return Match_InvalidOperand; 444 } 445 446 /// Validate the instruction match against any complex target predicates 447 /// before rendering any operands to it. 448 virtual unsigned 449 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { 450 return Match_Success; 451 } 452 453 /// checkTargetMatchPredicate - Validate the instruction match against 454 /// any complex target predicates not expressible via match classes. 455 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { 456 return Match_Success; 457 } 458 459 virtual void convertToMapAndConstraints(unsigned Kind, 460 const OperandVector &Operands) = 0; 461 462 /// Returns whether two registers are equal and is used by the tied-operands 463 /// checks in the AsmMatcher. This method can be overridden allow e.g. a 464 /// sub- or super-register as the tied operand. 465 virtual bool regsEqual(const MCParsedAsmOperand &Op1, 466 const MCParsedAsmOperand &Op2) const { 467 assert(Op1.isReg() && Op2.isReg() && "Operands not all regs"); 468 return Op1.getReg() == Op2.getReg(); 469 } 470 471 // Return whether this parser uses assignment statements with equals tokens 472 virtual bool equalIsAsmAssignment() { return true; }; 473 // Return whether this start of statement identifier is a label 474 virtual bool isLabel(AsmToken &Token) { return true; }; 475 // Return whether this parser accept star as start of statement 476 virtual bool starIsStartOfStatement() { return false; }; 477 478 virtual const MCExpr *applyModifierToExpr(const MCExpr *E, 479 MCSymbolRefExpr::VariantKind, 480 MCContext &Ctx) { 481 return nullptr; 482 } 483 484 // For actions that have to be performed before a label is emitted 485 virtual void doBeforeLabelEmit(MCSymbol *Symbol) {} 486 487 virtual void onLabelParsed(MCSymbol *Symbol) {} 488 489 /// Ensure that all previously parsed instructions have been emitted to the 490 /// output streamer, if the target does not emit them immediately. 491 virtual void flushPendingInstructions(MCStreamer &Out) {} 492 493 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, 494 AsmToken::TokenKind OperatorToken, 495 MCContext &Ctx) { 496 return nullptr; 497 } 498 499 // For any initialization at the beginning of parsing. 500 virtual void onBeginOfFile() {} 501 502 // For any checks or cleanups at the end of parsing. 503 virtual void onEndOfFile() {} 504 }; 505 506 } // end namespace llvm 507 508 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 509