1 //===- llvm/MC/MCTargetAsmParser.h - Target Assembly Parser -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 10 #define LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/MC/MCExpr.h" 14 #include "llvm/MC/MCInstrInfo.h" 15 #include "llvm/MC/MCParser/MCAsmLexer.h" 16 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 17 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 18 #include "llvm/MC/MCTargetOptions.h" 19 #include "llvm/MC/SubtargetFeature.h" 20 #include "llvm/Support/SMLoc.h" 21 #include <cstdint> 22 #include <memory> 23 24 namespace llvm { 25 26 class MCInst; 27 class MCStreamer; 28 class MCSubtargetInfo; 29 template <typename T> class SmallVectorImpl; 30 31 using OperandVector = SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>>; 32 33 enum AsmRewriteKind { 34 AOK_Align, // Rewrite align as .align. 35 AOK_EVEN, // Rewrite even as .even. 36 AOK_Emit, // Rewrite _emit as .byte. 37 AOK_CallInput, // Rewrite in terms of ${N:P}. 38 AOK_Input, // Rewrite in terms of $N. 39 AOK_Output, // Rewrite in terms of $N. 40 AOK_SizeDirective, // Add a sizing directive (e.g., dword ptr). 41 AOK_Label, // Rewrite local labels. 42 AOK_EndOfStatement, // Add EndOfStatement (e.g., "\n\t"). 43 AOK_Skip, // Skip emission (e.g., offset/type operators). 44 AOK_IntelExpr // SizeDirective SymDisp [BaseReg + IndexReg * Scale + ImmDisp] 45 }; 46 47 const char AsmRewritePrecedence [] = { 48 2, // AOK_Align 49 2, // AOK_EVEN 50 2, // AOK_Emit 51 3, // AOK_Input 52 3, // AOK_CallInput 53 3, // AOK_Output 54 5, // AOK_SizeDirective 55 1, // AOK_Label 56 5, // AOK_EndOfStatement 57 2, // AOK_Skip 58 2 // AOK_IntelExpr 59 }; 60 61 // Represnt the various parts which makes up an intel expression, 62 // used for emitting compound intel expressions 63 struct IntelExpr { 64 bool NeedBracs; 65 int64_t Imm; 66 StringRef BaseReg; 67 StringRef IndexReg; 68 StringRef OffsetName; 69 unsigned Scale; 70 IntelExprIntelExpr71 IntelExpr() 72 : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()), 73 OffsetName(StringRef()), Scale(1) {} 74 // [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression] IntelExprIntelExpr75 IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale, 76 StringRef offsetName, int64_t imm, bool needBracs) 77 : NeedBracs(needBracs), Imm(imm), BaseReg(baseReg), IndexReg(indexReg), 78 OffsetName(offsetName), Scale(1) { 79 if (scale) 80 Scale = scale; 81 } hasBaseRegIntelExpr82 bool hasBaseReg() const { return !BaseReg.empty(); } hasIndexRegIntelExpr83 bool hasIndexReg() const { return !IndexReg.empty(); } hasRegsIntelExpr84 bool hasRegs() const { return hasBaseReg() || hasIndexReg(); } hasOffsetIntelExpr85 bool hasOffset() const { return !OffsetName.empty(); } 86 // Normally we won't emit immediates unconditionally, 87 // unless we've got no other components emitImmIntelExpr88 bool emitImm() const { return !(hasRegs() || hasOffset()); } isValidIntelExpr89 bool isValid() const { 90 return (Scale == 1) || 91 (hasIndexReg() && (Scale == 2 || Scale == 4 || Scale == 8)); 92 } 93 }; 94 95 struct AsmRewrite { 96 AsmRewriteKind Kind; 97 SMLoc Loc; 98 unsigned Len; 99 bool Done; 100 int64_t Val; 101 StringRef Label; 102 IntelExpr IntelExp; 103 104 public: 105 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len = 0, int64_t val = 0) KindAsmRewrite106 : Kind(kind), Loc(loc), Len(len), Done(false), Val(val) {} AsmRewriteAsmRewrite107 AsmRewrite(AsmRewriteKind kind, SMLoc loc, unsigned len, StringRef label) 108 : AsmRewrite(kind, loc, len) { Label = label; } AsmRewriteAsmRewrite109 AsmRewrite(SMLoc loc, unsigned len, IntelExpr exp) 110 : AsmRewrite(AOK_IntelExpr, loc, len) { IntelExp = exp; } 111 }; 112 113 struct ParseInstructionInfo { 114 SmallVectorImpl<AsmRewrite> *AsmRewrites = nullptr; 115 116 ParseInstructionInfo() = default; ParseInstructionInfoParseInstructionInfo117 ParseInstructionInfo(SmallVectorImpl<AsmRewrite> *rewrites) 118 : AsmRewrites(rewrites) {} 119 }; 120 121 enum OperandMatchResultTy { 122 MatchOperand_Success, // operand matched successfully 123 MatchOperand_NoMatch, // operand did not match 124 MatchOperand_ParseFail // operand matched but had errors 125 }; 126 127 enum class DiagnosticPredicateTy { 128 Match, 129 NearMatch, 130 NoMatch, 131 }; 132 133 // When an operand is parsed, the assembler will try to iterate through a set of 134 // possible operand classes that the operand might match and call the 135 // corresponding PredicateMethod to determine that. 136 // 137 // If there are two AsmOperands that would give a specific diagnostic if there 138 // is no match, there is currently no mechanism to distinguish which operand is 139 // a closer match. The DiagnosticPredicate distinguishes between 'completely 140 // no match' and 'near match', so the assembler can decide whether to give a 141 // specific diagnostic, or use 'InvalidOperand' and continue to find a 142 // 'better matching' diagnostic. 143 // 144 // For example: 145 // opcode opnd0, onpd1, opnd2 146 // 147 // where: 148 // opnd2 could be an 'immediate of range [-8, 7]' 149 // opnd2 could be a 'register + shift/extend'. 150 // 151 // If opnd2 is a valid register, but with a wrong shift/extend suffix, it makes 152 // little sense to give a diagnostic that the operand should be an immediate 153 // in range [-8, 7]. 154 // 155 // This is a light-weight alternative to the 'NearMissInfo' approach 156 // below which collects *all* possible diagnostics. This alternative 157 // is optional and fully backward compatible with existing 158 // PredicateMethods that return a 'bool' (match or no match). 159 struct DiagnosticPredicate { 160 DiagnosticPredicateTy Type; 161 DiagnosticPredicateDiagnosticPredicate162 explicit DiagnosticPredicate(bool Match) 163 : Type(Match ? DiagnosticPredicateTy::Match 164 : DiagnosticPredicateTy::NearMatch) {} DiagnosticPredicateDiagnosticPredicate165 DiagnosticPredicate(DiagnosticPredicateTy T) : Type(T) {} 166 DiagnosticPredicate(const DiagnosticPredicate &) = default; 167 DiagnosticPredicate& operator=(const DiagnosticPredicate &) = default; 168 169 operator bool() const { return Type == DiagnosticPredicateTy::Match; } isMatchDiagnosticPredicate170 bool isMatch() const { return Type == DiagnosticPredicateTy::Match; } isNearMatchDiagnosticPredicate171 bool isNearMatch() const { return Type == DiagnosticPredicateTy::NearMatch; } isNoMatchDiagnosticPredicate172 bool isNoMatch() const { return Type == DiagnosticPredicateTy::NoMatch; } 173 }; 174 175 // When matching of an assembly instruction fails, there may be multiple 176 // encodings that are close to being a match. It's often ambiguous which one 177 // the programmer intended to use, so we want to report an error which mentions 178 // each of these "near-miss" encodings. This struct contains information about 179 // one such encoding, and why it did not match the parsed instruction. 180 class NearMissInfo { 181 public: 182 enum NearMissKind { 183 NoNearMiss, 184 NearMissOperand, 185 NearMissFeature, 186 NearMissPredicate, 187 NearMissTooFewOperands, 188 }; 189 190 // The encoding is valid for the parsed assembly string. This is only used 191 // internally to the table-generated assembly matcher. getSuccess()192 static NearMissInfo getSuccess() { return NearMissInfo(); } 193 194 // The instruction encoding is not valid because it requires some target 195 // features that are not currently enabled. MissingFeatures has a bit set for 196 // each feature that the encoding needs but which is not enabled. getMissedFeature(const FeatureBitset & MissingFeatures)197 static NearMissInfo getMissedFeature(const FeatureBitset &MissingFeatures) { 198 NearMissInfo Result; 199 Result.Kind = NearMissFeature; 200 Result.Features = MissingFeatures; 201 return Result; 202 } 203 204 // The instruction encoding is not valid because the target-specific 205 // predicate function returned an error code. FailureCode is the 206 // target-specific error code returned by the predicate. getMissedPredicate(unsigned FailureCode)207 static NearMissInfo getMissedPredicate(unsigned FailureCode) { 208 NearMissInfo Result; 209 Result.Kind = NearMissPredicate; 210 Result.PredicateError = FailureCode; 211 return Result; 212 } 213 214 // The instruction encoding is not valid because one (and only one) parsed 215 // operand is not of the correct type. OperandError is the error code 216 // relating to the operand class expected by the encoding. OperandClass is 217 // the type of the expected operand. Opcode is the opcode of the encoding. 218 // OperandIndex is the index into the parsed operand list. getMissedOperand(unsigned OperandError,unsigned OperandClass,unsigned Opcode,unsigned OperandIndex)219 static NearMissInfo getMissedOperand(unsigned OperandError, 220 unsigned OperandClass, unsigned Opcode, 221 unsigned OperandIndex) { 222 NearMissInfo Result; 223 Result.Kind = NearMissOperand; 224 Result.MissedOperand.Error = OperandError; 225 Result.MissedOperand.Class = OperandClass; 226 Result.MissedOperand.Opcode = Opcode; 227 Result.MissedOperand.Index = OperandIndex; 228 return Result; 229 } 230 231 // The instruction encoding is not valid because it expects more operands 232 // than were parsed. OperandClass is the class of the expected operand that 233 // was not provided. Opcode is the instruction encoding. getTooFewOperands(unsigned OperandClass,unsigned Opcode)234 static NearMissInfo getTooFewOperands(unsigned OperandClass, 235 unsigned Opcode) { 236 NearMissInfo Result; 237 Result.Kind = NearMissTooFewOperands; 238 Result.TooFewOperands.Class = OperandClass; 239 Result.TooFewOperands.Opcode = Opcode; 240 return Result; 241 } 242 243 operator bool() const { return Kind != NoNearMiss; } 244 getKind()245 NearMissKind getKind() const { return Kind; } 246 247 // Feature flags required by the instruction, that the current target does 248 // not have. getFeatures()249 const FeatureBitset& getFeatures() const { 250 assert(Kind == NearMissFeature); 251 return Features; 252 } 253 // Error code returned by the target predicate when validating this 254 // instruction encoding. getPredicateError()255 unsigned getPredicateError() const { 256 assert(Kind == NearMissPredicate); 257 return PredicateError; 258 } 259 // MatchClassKind of the operand that we expected to see. getOperandClass()260 unsigned getOperandClass() const { 261 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 262 return MissedOperand.Class; 263 } 264 // Opcode of the encoding we were trying to match. getOpcode()265 unsigned getOpcode() const { 266 assert(Kind == NearMissOperand || Kind == NearMissTooFewOperands); 267 return MissedOperand.Opcode; 268 } 269 // Error code returned when validating the operand. getOperandError()270 unsigned getOperandError() const { 271 assert(Kind == NearMissOperand); 272 return MissedOperand.Error; 273 } 274 // Index of the actual operand we were trying to match in the list of parsed 275 // operands. getOperandIndex()276 unsigned getOperandIndex() const { 277 assert(Kind == NearMissOperand); 278 return MissedOperand.Index; 279 } 280 281 private: 282 NearMissKind Kind; 283 284 // These two structs share a common prefix, so we can safely rely on the fact 285 // that they overlap in the union. 286 struct MissedOpInfo { 287 unsigned Class; 288 unsigned Opcode; 289 unsigned Error; 290 unsigned Index; 291 }; 292 293 struct TooFewOperandsInfo { 294 unsigned Class; 295 unsigned Opcode; 296 }; 297 298 union { 299 FeatureBitset Features; 300 unsigned PredicateError; 301 MissedOpInfo MissedOperand; 302 TooFewOperandsInfo TooFewOperands; 303 }; 304 NearMissInfo()305 NearMissInfo() : Kind(NoNearMiss) {} 306 }; 307 308 /// MCTargetAsmParser - Generic interface to target specific assembly parsers. 309 class MCTargetAsmParser : public MCAsmParserExtension { 310 public: 311 enum MatchResultTy { 312 Match_InvalidOperand, 313 Match_InvalidTiedOperand, 314 Match_MissingFeature, 315 Match_MnemonicFail, 316 Match_Success, 317 Match_NearMisses, 318 FIRST_TARGET_MATCH_RESULT_TY 319 }; 320 321 protected: // Can only create subclasses. 322 MCTargetAsmParser(MCTargetOptions const &, const MCSubtargetInfo &STI, 323 const MCInstrInfo &MII); 324 325 /// Create a copy of STI and return a non-const reference to it. 326 MCSubtargetInfo ©STI(); 327 328 /// AvailableFeatures - The current set of available features. 329 FeatureBitset AvailableFeatures; 330 331 /// ParsingMSInlineAsm - Are we parsing ms-style inline assembly? 332 bool ParsingMSInlineAsm = false; 333 334 /// SemaCallback - The Sema callback implementation. Must be set when parsing 335 /// ms-style inline assembly. 336 MCAsmParserSemaCallback *SemaCallback = nullptr; 337 338 /// Set of options which affects instrumentation of inline assembly. 339 MCTargetOptions MCOptions; 340 341 /// Current STI. 342 const MCSubtargetInfo *STI; 343 344 const MCInstrInfo &MII; 345 346 public: 347 MCTargetAsmParser(const MCTargetAsmParser &) = delete; 348 MCTargetAsmParser &operator=(const MCTargetAsmParser &) = delete; 349 350 ~MCTargetAsmParser() override; 351 352 const MCSubtargetInfo &getSTI() const; 353 getAvailableFeatures()354 const FeatureBitset& getAvailableFeatures() const { 355 return AvailableFeatures; 356 } setAvailableFeatures(const FeatureBitset & Value)357 void setAvailableFeatures(const FeatureBitset& Value) { 358 AvailableFeatures = Value; 359 } 360 isParsingMSInlineAsm()361 bool isParsingMSInlineAsm () { return ParsingMSInlineAsm; } setParsingMSInlineAsm(bool Value)362 void setParsingMSInlineAsm (bool Value) { ParsingMSInlineAsm = Value; } 363 getTargetOptions()364 MCTargetOptions getTargetOptions() const { return MCOptions; } 365 setSemaCallback(MCAsmParserSemaCallback * Callback)366 void setSemaCallback(MCAsmParserSemaCallback *Callback) { 367 SemaCallback = Callback; 368 } 369 370 // Target-specific parsing of expression. parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)371 virtual bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { 372 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr); 373 } 374 375 virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 376 SMLoc &EndLoc) = 0; 377 378 /// tryParseRegister - parse one register if possible 379 /// 380 /// Check whether a register specification can be parsed at the current 381 /// location, without failing the entire parse if it can't. Must not consume 382 /// tokens if the parse fails. 383 virtual OperandMatchResultTy 384 tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) = 0; 385 386 /// ParseInstruction - Parse one assembly instruction. 387 /// 388 /// The parser is positioned following the instruction name. The target 389 /// specific instruction parser should parse the entire instruction and 390 /// construct the appropriate MCInst, or emit an error. On success, the entire 391 /// line should be parsed up to and including the end-of-statement token. On 392 /// failure, the parser is not required to read to the end of the line. 393 // 394 /// \param Name - The instruction name. 395 /// \param NameLoc - The source location of the name. 396 /// \param Operands [out] - The list of parsed operands, this returns 397 /// ownership of them to the caller. 398 /// \return True on failure. 399 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 400 SMLoc NameLoc, OperandVector &Operands) = 0; ParseInstruction(ParseInstructionInfo & Info,StringRef Name,AsmToken Token,OperandVector & Operands)401 virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 402 AsmToken Token, OperandVector &Operands) { 403 return ParseInstruction(Info, Name, Token.getLoc(), Operands); 404 } 405 406 /// ParseDirective - Parse a target specific assembler directive 407 /// 408 /// The parser is positioned following the directive name. The target 409 /// specific directive parser should parse the entire directive doing or 410 /// recording any target specific work, or return true and do nothing if the 411 /// directive is not target specific. If the directive is specific for 412 /// the target, the entire line is parsed up to and including the 413 /// end-of-statement token and false is returned. 414 /// 415 /// \param DirectiveID - the identifier token of the directive. 416 virtual bool ParseDirective(AsmToken DirectiveID) = 0; 417 418 /// MatchAndEmitInstruction - Recognize a series of operands of a parsed 419 /// instruction as an actual MCInst and emit it to the specified MCStreamer. 420 /// This returns false on success and returns true on failure to match. 421 /// 422 /// On failure, the target parser is responsible for emitting a diagnostic 423 /// explaining the match failure. 424 virtual bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 425 OperandVector &Operands, MCStreamer &Out, 426 uint64_t &ErrorInfo, 427 bool MatchingInlineAsm) = 0; 428 429 /// Allows targets to let registers opt out of clobber lists. OmitRegisterFromClobberLists(unsigned RegNo)430 virtual bool OmitRegisterFromClobberLists(unsigned RegNo) { return false; } 431 432 /// Allow a target to add special case operand matching for things that 433 /// tblgen doesn't/can't handle effectively. For example, literal 434 /// immediates on ARM. TableGen expects a token operand, but the parser 435 /// will recognize them as immediates. validateTargetOperandClass(MCParsedAsmOperand & Op,unsigned Kind)436 virtual unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 437 unsigned Kind) { 438 return Match_InvalidOperand; 439 } 440 441 /// Validate the instruction match against any complex target predicates 442 /// before rendering any operands to it. 443 virtual unsigned checkEarlyTargetMatchPredicate(MCInst & Inst,const OperandVector & Operands)444 checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) { 445 return Match_Success; 446 } 447 448 /// checkTargetMatchPredicate - Validate the instruction match against 449 /// any complex target predicates not expressible via match classes. checkTargetMatchPredicate(MCInst & Inst)450 virtual unsigned checkTargetMatchPredicate(MCInst &Inst) { 451 return Match_Success; 452 } 453 454 virtual void convertToMapAndConstraints(unsigned Kind, 455 const OperandVector &Operands) = 0; 456 457 /// Returns whether two registers are equal and is used by the tied-operands 458 /// checks in the AsmMatcher. This method can be overridden allow e.g. a 459 /// sub- or super-register as the tied operand. regsEqual(const MCParsedAsmOperand & Op1,const MCParsedAsmOperand & Op2)460 virtual bool regsEqual(const MCParsedAsmOperand &Op1, 461 const MCParsedAsmOperand &Op2) const { 462 assert(Op1.isReg() && Op2.isReg() && "Operands not all regs"); 463 return Op1.getReg() == Op2.getReg(); 464 } 465 466 // Return whether this parser uses assignment statements with equals tokens equalIsAsmAssignment()467 virtual bool equalIsAsmAssignment() { return true; }; 468 // Return whether this start of statement identifier is a label isLabel(AsmToken & Token)469 virtual bool isLabel(AsmToken &Token) { return true; }; 470 // Return whether this parser accept star as start of statement starIsStartOfStatement()471 virtual bool starIsStartOfStatement() { return false; }; 472 applyModifierToExpr(const MCExpr * E,MCSymbolRefExpr::VariantKind,MCContext & Ctx)473 virtual const MCExpr *applyModifierToExpr(const MCExpr *E, 474 MCSymbolRefExpr::VariantKind, 475 MCContext &Ctx) { 476 return nullptr; 477 } 478 479 // For actions that have to be performed before a label is emitted doBeforeLabelEmit(MCSymbol * Symbol)480 virtual void doBeforeLabelEmit(MCSymbol *Symbol) {} 481 onLabelParsed(MCSymbol * Symbol)482 virtual void onLabelParsed(MCSymbol *Symbol) {} 483 484 /// Ensure that all previously parsed instructions have been emitted to the 485 /// output streamer, if the target does not emit them immediately. flushPendingInstructions(MCStreamer & Out)486 virtual void flushPendingInstructions(MCStreamer &Out) {} 487 createTargetUnaryExpr(const MCExpr * E,AsmToken::TokenKind OperatorToken,MCContext & Ctx)488 virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E, 489 AsmToken::TokenKind OperatorToken, 490 MCContext &Ctx) { 491 return nullptr; 492 } 493 494 // For any initialization at the beginning of parsing. onBeginOfFile()495 virtual void onBeginOfFile() {} 496 497 // For any checks or cleanups at the end of parsing. onEndOfFile()498 virtual void onEndOfFile() {} 499 }; 500 501 } // end namespace llvm 502 503 #endif // LLVM_MC_MCPARSER_MCTARGETASMPARSER_H 504